re PR rtl-optimization/33927 (replace_read in dse.c could handle cases where GET_MODE_CLASS (read_mode) != GET_MODE_CLASS (store_mode) (and the size is the same))
gcc/ PR rtl-optimization/33927 * Makefile.in (dse.o): Depend on $(TM_P_H). * expr.h (extract_low_bits): Declare. * expmed.c (extract_low_bits): New function. * rtlhooks.c (gen_lowpart_general): Generalize SUBREG handling. * dse.c: Include tm_p.h. (find_shift_sequence): Remove the read_reg argument and return the read value. Emit the instructions instead of returning them. Iterate on new_mode rather than calculating it each time. Check MODES_TIEABLE_P. Use simplify_gen_subreg to convert the source to NEW_MODE and extract_low_bits to convert the shifted value to READ_MODE. (replace_read): Allow the load and store to have different mode classes. Use extract_low_bits when SHIFT == 0. Create the shift or extraction instructions before trying the replacement. Update dump-file code accordingly, avoiding use of REGNO (store_info->rhs). gcc/testsuite/ * gcc.target/mips/dse-1.c: Add checks for zeros. From-SVN: r133452
This commit is contained in:
parent
48e55066d5
commit
18b526e806
8 changed files with 167 additions and 102 deletions
|
@ -1,3 +1,22 @@
|
|||
2008-03-22 Richard Sandiford <rsandifo@nildram.co.uk>
|
||||
|
||||
PR rtl-optimization/33927
|
||||
* Makefile.in (dse.o): Depend on $(TM_P_H).
|
||||
* expr.h (extract_low_bits): Declare.
|
||||
* expmed.c (extract_low_bits): New function.
|
||||
* rtlhooks.c (gen_lowpart_general): Generalize SUBREG handling.
|
||||
* dse.c: Include tm_p.h.
|
||||
(find_shift_sequence): Remove the read_reg argument and return the
|
||||
read value. Emit the instructions instead of returning them.
|
||||
Iterate on new_mode rather than calculating it each time.
|
||||
Check MODES_TIEABLE_P. Use simplify_gen_subreg to convert the
|
||||
source to NEW_MODE and extract_low_bits to convert the shifted
|
||||
value to READ_MODE.
|
||||
(replace_read): Allow the load and store to have different mode
|
||||
classes. Use extract_low_bits when SHIFT == 0. Create the shift
|
||||
or extraction instructions before trying the replacement. Update
|
||||
dump-file code accordingly, avoiding use of REGNO (store_info->rhs).
|
||||
|
||||
2008-03-22 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.c (assign_386_stack_local): Align DImode slots
|
||||
|
|
|
@ -2555,8 +2555,8 @@ dce.o : dce.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
|||
$(TREE_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(DF_H) cselib.h \
|
||||
$(DBGCNT_H) dce.h timevar.h tree-pass.h $(DBGCNT_H)
|
||||
dse.o : dse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
$(TREE_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) \
|
||||
$(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) timevar.h tree-pass.h \
|
||||
$(TREE_H) $(TM_P_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \
|
||||
$(RECOG_H) $(EXPR_H) $(DF_H) cselib.h $(DBGCNT_H) timevar.h tree-pass.h \
|
||||
alloc-pool.h $(ALIAS_H) dse.h $(OPTABS_H)
|
||||
fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
toplev.h insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \
|
||||
|
|
157
gcc/dse.c
157
gcc/dse.c
|
@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tm.h"
|
||||
#include "rtl.h"
|
||||
#include "tree.h"
|
||||
#include "tm_p.h"
|
||||
#include "regs.h"
|
||||
#include "hard-reg-set.h"
|
||||
#include "flags.h"
|
||||
|
@ -1383,9 +1384,9 @@ record_store (rtx body, bb_info_t bb_info)
|
|||
if (store_info->is_set
|
||||
/* No place to keep the value after ra. */
|
||||
&& !reload_completed
|
||||
/* The careful reviewer may wish to comment my checking that the
|
||||
rhs of a store is always a reg. */
|
||||
&& REG_P (SET_SRC (body))
|
||||
&& (REG_P (SET_SRC (body))
|
||||
|| GET_CODE (SET_SRC (body)) == SUBREG
|
||||
|| CONSTANT_P (SET_SRC (body)))
|
||||
/* Sometimes the store and reload is used for truncation and
|
||||
rounding. */
|
||||
&& !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
|
||||
|
@ -1418,15 +1419,15 @@ dump_insn_info (const char * start, insn_info_t insn_info)
|
|||
shift. */
|
||||
|
||||
static rtx
|
||||
find_shift_sequence (rtx read_reg,
|
||||
int access_size,
|
||||
find_shift_sequence (int access_size,
|
||||
store_info_t store_info,
|
||||
read_info_t read_info,
|
||||
int shift)
|
||||
{
|
||||
enum machine_mode store_mode = GET_MODE (store_info->mem);
|
||||
enum machine_mode read_mode = GET_MODE (read_info->mem);
|
||||
rtx chosen_seq = NULL;
|
||||
enum machine_mode new_mode;
|
||||
rtx read_reg = NULL;
|
||||
|
||||
/* Some machines like the x86 have shift insns for each size of
|
||||
operand. Other machines like the ppc or the ia-64 may only have
|
||||
|
@ -1435,21 +1436,31 @@ find_shift_sequence (rtx read_reg,
|
|||
justify the value we want to read but is available in one insn on
|
||||
the machine. */
|
||||
|
||||
for (; access_size <= UNITS_PER_WORD; access_size *= 2)
|
||||
for (new_mode = smallest_mode_for_size (access_size * BITS_PER_UNIT,
|
||||
MODE_INT);
|
||||
GET_MODE_BITSIZE (new_mode) <= BITS_PER_WORD;
|
||||
new_mode = GET_MODE_WIDER_MODE (new_mode))
|
||||
{
|
||||
rtx target, new_reg, shift_seq, insn;
|
||||
enum machine_mode new_mode;
|
||||
rtx target, new_reg, shift_seq, insn, new_lhs;
|
||||
int cost;
|
||||
|
||||
/* Try a wider mode if truncating the store mode to ACCESS_SIZE
|
||||
bytes requires a real instruction. */
|
||||
if (access_size < GET_MODE_SIZE (store_mode)
|
||||
&& !TRULY_NOOP_TRUNCATION (access_size * BITS_PER_UNIT,
|
||||
/* Try a wider mode if truncating the store mode to NEW_MODE
|
||||
requires a real instruction. */
|
||||
if (GET_MODE_BITSIZE (new_mode) < GET_MODE_BITSIZE (store_mode)
|
||||
&& !TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (new_mode),
|
||||
GET_MODE_BITSIZE (store_mode)))
|
||||
continue;
|
||||
|
||||
new_mode = smallest_mode_for_size (access_size * BITS_PER_UNIT,
|
||||
MODE_INT);
|
||||
/* Also try a wider mode if the necessary punning is either not
|
||||
desirable or not possible. */
|
||||
if (!CONSTANT_P (store_info->rhs)
|
||||
&& !MODES_TIEABLE_P (new_mode, store_mode))
|
||||
continue;
|
||||
new_lhs = simplify_gen_subreg (new_mode, copy_rtx (store_info->rhs),
|
||||
store_mode, 0);
|
||||
if (new_lhs == NULL_RTX)
|
||||
continue;
|
||||
|
||||
new_reg = gen_reg_rtx (new_mode);
|
||||
|
||||
start_sequence ();
|
||||
|
@ -1485,31 +1496,13 @@ find_shift_sequence (rtx read_reg,
|
|||
take the value from the store and put it into the
|
||||
shift pseudo, then shift it, then generate another
|
||||
move to put in into the target of the read. */
|
||||
start_sequence ();
|
||||
emit_move_insn (new_reg, gen_lowpart (new_mode, store_info->rhs));
|
||||
emit_move_insn (new_reg, new_lhs);
|
||||
emit_insn (shift_seq);
|
||||
convert_move (read_reg, new_reg, 1);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
|
||||
REGNO (new_reg), GET_MODE_NAME (new_mode),
|
||||
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
|
||||
|
||||
fprintf (dump_file, " -- with shift of r%d by %d\n",
|
||||
REGNO(new_reg), shift);
|
||||
fprintf (dump_file, " -- and second extract insn r%d:%s = r%d:%s\n",
|
||||
REGNO (read_reg), GET_MODE_NAME (read_mode),
|
||||
REGNO (new_reg), GET_MODE_NAME (new_mode));
|
||||
}
|
||||
|
||||
/* Get the three insn sequence and return it. */
|
||||
chosen_seq = get_insns ();
|
||||
end_sequence ();
|
||||
read_reg = extract_low_bits (read_mode, new_mode, new_reg);
|
||||
break;
|
||||
}
|
||||
|
||||
return chosen_seq;
|
||||
return read_reg;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1552,16 +1545,11 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
|
|||
enum machine_mode read_mode = GET_MODE (read_info->mem);
|
||||
int shift;
|
||||
int access_size; /* In bytes. */
|
||||
rtx read_reg = gen_reg_rtx (read_mode);
|
||||
rtx shift_seq = NULL;
|
||||
rtx insns, read_reg;
|
||||
|
||||
if (!dbg_cnt (dse))
|
||||
return false;
|
||||
|
||||
if (GET_MODE_CLASS (read_mode) != MODE_INT
|
||||
|| GET_MODE_CLASS (store_mode) != MODE_INT)
|
||||
return false;
|
||||
|
||||
/* To get here the read is within the boundaries of the write so
|
||||
shift will never be negative. Start out with the shift being in
|
||||
bytes. */
|
||||
|
@ -1575,62 +1563,43 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
|
|||
/* From now on it is bits. */
|
||||
shift *= BITS_PER_UNIT;
|
||||
|
||||
/* We need to keep this in perspective. We are replacing a read
|
||||
/* Create a sequence of instructions to set up the read register.
|
||||
This sequence goes immediately before the store and its result
|
||||
is read by the load.
|
||||
|
||||
We need to keep this in perspective. We are replacing a read
|
||||
with a sequence of insns, but the read will almost certainly be
|
||||
in cache, so it is not going to be an expensive one. Thus, we
|
||||
are not willing to do a multi insn shift or worse a subroutine
|
||||
call to get rid of the read. */
|
||||
if (shift)
|
||||
{
|
||||
if (access_size > UNITS_PER_WORD)
|
||||
return false;
|
||||
|
||||
shift_seq = find_shift_sequence (read_reg, access_size, store_info,
|
||||
read_info, shift);
|
||||
if (!shift_seq)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "replacing load at %d from store at %d\n",
|
||||
INSN_UID (read_insn->insn), INSN_UID (store_insn->insn));
|
||||
fprintf (dump_file, "trying to replace %smode load in insn %d"
|
||||
" from %smode store in insn %d\n",
|
||||
GET_MODE_NAME (read_mode), INSN_UID (read_insn->insn),
|
||||
GET_MODE_NAME (store_mode), INSN_UID (store_insn->insn));
|
||||
start_sequence ();
|
||||
if (shift)
|
||||
read_reg = find_shift_sequence (access_size, store_info, read_info, shift);
|
||||
else
|
||||
read_reg = extract_low_bits (read_mode, store_mode,
|
||||
copy_rtx (store_info->rhs));
|
||||
if (read_reg == NULL_RTX)
|
||||
{
|
||||
end_sequence ();
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " -- could not extract bits of stored value\n");
|
||||
return false;
|
||||
}
|
||||
/* Force the value into a new register so that it won't be clobbered
|
||||
between the store and the load. */
|
||||
read_reg = copy_to_mode_reg (read_mode, read_reg);
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
if (validate_change (read_insn->insn, loc, read_reg, 0))
|
||||
{
|
||||
rtx insns;
|
||||
deferred_change_t deferred_change = pool_alloc (deferred_change_pool);
|
||||
|
||||
if (read_mode == store_mode)
|
||||
{
|
||||
start_sequence ();
|
||||
|
||||
/* The modes are the same and everything lines up. Just
|
||||
generate a simple move. */
|
||||
emit_move_insn (read_reg, store_info->rhs);
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " -- adding move insn r%d = r%d\n",
|
||||
REGNO (read_reg), REGNO (store_info->rhs));
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
}
|
||||
else if (shift)
|
||||
insns = shift_seq;
|
||||
else
|
||||
{
|
||||
/* The modes are different but the lsb are in the same
|
||||
place, we need to extract the value in the right from the
|
||||
rhs of the store. */
|
||||
start_sequence ();
|
||||
convert_move (read_reg, store_info->rhs, 1);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " -- adding extract insn r%d:%s = r%d:%s\n",
|
||||
REGNO (read_reg), GET_MODE_NAME (read_mode),
|
||||
REGNO (store_info->rhs), GET_MODE_NAME (store_mode));
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
}
|
||||
|
||||
/* Insert this right before the store insn where it will be safe
|
||||
from later insns that might change it before the read. */
|
||||
emit_insn_before (insns, store_insn->insn);
|
||||
|
@ -1668,12 +1637,22 @@ replace_read (store_info_t store_info, insn_info_t store_insn,
|
|||
rest of dse, play like this read never happened. */
|
||||
read_insn->read_rec = read_info->next;
|
||||
pool_free (read_info_pool, read_info);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, " -- replaced the loaded MEM with ");
|
||||
print_simple_rtl (dump_file, read_reg);
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " -- validation failure\n");
|
||||
{
|
||||
fprintf (dump_file, " -- replacing the loaded MEM with ");
|
||||
print_simple_rtl (dump_file, read_reg);
|
||||
fprintf (dump_file, " led to an invalid instruction\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
60
gcc/expmed.c
60
gcc/expmed.c
|
@ -2098,6 +2098,66 @@ extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
|
|||
NULL_RTX, 0);
|
||||
}
|
||||
|
||||
/* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
|
||||
the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
|
||||
MODE, fill the upper bits with zeros. Fail if the layout of either
|
||||
mode is unknown (as for CC modes) or if the extraction would involve
|
||||
unprofitable mode punning. Return the value on success, otherwise
|
||||
return null.
|
||||
|
||||
This is different from gen_lowpart* in these respects:
|
||||
|
||||
- the returned value must always be considered an rvalue
|
||||
|
||||
- when MODE is wider than SRC_MODE, the extraction involves
|
||||
a zero extension
|
||||
|
||||
- when MODE is smaller than SRC_MODE, the extraction involves
|
||||
a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
|
||||
|
||||
In other words, this routine performs a computation, whereas the
|
||||
gen_lowpart* routines are conceptually lvalue or rvalue subreg
|
||||
operations. */
|
||||
|
||||
rtx
|
||||
extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
|
||||
{
|
||||
enum machine_mode int_mode, src_int_mode;
|
||||
|
||||
if (mode == src_mode)
|
||||
return src;
|
||||
|
||||
if (CONSTANT_P (src))
|
||||
return simplify_gen_subreg (mode, src, src_mode,
|
||||
subreg_lowpart_offset (mode, src_mode));
|
||||
|
||||
if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
|
||||
return NULL_RTX;
|
||||
|
||||
if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
|
||||
&& MODES_TIEABLE_P (mode, src_mode))
|
||||
{
|
||||
rtx x = gen_lowpart_common (mode, src);
|
||||
if (x)
|
||||
return x;
|
||||
}
|
||||
|
||||
src_int_mode = int_mode_for_mode (src_mode);
|
||||
int_mode = int_mode_for_mode (mode);
|
||||
if (src_int_mode == BLKmode || int_mode == BLKmode)
|
||||
return NULL_RTX;
|
||||
|
||||
if (!MODES_TIEABLE_P (src_int_mode, src_mode))
|
||||
return NULL_RTX;
|
||||
if (!MODES_TIEABLE_P (int_mode, mode))
|
||||
return NULL_RTX;
|
||||
|
||||
src = gen_lowpart (src_int_mode, src);
|
||||
src = convert_modes (int_mode, src_int_mode, src, true);
|
||||
src = gen_lowpart (mode, src);
|
||||
return src;
|
||||
}
|
||||
|
||||
/* Add INC into TARGET. */
|
||||
|
||||
void
|
||||
|
|
|
@ -744,6 +744,7 @@ extern void store_bit_field (rtx, unsigned HOST_WIDE_INT,
|
|||
extern rtx extract_bit_field (rtx, unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT, int, rtx,
|
||||
enum machine_mode, enum machine_mode);
|
||||
extern rtx extract_low_bits (enum machine_mode, enum machine_mode, rtx);
|
||||
extern rtx expand_mult (enum machine_mode, rtx, rtx, rtx, int);
|
||||
extern rtx expand_mult_highpart_adjust (enum machine_mode, rtx, rtx, rtx, rtx, int);
|
||||
|
||||
|
|
|
@ -43,11 +43,9 @@ gen_lowpart_general (enum machine_mode mode, rtx x)
|
|||
|
||||
if (result)
|
||||
return result;
|
||||
/* If it's a REG, it must be a hard reg that's not valid in MODE. */
|
||||
else if (REG_P (x)
|
||||
/* Or we could have a subreg of a floating point value. */
|
||||
|| (GET_CODE (x) == SUBREG
|
||||
&& FLOAT_MODE_P (GET_MODE (SUBREG_REG (x)))))
|
||||
/* Handle SUBREGs and hard REGs that were rejected by
|
||||
simplify_gen_subreg. */
|
||||
else if (REG_P (x) || GET_CODE (x) == SUBREG)
|
||||
{
|
||||
result = gen_lowpart_common (mode, copy_to_reg (x));
|
||||
gcc_assert (result != 0);
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2008-03-22 Richard Sandiford <rsandifo@nildram.co.uk>
|
||||
|
||||
* gcc.target/mips/dse-1.c: Add checks for zeros.
|
||||
|
||||
2008-03-21 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
|
||||
PR target/27946
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
/* ??? Further to the subreg comment below, we can't rely on any of the
|
||||
tests passing unless we handle subregs, and the patch to do so has
|
||||
been rejected for the time being. */
|
||||
/* { dg-do compile { target { ! *-*-* } } } */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-mips-options "-mgp64 -O" } */
|
||||
|
||||
#define TEST(ID, TYPE1, TYPE2) \
|
||||
|
@ -18,6 +15,13 @@
|
|||
u->m2 = x; \
|
||||
return (u->m1[0] \
|
||||
+ u->m1[sizeof (TYPE2) / sizeof (TYPE1) - 1]); \
|
||||
} \
|
||||
\
|
||||
TYPE1 __attribute__((nomips16)) \
|
||||
g##ID (union u##ID *u) \
|
||||
{ \
|
||||
u->m2 = 0; \
|
||||
return (u->m1[0] | u->m1[1]); \
|
||||
}
|
||||
|
||||
TEST (1, unsigned int, unsigned long long);
|
||||
|
@ -32,8 +36,8 @@ TEST (8, short, int);
|
|||
TEST (9, unsigned char, unsigned int);
|
||||
TEST (10, signed char, int);
|
||||
|
||||
/* DSE isn't yet read to consider stores of subregs, so the corresponding
|
||||
(char, short) tests won't pass. */
|
||||
TEST (11, unsigned char, unsigned short);
|
||||
TEST (12, signed char, short);
|
||||
|
||||
/* { dg-final { scan-assembler-not "\tlh\t" } } */
|
||||
/* { dg-final { scan-assembler-not "\tlhu\t" } } */
|
||||
|
|
Loading…
Add table
Reference in a new issue