re PR target/15184 (Direct access to byte inside word not working with -march=pentiumpro)
PR target/15184 * combine.c (try_combine): If I0 is a memory load and I3 a store to a related address, increase the "goodness" of doing a 4-insn combination with I0-I3. (make_field_assignment): Handle SUBREGs in the ior+and case. PR target/15184 * gcc.target/i386/pr15184-1.c: New test. * gcc.target/i386/pr15184-2.c: New test. From-SVN: r220249
This commit is contained in:
parent
b8aa7083ec
commit
52d285303c
5 changed files with 134 additions and 0 deletions
|
@ -1,3 +1,11 @@
|
|||
2015-01-29 Jeff Law <law@redhat.com>
|
||||
|
||||
PR target/15184
|
||||
* combine.c (try_combine): If I0 is a memory load and I3 a store
|
||||
to a related address, increase the "goodness" of doing a 4-insn
|
||||
combination with I0-I3.
|
||||
(make_field_assignment): Handle SUBREGs in the ior+and case.
|
||||
|
||||
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
|
||||
|
||||
PR tree-optimization/64746
|
||||
|
|
|
@ -2620,6 +2620,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
|
|||
int i;
|
||||
int ngood = 0;
|
||||
int nshift = 0;
|
||||
rtx set0, set3;
|
||||
|
||||
if (!flag_expensive_optimizations)
|
||||
return 0;
|
||||
|
@ -2643,6 +2644,34 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
|
|||
|| GET_CODE (src) == LSHIFTRT)
|
||||
nshift++;
|
||||
}
|
||||
|
||||
/* If I0 loads a memory and I3 sets the same memory, then I1 and I2
|
||||
are likely manipulating its value. Ideally we'll be able to combine
|
||||
all four insns into a bitfield insertion of some kind.
|
||||
|
||||
Note the source in I0 might be inside a sign/zero extension and the
|
||||
memory modes in I0 and I3 might be different. So extract the address
|
||||
from the destination of I3 and search for it in the source of I0.
|
||||
|
||||
In the event that there's a match but the source/dest do not actually
|
||||
refer to the same memory, the worst that happens is we try some
|
||||
combinations that we wouldn't have otherwise. */
|
||||
if ((set0 = single_set (i0))
|
||||
/* Ensure the source of SET0 is a MEM, possibly buried inside
|
||||
an extension. */
|
||||
&& (GET_CODE (SET_SRC (set0)) == MEM
|
||||
|| ((GET_CODE (SET_SRC (set0)) == ZERO_EXTEND
|
||||
|| GET_CODE (SET_SRC (set0)) == SIGN_EXTEND)
|
||||
&& GET_CODE (XEXP (SET_SRC (set0), 0)) == MEM))
|
||||
&& (set3 = single_set (i3))
|
||||
/* Ensure the destination of SET3 is a MEM. */
|
||||
&& GET_CODE (SET_DEST (set3)) == MEM
|
||||
/* Would it be better to extract the base address for the MEM
|
||||
in SET3 and look for that? I don't have cases where it matters
|
||||
but I could envision such cases. */
|
||||
&& rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
|
||||
ngood += 2;
|
||||
|
||||
if (ngood < 2 && nshift < 2)
|
||||
return 0;
|
||||
}
|
||||
|
@ -9272,6 +9301,13 @@ make_field_assignment (rtx x)
|
|||
to the appropriate position, force it to the required mode, and
|
||||
make the extraction. Check for the AND in both operands. */
|
||||
|
||||
/* One or more SUBREGs might obscure the constant-position field
|
||||
assignment. The first one we are likely to encounter is an outer
|
||||
narrowing SUBREG, which we can just strip for the purposes of
|
||||
identifying the constant-field assignment. */
|
||||
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src))
|
||||
src = SUBREG_REG (src);
|
||||
|
||||
if (GET_CODE (src) != IOR && GET_CODE (src) != XOR)
|
||||
return x;
|
||||
|
||||
|
@ -9282,10 +9318,38 @@ make_field_assignment (rtx x)
|
|||
&& CONST_INT_P (XEXP (rhs, 1))
|
||||
&& rtx_equal_for_field_assignment_p (XEXP (rhs, 0), dest))
|
||||
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
|
||||
/* The second SUBREG that might get in the way is a paradoxical
|
||||
SUBREG around the first operand of the AND. We want to
|
||||
pretend the operand is as wide as the destination here. We
|
||||
do this by creating a new MEM in the wider mode for the sole
|
||||
purpose of the call to rtx_equal_for_field_assignment_p. Also
|
||||
note this trick only works for MEMs. */
|
||||
else if (GET_CODE (rhs) == AND
|
||||
&& paradoxical_subreg_p (XEXP (rhs, 0))
|
||||
&& GET_CODE (SUBREG_REG (XEXP (rhs, 0))) == MEM
|
||||
&& CONST_INT_P (XEXP (rhs, 1))
|
||||
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
|
||||
XEXP (SUBREG_REG (XEXP (rhs, 0)), 0)),
|
||||
dest))
|
||||
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
|
||||
else if (GET_CODE (lhs) == AND
|
||||
&& CONST_INT_P (XEXP (lhs, 1))
|
||||
&& rtx_equal_for_field_assignment_p (XEXP (lhs, 0), dest))
|
||||
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
|
||||
/* The second SUBREG that might get in the way is a paradoxical
|
||||
SUBREG around the first operand of the AND. We want to
|
||||
pretend the operand is as wide as the destination here. We
|
||||
do this by creating a new MEM in the wider mode for the sole
|
||||
purpose of the call to rtx_equal_for_field_assignment_p. Also
|
||||
note this trick only works for MEMs. */
|
||||
else if (GET_CODE (lhs) == AND
|
||||
&& paradoxical_subreg_p (XEXP (lhs, 0))
|
||||
&& GET_CODE (SUBREG_REG (XEXP (lhs, 0))) == MEM
|
||||
&& CONST_INT_P (XEXP (lhs, 1))
|
||||
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
|
||||
XEXP (SUBREG_REG (XEXP (lhs, 0)), 0)),
|
||||
dest))
|
||||
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
|
||||
else
|
||||
return x;
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2015-01-29 Jeff Law <law@redhat.com>
|
||||
|
||||
PR target/15184
|
||||
* gcc.target/i386/pr15184-1.c: New test.
|
||||
* gcc.target/i386/pr15184-2.c: New test.
|
||||
|
||||
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
|
||||
|
||||
PR tree-optimization/64746
|
||||
|
|
33
gcc/testsuite/gcc.target/i386/pr15184-1.c
Normal file
33
gcc/testsuite/gcc.target/i386/pr15184-1.c
Normal file
|
@ -0,0 +1,33 @@
|
|||
/* PR 15184 first two tests, plus two additional ones. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
|
||||
|
||||
#define regparm __attribute__((__regparm__(3)))
|
||||
|
||||
extern unsigned int x;
|
||||
extern unsigned short y;
|
||||
|
||||
void regparm f0(unsigned char c)
|
||||
{
|
||||
x = (x & 0xFFFFFF00) | (unsigned int)c;
|
||||
}
|
||||
|
||||
void regparm f1(unsigned char c)
|
||||
{
|
||||
x = (x & 0xFFFF00FF) | ((unsigned int)c << 8);
|
||||
}
|
||||
|
||||
void regparm f2(unsigned char c)
|
||||
{
|
||||
x = (x & 0xFF00FFFF) | ((unsigned int)c << 16);
|
||||
}
|
||||
void regparm f3(unsigned char c)
|
||||
{
|
||||
x = (x & 0x00FFFFFF) | ((unsigned int)c << 24);
|
||||
}
|
||||
|
||||
|
||||
/* Each function should compile down to a byte move from
|
||||
the input register into x, possibly at an offset within x. */
|
||||
/* { dg-final { scan-assembler-times "\tmovb\t%al, x" 4 } } */
|
||||
|
23
gcc/testsuite/gcc.target/i386/pr15184-2.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr15184-2.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* PR 15184 second two tests */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
|
||||
|
||||
#define regparm __attribute__((__regparm__(3)))
|
||||
|
||||
extern unsigned int x;
|
||||
extern unsigned short y;
|
||||
|
||||
void regparm g0(unsigned char c)
|
||||
{
|
||||
y = (y & 0xFF00) | (unsigned short)c;
|
||||
}
|
||||
|
||||
void regparm g1(unsigned char c)
|
||||
{
|
||||
y = (y & 0x00FF) | ((unsigned short)c << 8);
|
||||
}
|
||||
|
||||
/* Each function should compile down to a byte move from
|
||||
the input register into y, possibly at an offset within y. */
|
||||
/* { dg-final { scan-assembler-times "\tmovb\t%al, y" 2 } } */
|
||||
|
Loading…
Add table
Reference in a new issue