re PR target/15184 (Direct access to byte inside word not working with -march=pentiumpro)

PR target/15184
	* combine.c (try_combine): If I0 is a memory load and I3 a store
	to a related address, increase the "goodness" of doing a 4-insn
	combination with I0-I3.
	(make_field_assignment): Handle SUBREGs in the ior+and case.

	PR target/15184
	* gcc.target/i386/pr15184-1.c: New test.
	* gcc.target/i386/pr15184-2.c: New test.

From-SVN: r220249
This commit is contained in:
Jeff Law 2015-01-29 07:30:45 -07:00 committed by Jeff Law
parent b8aa7083ec
commit 52d285303c
5 changed files with 134 additions and 0 deletions

View file

@ -1,3 +1,11 @@
2015-01-29 Jeff Law <law@redhat.com>
PR target/15184
* combine.c (try_combine): If I0 is a memory load and I3 a store
to a related address, increase the "goodness" of doing a 4-insn
combination with I0-I3.
(make_field_assignment): Handle SUBREGs in the ior+and case.
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/64746

View file

@ -2620,6 +2620,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int i;
int ngood = 0;
int nshift = 0;
rtx set0, set3;
if (!flag_expensive_optimizations)
return 0;
@ -2643,6 +2644,34 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
|| GET_CODE (src) == LSHIFTRT)
nshift++;
}
/* If I0 loads a memory and I3 sets the same memory, then I1 and I2
are likely manipulating its value. Ideally we'll be able to combine
all four insns into a bitfield insertion of some kind.
Note the source in I0 might be inside a sign/zero extension and the
memory modes in I0 and I3 might be different. So extract the address
from the destination of I3 and search for it in the source of I0.
In the event that there's a match but the source/dest do not actually
refer to the same memory, the worst that happens is we try some
combinations that we wouldn't have otherwise. */
if ((set0 = single_set (i0))
/* Ensure the source of SET0 is a MEM, possibly buried inside
an extension. */
&& (GET_CODE (SET_SRC (set0)) == MEM
|| ((GET_CODE (SET_SRC (set0)) == ZERO_EXTEND
|| GET_CODE (SET_SRC (set0)) == SIGN_EXTEND)
&& GET_CODE (XEXP (SET_SRC (set0), 0)) == MEM))
&& (set3 = single_set (i3))
/* Ensure the destination of SET3 is a MEM. */
&& GET_CODE (SET_DEST (set3)) == MEM
/* Would it be better to extract the base address for the MEM
in SET3 and look for that? I don't have cases where it matters
but I could envision such cases. */
&& rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
ngood += 2;
if (ngood < 2 && nshift < 2)
return 0;
}
@ -9272,6 +9301,13 @@ make_field_assignment (rtx x)
to the appropriate position, force it to the required mode, and
make the extraction. Check for the AND in both operands. */
/* One or more SUBREGs might obscure the constant-position field
assignment. The first one we are likely to encounter is an outer
narrowing SUBREG, which we can just strip for the purposes of
identifying the constant-field assignment. */
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src))
src = SUBREG_REG (src);
if (GET_CODE (src) != IOR && GET_CODE (src) != XOR)
return x;
@ -9282,10 +9318,38 @@ make_field_assignment (rtx x)
&& CONST_INT_P (XEXP (rhs, 1))
&& rtx_equal_for_field_assignment_p (XEXP (rhs, 0), dest))
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
/* The second SUBREG that might get in the way is a paradoxical
SUBREG around the first operand of the AND. We want to
pretend the operand is as wide as the destination here. We
do this by creating a new MEM in the wider mode for the sole
purpose of the call to rtx_equal_for_field_assignment_p. Also
note this trick only works for MEMs. */
else if (GET_CODE (rhs) == AND
&& paradoxical_subreg_p (XEXP (rhs, 0))
&& GET_CODE (SUBREG_REG (XEXP (rhs, 0))) == MEM
&& CONST_INT_P (XEXP (rhs, 1))
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
XEXP (SUBREG_REG (XEXP (rhs, 0)), 0)),
dest))
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
else if (GET_CODE (lhs) == AND
&& CONST_INT_P (XEXP (lhs, 1))
&& rtx_equal_for_field_assignment_p (XEXP (lhs, 0), dest))
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
/* The second SUBREG that might get in the way is a paradoxical
SUBREG around the first operand of the AND. We want to
pretend the operand is as wide as the destination here. We
do this by creating a new MEM in the wider mode for the sole
purpose of the call to rtx_equal_for_field_assignment_p. Also
note this trick only works for MEMs. */
else if (GET_CODE (lhs) == AND
&& paradoxical_subreg_p (XEXP (lhs, 0))
&& GET_CODE (SUBREG_REG (XEXP (lhs, 0))) == MEM
&& CONST_INT_P (XEXP (lhs, 1))
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
XEXP (SUBREG_REG (XEXP (lhs, 0)), 0)),
dest))
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
else
return x;

View file

@ -1,3 +1,9 @@
2015-01-29 Jeff Law <law@redhat.com>
PR target/15184
* gcc.target/i386/pr15184-1.c: New test.
* gcc.target/i386/pr15184-2.c: New test.
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/64746

View file

@ -0,0 +1,33 @@
/* PR 15184 first two tests, plus two additional ones. */
/* { dg-do compile } */
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
#define regparm __attribute__((__regparm__(3)))
extern unsigned int x;
extern unsigned short y;
/* Replace bits 0-7 of the global x with c; the bits selected by the
   mask 0xFFFFFF00 are kept unchanged. */
void regparm f0(unsigned char c)
{
x = (x & 0xFFFFFF00) | (unsigned int)c;
}
/* Replace bits 8-15 of the global x with c; the bits selected by the
   mask 0xFFFF00FF are kept unchanged. */
void regparm f1(unsigned char c)
{
x = (x & 0xFFFF00FF) | ((unsigned int)c << 8);
}
/* Replace bits 16-23 of the global x with c; the bits selected by the
   mask 0xFF00FFFF are kept unchanged. */
void regparm f2(unsigned char c)
{
x = (x & 0xFF00FFFF) | ((unsigned int)c << 16);
}
/* Replace bits 24-31 of the global x with c; the bits selected by the
   mask 0x00FFFFFF are kept unchanged. */
void regparm f3(unsigned char c)
{
x = (x & 0x00FFFFFF) | ((unsigned int)c << 24);
}
/* Each function should compile down to a byte move from
the input register into x, possibly at an offset within x. */
/* { dg-final { scan-assembler-times "\tmovb\t%al, x" 4 } } */

View file

@ -0,0 +1,23 @@
/* PR 15184 second two tests. */
/* { dg-do compile } */
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
#define regparm __attribute__((__regparm__(3)))
extern unsigned int x;
extern unsigned short y;
/* Replace bits 0-7 of the global y with c; the bits selected by the
   mask 0xFF00 are kept unchanged. */
void regparm g0(unsigned char c)
{
y = (y & 0xFF00) | (unsigned short)c;
}
/* Replace bits 8-15 of the global y with c; the bits selected by the
   mask 0x00FF are kept unchanged. */
void regparm g1(unsigned char c)
{
y = (y & 0x00FF) | ((unsigned short)c << 8);
}
/* Each function should compile down to a byte move from
the input register into y, possibly at an offset within y. */
/* { dg-final { scan-assembler-times "\tmovb\t%al, y" 2 } } */