Simplify vec_select of a subreg of X to just a vec_select of X.

gcc/ChangeLog
	PR rtl-optimization/97249
	* simplify-rtx.c (simplify_binary_operation_1): Simplify
	vec_select of a subreg of X to a vec_select of X.

gcc/testsuite/ChangeLog

	* gcc.target/i386/pr97249-1.c: New test.
This commit is contained in:
liuhongt 2020-10-13 15:35:29 +08:00
parent 7026bb9504
commit 52e7f09698
2 changed files with 71 additions and 0 deletions

View file

@ -4170,6 +4170,47 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
return subop1;
}
}
/* Simplify a vec_select of a subreg of X to just a vec_select of X
when X has the same component mode as the vec_select. */
unsigned HOST_WIDE_INT subreg_offset = 0;
if (GET_CODE (trueop0) == SUBREG
&& GET_MODE_INNER (mode)
== GET_MODE_INNER (GET_MODE (SUBREG_REG (trueop0)))
&& GET_MODE_NUNITS (mode).is_constant (&l1)
&& constant_multiple_p (subreg_memory_offset (trueop0),
GET_MODE_UNIT_BITSIZE (mode),
&subreg_offset))
{
poly_uint64 nunits
= GET_MODE_NUNITS (GET_MODE (SUBREG_REG (trueop0)));
bool success = true;
for (int i = 0; i != l1; i++)
{
rtx idx = XVECEXP (trueop1, 0, i);
if (!CONST_INT_P (idx)
|| maybe_ge (UINTVAL (idx) + subreg_offset, nunits))
{
success = false;
break;
}
}
if (success)
{
rtx par = trueop1;
if (subreg_offset)
{
rtvec vec = rtvec_alloc (l1);
for (int i = 0; i < l1; i++)
RTVEC_ELT (vec, i)
= GEN_INT (INTVAL (XVECEXP (trueop1, 0, i))
+ subreg_offset);
par = gen_rtx_PARALLEL (VOIDmode, vec);
}
return gen_rtx_VEC_SELECT (mode, SUBREG_REG (trueop0), par);
}
}
}
if (XVECLEN (trueop1, 0) == 1

View file

@ -0,0 +1,30 @@
/* PR target/97249 */
/* { dg-do compile } */
/* { dg-options "-mavx2 -O3 -masm=att" } */
/* { dg-final { scan-assembler-times {(?n)vpmovzxbw[ \t]+\(.*%xmm[0-9]} 2 } } */
/* { dg-final { scan-assembler-times {(?n)vpmovzxwd[ \t]+\(.*%xmm[0-9]} 2 } } */
/* { dg-final { scan-assembler-times {(?n)vpmovzxdq[ \t]+\(.*%xmm[0-9]} 2 } } */
/* Add the first eight unsigned bytes of P1 and P2 element-wise and
   store each sum into P3 as a short.  P3 is restrict-qualified, so
   the stores are declared not to alias the loads.  The byte operands
   are promoted to int before the addition (C integer promotions).
   NOTE(review): presumably this is the loop the vpmovzxbw
   scan-assembler directive at the top of this test targets; keep the
   loop shape unchanged so the expected instructions still appear.  */
void
foo (unsigned char* p1, unsigned char* p2, short* __restrict p3)
{
for (int i = 0 ; i != 8; i++)
p3[i] = p1[i] + p2[i];
return;
}
/* Add the first four unsigned shorts of P1 and P2 element-wise and
   store each sum into P3 as an int.  P3 is restrict-qualified, so the
   stores are declared not to alias the loads.
   NOTE(review): presumably this is the loop the vpmovzxwd
   scan-assembler directive at the top of this test targets; keep the
   loop shape unchanged so the expected instructions still appear.  */
void
foo1 (unsigned short* p1, unsigned short* p2, int* __restrict p3)
{
for (int i = 0 ; i != 4; i++)
p3[i] = p1[i] + p2[i];
return;
}
/* Add the first two unsigned ints of P1 and P2 element-wise and store
   each sum into P3 as a long long.  Both operands are explicitly cast
   to long long before the addition, so the sum is computed in the
   wider type rather than wrapping in unsigned int.
   NOTE(review): presumably this is the loop the vpmovzxdq
   scan-assembler directive at the top of this test targets; keep the
   loop shape unchanged so the expected instructions still appear.  */
void
foo2 (unsigned int* p1, unsigned int* p2, long long* __restrict p3)
{
for (int i = 0 ; i != 2; i++)
p3[i] = (long long)p1[i] + (long long)p2[i];
return;
}