i386: Fix expand_vec_perm_pshufb for narrow modes [PR103905]
2022-01-05  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

	PR target/103905
	* config/i386/i386-expand.c (expand_vec_perm_pshufb): Fix number of
	narrow mode remapped elements for !one_operand_p case.

gcc/testsuite/ChangeLog:

	PR target/103905
	* gcc.target/i386/pr103905.c: New test.
This commit is contained in:
parent
85a3442c85
commit
877c9e332f
2 changed files with 37 additions and 11 deletions
|
@ -18730,7 +18730,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
{
|
||||
unsigned i, nelt, eltsz, mask;
|
||||
unsigned char perm[64];
|
||||
machine_mode vmode = V16QImode;
|
||||
machine_mode vmode;
|
||||
struct expand_vec_perm_d nd;
|
||||
rtx rperm[64], vperm, target, op0, op1;
|
||||
|
||||
|
@ -18754,6 +18754,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
case 16:
|
||||
if (!TARGET_XOP)
|
||||
return false;
|
||||
vmode = V16QImode;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
|
@ -18803,6 +18804,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
case 16:
|
||||
if (!TARGET_SSSE3)
|
||||
return false;
|
||||
vmode = V16QImode;
|
||||
break;
|
||||
|
||||
case 32:
|
||||
|
@ -18894,6 +18896,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
/* Or if vpermps can be used. */
|
||||
else if (d->vmode == V16SFmode)
|
||||
vmode = V16SImode;
|
||||
|
||||
if (vmode == V64QImode)
|
||||
{
|
||||
/* vpshufb only works intra lanes, it is not
|
||||
|
@ -18946,8 +18949,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
|
||||
machine_mode vpmode = vmode;
|
||||
|
||||
if (vmode == V4QImode
|
||||
|| vmode == V8QImode)
|
||||
nelt = GET_MODE_SIZE (vmode);
|
||||
|
||||
/* Emulate narrow modes with V16QI instructions. */
|
||||
if (nelt < 16)
|
||||
{
|
||||
rtx m128 = GEN_INT (-128);
|
||||
|
||||
|
@ -18955,19 +18960,15 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
account for inactive top elements from the first operand. */
|
||||
if (!d->one_operand_p)
|
||||
{
|
||||
int sz = GET_MODE_SIZE (vmode);
|
||||
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
int ival = INTVAL (rperm[i]);
|
||||
if (ival >= sz)
|
||||
ival += 16-sz;
|
||||
rperm[i] = GEN_INT (ival);
|
||||
unsigned ival = UINTVAL (rperm[i]);
|
||||
if (ival >= nelt)
|
||||
rperm[i] = GEN_INT (ival + 16 - nelt);
|
||||
}
|
||||
}
|
||||
|
||||
/* V4QI/V8QI is emulated with V16QI instruction, fill inactive
|
||||
elements in the top positions with zeros. */
|
||||
/* Fill inactive elements in the top positions with zeros. */
|
||||
for (i = nelt; i < 16; ++i)
|
||||
rperm[i] = m128;
|
||||
|
||||
|
|
25
gcc/testsuite/gcc.target/i386/pr103905.c
Normal file
25
gcc/testsuite/gcc.target/i386/pr103905.c
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* PR target/103905 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target xop } */
|
||||
/* { dg-options "-O3 -mxop" } */
|
||||
|
||||
#include "xop-check.h"
|
||||
|
||||
/* File-scope byte buffer that foo writes and xop_test inspects.  */
char perm[64];
|
||||
|
||||
/* Store the values 0, 1, ..., N-1 into perm[0] .. perm[N-1].
   The function is marked noipa; per GCC's attribute documentation this
   prevents inlining and other interprocedural optimization, so the loop
   is compiled without knowledge of the caller's N.
   NOTE(review): presumably this is the loop whose vectorized form hit
   PR target/103905 -- the generated code is not visible from here.  */
void
|
||||
__attribute__((noipa))
|
||||
foo (int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i)
|
||||
perm[i] = i;
|
||||
}
|
||||
|
||||
/* Test body: fill the first eight bytes of perm through foo and abort
   if the last of them does not hold its own index.
   NOTE(review): presumably called by the harness in "xop-check.h" --
   the caller is not visible in this file.  */
static void
|
||||
xop_test (void)
|
||||
{
|
||||
foo (8);
|
||||
|
||||
/* perm[7] is the highest element written by foo (8).  */
if (perm[7] != 7)
|
||||
__builtin_abort ();
|
||||
}
|
Loading…
Add table
Reference in a new issue