tree-optimization/103581 - fix masked gather on x86
The recent fix to PR103527 exposed an issue with how the various special cases for AVX512 masks in vect_build_gather_load_calls are handled. The following makes that more obvious, fixing the miscompile of 403.gcc.

2021-12-06  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/103581
	* tree-vect-stmts.c (vect_build_gather_load_calls): Properly
	guard all the AVX512 mask cases.

	* gcc.dg/vect/pr103581.c: New testcase.
This commit is contained in:
parent
11013814fc
commit
0dc77a0c49
2 changed files with 61 additions and 2 deletions
59
gcc/testsuite/gcc.dg/vect/pr103581.c
Normal file
59
gcc/testsuite/gcc.dg/vect/pr103581.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
/* { dg-additional-options "-mavx2 -mtune-ctrl=use_gather" { target avx2_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
/* MASKGATHER (SUFF, TYPE1, TYPE2) defines the function
   maskgather<SUFF> (int n, TYPE2 *indices, TYPE1 *data).

   The generated function allocates an n-element TYPE1 array with
   __builtin_malloc (the result is not checked) and, for each i in [0, n),
   stores data[indices[i]] into out[i] only when indices[i] > 1.  Elements
   whose index is 0 or 1 are never written, so callers must not read them.
   The array is returned and is not freed by the function.

   The function is marked noipa.
   NOTE(review): the conditional indexed load looks like the pattern meant
   to be vectorized as a masked gather -- confirm against the PR.  */
#define MASKGATHER(SUFF, TYPE1, TYPE2) \
|
||||
TYPE1 * __attribute__((noipa)) \
|
||||
maskgather ## SUFF (int n, TYPE2 *indices, TYPE1 *data) \
|
||||
{ \
|
||||
TYPE1 *out = __builtin_malloc (sizeof (TYPE1) * n); \
|
||||
for (int i = 0; i < n; ++i) \
|
||||
{ \
|
||||
TYPE2 d = indices[i]; \
|
||||
if (d > 1) \
|
||||
out[i] = data[d]; \
|
||||
} \
|
||||
return out; \
|
||||
}
|
||||
|
||||
/* Instantiate all four combinations of data type (TYPE1) and index type
   (TYPE2) over unsigned int and unsigned long long.  In each suffix the
   first half names the data type and the second half the index type:
   "udi" pairs with unsigned long long and "usi" with unsigned int.  */
MASKGATHER(udiusi, unsigned long long, unsigned int)
|
||||
MASKGATHER(usiusi, unsigned int, unsigned int)
|
||||
MASKGATHER(udiudi, unsigned long long, unsigned long long)
|
||||
MASKGATHER(usiudi, unsigned int, unsigned long long)
|
||||
|
||||
/* Test driver: fill identity index and data arrays, run each of the four
   maskgather variants over the first 16 elements, and call
   __builtin_abort if any element with index > 1 differs from the expected
   data value.  Returns 0 when every check passes.  The returned arrays
   are not freed before exit.  */
int
|
||||
main()
|
||||
{
|
||||
/* check_vect is not defined in this file; presumably it is provided by
   tree-vect.h -- TODO confirm.  */
check_vect ();
|
||||
|
||||
unsigned int idx4[32], data4[32];
|
||||
unsigned long long idx8[32], data8[32];
|
||||
/* Every array holds the identity mapping (element i == i), so data4[d]
   and data8[d] have the same value for any d in range.  */
for (int i = 0; i < 32; ++i)
|
||||
{
|
||||
idx4[i] = i;
|
||||
idx8[i] = i;
|
||||
data4[i] = i;
|
||||
data8[i] = i;
|
||||
}
|
||||
/* Only the first 16 of the 32 initialized elements are processed.  */
unsigned long long *resudiusi = maskgatherudiusi (16, idx4, data8);
|
||||
unsigned int *resusiusi = maskgatherusiusi (16, idx4, data4);
|
||||
unsigned long long *resudiudi = maskgatherudiudi (16, idx8, data8);
|
||||
unsigned int *resusiudi = maskgatherusiudi (16, idx8, data4);
|
||||
/* The maskgather functions only write elements whose index is > 1, so
   only those elements are inspected here.  All four results are compared
   against data4[d]; that is valid for the data8-based results too because
   data4 and data8 were filled with identical values above.  */
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
unsigned int d = idx4[i];
|
||||
if (d > 1)
|
||||
{
|
||||
if (resudiusi[i] != data4[d])
|
||||
__builtin_abort ();
|
||||
if (resudiudi[i] != data4[d])
|
||||
__builtin_abort ();
|
||||
if (resusiudi[i] != data4[d])
|
||||
__builtin_abort ();
|
||||
if (resusiusi[i] != data4[d])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -2785,7 +2785,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
|
||||
ncopies *= 2;
|
||||
|
||||
if (mask && masktype == real_masktype)
|
||||
if (mask && VECTOR_TYPE_P (real_masktype))
|
||||
{
|
||||
for (int i = 0; i < count; ++i)
|
||||
sel[i] = i | (count / 2);
|
||||
|
@ -2882,7 +2882,7 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
mask_op = var;
|
||||
}
|
||||
}
|
||||
if (modifier == NARROW && masktype != real_masktype)
|
||||
if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
|
||||
{
|
||||
var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
|
||||
gassign *new_stmt
|
||||
|
|
Loading…
Add table
Reference in a new issue