vect: Fix inconsistency in fully-masked lane-reducing op generation [PR116985]
To align vectorized def/use when a lane-reducing op is present in a loop
reduction, we may need to insert extra trivial pass-through copies, which
would cause a mismatch between the lane-reducing vector copy and the loop
mask index. This is fixed by computing the right index from a new counter
of effective lane-reducing vector copies.

2024-10-11 Feng Xue <fxue@os.amperecomputing.com>

gcc/
	PR tree-optimization/116985
	* tree-vect-loop.cc (vect_transform_reduction): Compute loop mask
	index based on effective vector copies for reduction op.

gcc/testsuite/
	PR tree-optimization/116985
	* gcc.dg/vect/pr116985.c: New testcase.
This commit is contained in:
parent
f54d42e000
commit
a9173a50e7
2 changed files with 28 additions and 2 deletions
23
gcc/testsuite/gcc.dg/vect/pr116985.c
Normal file
23
gcc/testsuite/gcc.dg/vect/pr116985.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-options "--param=vect-partial-vector-usage=2 -O2" } */
|
||||
/* { dg-additional-options "-mavx512vbmi2" { target { i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */
|
||||
|
||||
/* Reproducer loop for PR tree-optimization/116985 (this file is the
   testcase added by the commit).

   Returns the sum, over i in [0, n), of
     a[i] * b[i] + (i + 1) + c[i] * d[i]
   accumulated into a single signed int that starts at 0.

   n        - number of iterations; the loop body does not run when n <= 0.
   a, b     - signed char arrays multiplied element-wise (first product).
   c, d     - signed char arrays multiplied element-wise (second product).

   NOTE(review): the order of the three accumulations below is presumably
   what triggers the vectorizer path being fixed (lane-reducing ops mixed
   with a normal op in one reduction chain) - do not reorder or merge
   them without confirming against the PR.  */
signed int __attribute__ ((noipa))
|
||||
fn (signed int n,
|
||||
signed char *restrict a,
|
||||
signed char *restrict b,
|
||||
signed char *restrict c,
|
||||
signed char *restrict d)
|
||||
{
|
||||
signed int res = 0;
|
||||
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
/* First product of two signed chars, accumulated into the int result.  */
res += a[i] * b[i];
|
||||
/* Plain integer add into the same accumulator, between the two products.  */
res += i + 1;
|
||||
/* Second product of two signed chars, accumulated into the same result.  */
res += c[i] * d[i];
|
||||
}
|
||||
return res;
|
||||
}
|
|
@ -8916,6 +8916,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
|
|||
|
||||
bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (stmt_info);
|
||||
unsigned num = vec_oprnds[reduc_index == 0 ? 1 : 0].length ();
|
||||
unsigned mask_index = 0;
|
||||
|
||||
for (unsigned i = 0; i < num; ++i)
|
||||
{
|
||||
|
@ -8954,7 +8955,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
|
|||
std::swap (vop[0], vop[1]);
|
||||
}
|
||||
tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
|
||||
vec_num * ncopies, vectype_in, i);
|
||||
vec_num * ncopies, vectype_in,
|
||||
mask_index++);
|
||||
gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
|
||||
vop[0], vop[1], vop[0]);
|
||||
new_temp = make_ssa_name (vec_dest, call);
|
||||
|
@ -8971,7 +8973,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
|
|||
if (masked_loop_p && mask_by_cond_expr)
|
||||
{
|
||||
tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
|
||||
vec_num * ncopies, vectype_in, i);
|
||||
vec_num * ncopies, vectype_in,
|
||||
mask_index++);
|
||||
build_vect_cond_expr (code, vop, mask, gsi);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue