From a9173a50e7e346a218323916e4d3add8552529ae Mon Sep 17 00:00:00 2001 From: Feng Xue Date: Fri, 11 Oct 2024 14:55:05 +0800 Subject: [PATCH] vect: Fix inconsistency in fully-masked lane-reducing op generation [PR116985] To align vectorized def/use when lane-reducing op is present in loop reduction, we may need to insert extra trivial pass-through copies, which would cause mismatch between lane-reducing vector copy and loop mask index. This could be fixed by computing the right index around a new counter on effective lane-reducing vector copies. 2024-10-11 Feng Xue gcc/ PR tree-optimization/116985 * tree-vect-loop.cc (vect_transform_reduction): Compute loop mask index based on effective vector copies for reduction op. gcc/testsuite/ PR tree-optimization/116985 * gcc.dg/vect/pr116985.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr116985.c | 23 +++++++++++++++++++++++ gcc/tree-vect-loop.cc | 7 +++++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr116985.c diff --git a/gcc/testsuite/gcc.dg/vect/pr116985.c b/gcc/testsuite/gcc.dg/vect/pr116985.c new file mode 100644 index 00000000000..e6dfdaf0fb5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr116985.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "--param=vect-partial-vector-usage=2 -O2" } */ +/* { dg-additional-options "-mavx512vbmi2" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-march=armv9-a" { target aarch64-*-* } } */ + +signed int __attribute__ ((noipa)) +fn (signed int n, + signed char *restrict a, + signed char *restrict b, + signed char *restrict c, + signed char *restrict d) +{ + signed int res = 0; + + for (int i = 0; i < n; ++i) + { + res += a[i] * b[i]; + res += i + 1; + res += c[i] * d[i]; + } + return res; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index ade72a5124f..025442aabc3 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ 
-8916,6 +8916,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo, bool emulated_mixed_dot_prod = vect_is_emulated_mixed_dot_prod (stmt_info); unsigned num = vec_oprnds[reduc_index == 0 ? 1 : 0].length (); + unsigned mask_index = 0; for (unsigned i = 0; i < num; ++i) { @@ -8954,7 +8955,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, std::swap (vop[0], vop[1]); } tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, i); + vec_num * ncopies, vectype_in, + mask_index++); gcall *call = gimple_build_call_internal (cond_fn, 4, mask, vop[0], vop[1], vop[0]); new_temp = make_ssa_name (vec_dest, call); @@ -8971,7 +8973,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, if (masked_loop_p && mask_by_cond_expr) { tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype_in, i); + vec_num * ncopies, vectype_in, + mask_index++); build_vect_cond_expr (code, vop, mask, gsi); }