From 25f831eab368d1bbec4dc67bf058cb7cf6b721ee Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 12 Jul 2023 11:19:58 +0200 Subject: [PATCH] tree-optimization/110630 - enhance SLP permute support The following enhances the existing lowpart extraction support for SLP VEC_PERM nodes to cover all vector aligned extractions. This allows the existing bb-slp-pr95839.c testcase to be vectorized with mips -mpaired-single and the new bb-slp-pr95839-3.c testcase with SSE2. PR tree-optimization/110630 * tree-vect-slp.cc (vect_add_slp_permutation): New offset parameter, honor that for the extract code generation. (vectorizable_slp_permutation_1): Handle offsetted identities. * gcc.dg/vect/bb-slp-pr95839.c: Make stricter. * gcc.dg/vect/bb-slp-pr95839-3.c: New variant testcase. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c | 15 +++++++++++++++ gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c | 1 + gcc/tree-vect-slp.cc | 14 +++++++++----- 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c new file mode 100644 index 00000000000..aaee8febf37 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ +/* { dg-additional-options "-w -Wno-psabi" } */ + +typedef float __attribute__((vector_size(32))) v8f32; + +v8f32 f(v8f32 a, v8f32 b) +{ + /* Check that we vectorize this CTOR without any loads. */ + return (v8f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3], + a[4] + b[4], a[5] + b[5], a[6] + b[6], a[7] + b[7]}; +} + +/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */ +/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c index 931fd46aaaa..d87bbf125c0 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr95839.c @@ -10,4 +10,5 @@ v4f32 f(v4f32 a, v4f32 b) return (v4f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]}; } +/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */ /* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 355d078d66e..693621ca990 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -8432,7 +8432,7 @@ vect_transform_slp_perm_load (vec_info *vinfo, static void vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, slp_tree node, tree first_def, tree second_def, - tree mask_vec) + tree mask_vec, poly_uint64 identity_offset) { tree vectype = SLP_TREE_VECTYPE (node); @@ -8470,14 +8470,17 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi, else if (!types_compatible_p (TREE_TYPE (first_def), vectype)) { /* For identity permutes we still need to handle the case - of lowpart extracts or concats. */ + of offsetted extracts or concats. */ unsigned HOST_WIDE_INT c; auto first_def_nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def)); if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits)) { + unsigned HOST_WIDE_INT elsz + = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (first_def)))); tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def, - TYPE_SIZE (vectype), bitsize_zero_node); + TYPE_SIZE (vectype), + bitsize_int (identity_offset * elsz)); perm_stmt = gimple_build_assign (perm_dest, lowpart); } else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype), @@ -8709,7 +8712,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, { indices.new_vector (mask, second_vec.first == -1U ? 1 : 2, TYPE_VECTOR_SUBPARTS (op_vectype)); - bool identity_p = indices.series_p (0, 1, 0, 1); + bool identity_p = (indices.series_p (0, 1, mask[0], 1) + && constant_multiple_p (mask[0], nunits)); machine_mode vmode = TYPE_MODE (vectype); machine_mode op_vmode = TYPE_MODE (op_vectype); unsigned HOST_WIDE_INT c; @@ -8762,7 +8766,7 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi, = vect_get_slp_vect_def (second_node, second_vec.second + vi); vect_add_slp_permutation (vinfo, gsi, node, first_def, - second_def, mask_vec); + second_def, mask_vec, mask[0]); } }