tree-optimization/110630 - enhance SLP permute support
The following enhances the existing lowpart extraction support for SLP VEC_PERM nodes to cover all vector-aligned extractions. This allows the existing bb-slp-pr95839.c testcase to be vectorized with mips -mpaired-single and the new bb-slp-pr95839-3.c testcase with SSE2.

PR tree-optimization/110630
* tree-vect-slp.cc (vect_add_slp_permutation): New offset parameter, honor that for the extract code generation.
(vectorizable_slp_permutation_1): Handle offsetted identities.

* gcc.dg/vect/bb-slp-pr95839.c: Make stricter.
* gcc.dg/vect/bb-slp-pr95839-3.c: New variant testcase.
This commit is contained in:
parent
c1b3b5a056
commit
25f831eab3
3 changed files with 25 additions and 5 deletions
15
gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c
Normal file
15
gcc/testsuite/gcc.dg/vect/bb-slp-pr95839-3.c
Normal file
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
/* { dg-additional-options "-w -Wno-psabi" } */
|
||||
|
||||
typedef float __attribute__((vector_size(32))) v8f32;
|
||||
|
||||
v8f32 f(v8f32 a, v8f32 b)
|
||||
{
|
||||
/* Return a vector whose lane i is a[i] + b[i], written as an
   element-by-element constructor (CTOR) over all eight lanes.
   Check that we vectorize this CTOR without any loads.  The dg-final
   directives in this file enforce that on the slp2 dump: it must
   report "optimized: basic block" and must not contain
   "from scalars".  */
|
||||
return (v8f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3],
|
||||
a[4] + b[4], a[5] + b[5], a[6] + b[6], a[7] + b[7]};
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */
|
|
@ -10,4 +10,5 @@ v4f32 f(v4f32 a, v4f32 b)
|
|||
return (v4f32){a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3]};
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */
|
||||
/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" } } */
|
||||
|
|
|
@ -8432,7 +8432,7 @@ vect_transform_slp_perm_load (vec_info *vinfo,
|
|||
static void
|
||||
vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
||||
slp_tree node, tree first_def, tree second_def,
|
||||
tree mask_vec)
|
||||
tree mask_vec, poly_uint64 identity_offset)
|
||||
{
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
|
||||
|
@ -8470,14 +8470,17 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
else if (!types_compatible_p (TREE_TYPE (first_def), vectype))
|
||||
{
|
||||
/* For identity permutes we still need to handle the case
|
||||
of lowpart extracts or concats. */
|
||||
of offsetted extracts or concats. */
|
||||
unsigned HOST_WIDE_INT c;
|
||||
auto first_def_nunits
|
||||
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (first_def));
|
||||
if (known_le (TYPE_VECTOR_SUBPARTS (vectype), first_def_nunits))
|
||||
{
|
||||
unsigned HOST_WIDE_INT elsz
|
||||
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (first_def))));
|
||||
tree lowpart = build3 (BIT_FIELD_REF, vectype, first_def,
|
||||
TYPE_SIZE (vectype), bitsize_zero_node);
|
||||
TYPE_SIZE (vectype),
|
||||
bitsize_int (identity_offset * elsz));
|
||||
perm_stmt = gimple_build_assign (perm_dest, lowpart);
|
||||
}
|
||||
else if (constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
|
||||
|
@ -8709,7 +8712,8 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
indices.new_vector (mask, second_vec.first == -1U ? 1 : 2,
|
||||
TYPE_VECTOR_SUBPARTS (op_vectype));
|
||||
bool identity_p = indices.series_p (0, 1, 0, 1);
|
||||
bool identity_p = (indices.series_p (0, 1, mask[0], 1)
|
||||
&& constant_multiple_p (mask[0], nunits));
|
||||
machine_mode vmode = TYPE_MODE (vectype);
|
||||
machine_mode op_vmode = TYPE_MODE (op_vectype);
|
||||
unsigned HOST_WIDE_INT c;
|
||||
|
@ -8762,7 +8766,7 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
= vect_get_slp_vect_def (second_node,
|
||||
second_vec.second + vi);
|
||||
vect_add_slp_permutation (vinfo, gsi, node, first_def,
|
||||
second_def, mask_vec);
|
||||
second_def, mask_vec, mask[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue