Split code out of vectorizable_slp_permutation

A later patch needs to test whether the target supports a
lane_permutation_t without having to construct a full SLP
node.  This patch splits out most of the work
of vectorizable_slp_permutation into a subroutine, so that
properties of the permutation can be passed explicitly without
disturbing the main interface.

The new subroutine still uses an slp_tree argument to get things
like the number of lanes and the vector type.  That's a bit clunky,
but it seemed like the least bad option.

gcc/
	* tree-vect-slp.cc (vectorizable_slp_permutation_1): Split out from...
	(vectorizable_slp_permutation): ...here.
This commit is contained in:
Richard Sandiford 2022-08-30 15:43:45 +01:00
parent 25c2a50cc3
commit 5edc67b773

View file

@ -6976,20 +6976,22 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
}
/* Vectorize the SLP permutations in NODE as specified
in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
child number and lane number.
Interleaving of two two-lane two-child SLP subtrees (not supported):
[ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
A blend of two four-lane two-child SLP subtrees:
[ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
Highpart of a four-lane one-child SLP subtree (not supported):
[ { 0, 2 }, { 0, 3 } ]
Where currently only a subset is supported by code generating below. */
/* Subroutine of vectorizable_slp_permutation. Check whether the target
can perform permutation PERM on the (1 or 2) input nodes in CHILDREN.
If GSI is nonnull, emit the permutation there.
static bool
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
slp_tree node, stmt_vector_for_cost *cost_vec)
When GSI is null, the only purpose of NODE is to give properties
of the result, such as the vector type and number of SLP lanes.
The node does not need to be a VEC_PERM_EXPR.
If the target supports the operation, return the number of individual
VEC_PERM_EXPRs needed, otherwise return -1. Print information to the
dump file if DUMP_P is true. */
static int
vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
slp_tree node, lane_permutation_t &perm,
vec<slp_tree> &children, bool dump_p)
{
tree vectype = SLP_TREE_VECTYPE (node);
@ -7001,7 +7003,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
bool repeating_p = multiple_p (nunits, SLP_TREE_LANES (node));
tree op_vectype = NULL_TREE;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
FOR_EACH_VEC_ELT (children, i, child)
if (SLP_TREE_VECTYPE (child))
{
op_vectype = SLP_TREE_VECTYPE (child);
@ -7009,25 +7011,24 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
}
if (!op_vectype)
op_vectype = vectype;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
FOR_EACH_VEC_ELT (children, i, child)
{
if ((SLP_TREE_DEF_TYPE (child) != vect_internal_def
&& !vect_maybe_update_slp_op_vectype (child, op_vectype))
|| !types_compatible_p (SLP_TREE_VECTYPE (child), op_vectype)
|| !types_compatible_p (TREE_TYPE (vectype), TREE_TYPE (op_vectype)))
{
if (dump_enabled_p ())
if (dump_p)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unsupported vector types in lane permutation\n");
return false;
return -1;
}
if (SLP_TREE_LANES (child) != SLP_TREE_LANES (node))
repeating_p = false;
}
vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node);
gcc_assert (perm.length () == SLP_TREE_LANES (node));
if (dump_enabled_p ())
if (dump_p)
{
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizing permutation");
@ -7076,11 +7077,11 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
/* Calculate every element of every permute mask vector explicitly,
instead of relying on the pattern described above. */
if (!nunits.is_constant (&npatterns))
return false;
return -1;
nelts_per_pattern = ncopies = 1;
if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&ncopies))
return false;
return -1;
noutputs_per_mask = 1;
}
unsigned olanes = ncopies * SLP_TREE_LANES (node);
@ -7093,13 +7094,13 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm;
auto_vec<unsigned> active_lane;
vperm.create (olanes);
active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true);
active_lane.safe_grow_cleared (children.length (), true);
for (unsigned i = 0; i < ncopies; ++i)
{
for (unsigned pi = 0; pi < perm.length (); ++pi)
{
std::pair<unsigned, unsigned> p = perm[pi];
tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]);
tree vtype = SLP_TREE_VECTYPE (children[p.first]);
if (repeating_p)
vperm.quick_push ({{p.first, 0}, p.second + active_lane[p.first]});
else
@ -7112,12 +7113,19 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
}
}
/* Advance to the next group. */
for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j)
active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]);
for (unsigned j = 0; j < children.length (); ++j)
active_lane[j] += SLP_TREE_LANES (children[j]);
}
if (dump_enabled_p ())
if (dump_p)
{
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizing permutation");
for (unsigned i = 0; i < perm.length (); ++i)
dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
if (repeating_p)
dump_printf (MSG_NOTE, " (repeat %d)\n", SLP_TREE_LANES (node));
dump_printf (MSG_NOTE, "\n");
dump_printf_loc (MSG_NOTE, vect_location, "as");
for (unsigned i = 0; i < vperm.length (); ++i)
{
@ -7163,12 +7171,12 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
}
else
{
if (dump_enabled_p ())
if (dump_p)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"permutation requires at "
"least three vectors\n");
gcc_assert (!gsi);
return false;
return -1;
}
mask[index++] = mask_element;
@ -7190,7 +7198,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
TYPE_VECTOR_SUBPARTS (op_vectype),
&c) || c != 2)))
{
if (dump_enabled_p ())
if (dump_p)
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
@ -7203,7 +7211,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
}
gcc_assert (!gsi);
return false;
return -1;
}
if (!identity_p)
@ -7214,8 +7222,8 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
second_vec = first_vec;
slp_tree
first_node = SLP_TREE_CHILDREN (node)[first_vec.first],
second_node = SLP_TREE_CHILDREN (node)[second_vec.first];
first_node = children[first_vec.first],
second_node = children[second_vec.first];
tree mask_vec = NULL_TREE;
if (!identity_p)
@ -7240,6 +7248,32 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
}
}
return nperms;
}
/* Vectorize the SLP permutations in NODE as specified
in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
child number and lane number.
Interleaving of two two-lane two-child SLP subtrees (not supported):
[ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
A blend of two four-lane two-child SLP subtrees:
[ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
Highpart of a four-lane one-child SLP subtree (not supported):
[ { 0, 2 }, { 0, 3 } ]
Where currently only a subset is supported by code generating below. */
static bool
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
slp_tree node, stmt_vector_for_cost *cost_vec)
{
tree vectype = SLP_TREE_VECTYPE (node);
lane_permutation_t &perm = SLP_TREE_LANE_PERMUTATION (node);
int nperms = vectorizable_slp_permutation_1 (vinfo, gsi, node, perm,
SLP_TREE_CHILDREN (node),
dump_enabled_p ());
if (nperms < 0)
return false;
if (!gsi)
record_stmt_cost (cost_vec, nperms, vec_perm, node, vectype, 0, vect_body);