Split code out of vectorizable_slp_permutation
A later patch needs to test whether the target supports a given lane_permutation_t without having to construct a full SLP node first. This patch splits most of the work of vectorizable_slp_permutation out into a subroutine, so that the properties of the permutation can be passed explicitly without disturbing the main interface. The new subroutine still takes an slp_tree argument to get things like the number of lanes and the vector type. That's a bit clunky, but it seemed like the least bad option. gcc/ * tree-vect-slp.cc (vectorizable_slp_permutation_1): Split out from... (vectorizable_slp_permutation): ...here.
This commit is contained in:
parent
25c2a50cc3
commit
5edc67b773
1 changed files with 66 additions and 32 deletions
|
@ -6976,20 +6976,22 @@ vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
|
||||
}
|
||||
|
||||
/* Vectorize the SLP permutations in NODE as specified
|
||||
in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
|
||||
child number and lane number.
|
||||
Interleaving of two two-lane two-child SLP subtrees (not supported):
|
||||
[ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
|
||||
A blend of two four-lane two-child SLP subtrees:
|
||||
[ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
|
||||
Highpart of a four-lane one-child SLP subtree (not supported):
|
||||
[ { 0, 2 }, { 0, 3 } ]
|
||||
Where currently only a subset is supported by code generating below. */
|
||||
/* Subroutine of vectorizable_slp_permutation. Check whether the target
|
||||
can perform permutation PERM on the (1 or 2) input nodes in CHILDREN.
|
||||
If GSI is nonnull, emit the permutation there.
|
||||
|
||||
static bool
|
||||
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
||||
slp_tree node, stmt_vector_for_cost *cost_vec)
|
||||
When GSI is null, the only purpose of NODE is to give properties
|
||||
of the result, such as the vector type and number of SLP lanes.
|
||||
The node does not need to be a VEC_PERM_EXPR.
|
||||
|
||||
If the target supports the operation, return the number of individual
|
||||
VEC_PERM_EXPRs needed, otherwise return -1. Print information to the
|
||||
dump file if DUMP_P is true. */
|
||||
|
||||
static int
|
||||
vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
||||
slp_tree node, lane_permutation_t &perm,
|
||||
vec<slp_tree> &children, bool dump_p)
|
||||
{
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
|
||||
|
@ -7001,7 +7003,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
bool repeating_p = multiple_p (nunits, SLP_TREE_LANES (node));
|
||||
tree op_vectype = NULL_TREE;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
FOR_EACH_VEC_ELT (children, i, child)
|
||||
if (SLP_TREE_VECTYPE (child))
|
||||
{
|
||||
op_vectype = SLP_TREE_VECTYPE (child);
|
||||
|
@ -7009,25 +7011,24 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
if (!op_vectype)
|
||||
op_vectype = vectype;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
FOR_EACH_VEC_ELT (children, i, child)
|
||||
{
|
||||
if ((SLP_TREE_DEF_TYPE (child) != vect_internal_def
|
||||
&& !vect_maybe_update_slp_op_vectype (child, op_vectype))
|
||||
|| !types_compatible_p (SLP_TREE_VECTYPE (child), op_vectype)
|
||||
|| !types_compatible_p (TREE_TYPE (vectype), TREE_TYPE (op_vectype)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
if (dump_p)
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Unsupported vector types in lane permutation\n");
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
if (SLP_TREE_LANES (child) != SLP_TREE_LANES (node))
|
||||
repeating_p = false;
|
||||
}
|
||||
|
||||
vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node);
|
||||
gcc_assert (perm.length () == SLP_TREE_LANES (node));
|
||||
if (dump_enabled_p ())
|
||||
if (dump_p)
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"vectorizing permutation");
|
||||
|
@ -7076,11 +7077,11 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
/* Calculate every element of every permute mask vector explicitly,
|
||||
instead of relying on the pattern described above. */
|
||||
if (!nunits.is_constant (&npatterns))
|
||||
return false;
|
||||
return -1;
|
||||
nelts_per_pattern = ncopies = 1;
|
||||
if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
|
||||
if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&ncopies))
|
||||
return false;
|
||||
return -1;
|
||||
noutputs_per_mask = 1;
|
||||
}
|
||||
unsigned olanes = ncopies * SLP_TREE_LANES (node);
|
||||
|
@ -7093,13 +7094,13 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm;
|
||||
auto_vec<unsigned> active_lane;
|
||||
vperm.create (olanes);
|
||||
active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true);
|
||||
active_lane.safe_grow_cleared (children.length (), true);
|
||||
for (unsigned i = 0; i < ncopies; ++i)
|
||||
{
|
||||
for (unsigned pi = 0; pi < perm.length (); ++pi)
|
||||
{
|
||||
std::pair<unsigned, unsigned> p = perm[pi];
|
||||
tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]);
|
||||
tree vtype = SLP_TREE_VECTYPE (children[p.first]);
|
||||
if (repeating_p)
|
||||
vperm.quick_push ({{p.first, 0}, p.second + active_lane[p.first]});
|
||||
else
|
||||
|
@ -7112,12 +7113,19 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
}
|
||||
/* Advance to the next group. */
|
||||
for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j)
|
||||
active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]);
|
||||
for (unsigned j = 0; j < children.length (); ++j)
|
||||
active_lane[j] += SLP_TREE_LANES (children[j]);
|
||||
}
|
||||
|
||||
if (dump_enabled_p ())
|
||||
if (dump_p)
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"vectorizing permutation");
|
||||
for (unsigned i = 0; i < perm.length (); ++i)
|
||||
dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
|
||||
if (repeating_p)
|
||||
dump_printf (MSG_NOTE, " (repeat %d)\n", SLP_TREE_LANES (node));
|
||||
dump_printf (MSG_NOTE, "\n");
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "as");
|
||||
for (unsigned i = 0; i < vperm.length (); ++i)
|
||||
{
|
||||
|
@ -7163,12 +7171,12 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
else
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
if (dump_p)
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"permutation requires at "
|
||||
"least three vectors\n");
|
||||
gcc_assert (!gsi);
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
mask[index++] = mask_element;
|
||||
|
@ -7190,7 +7198,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
TYPE_VECTOR_SUBPARTS (op_vectype),
|
||||
&c) || c != 2)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
if (dump_p)
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
|
@ -7203,7 +7211,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
|
||||
}
|
||||
gcc_assert (!gsi);
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!identity_p)
|
||||
|
@ -7214,8 +7222,8 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
second_vec = first_vec;
|
||||
|
||||
slp_tree
|
||||
first_node = SLP_TREE_CHILDREN (node)[first_vec.first],
|
||||
second_node = SLP_TREE_CHILDREN (node)[second_vec.first];
|
||||
first_node = children[first_vec.first],
|
||||
second_node = children[second_vec.first];
|
||||
|
||||
tree mask_vec = NULL_TREE;
|
||||
if (!identity_p)
|
||||
|
@ -7240,6 +7248,32 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
}
|
||||
|
||||
return nperms;
|
||||
}
|
||||
|
||||
/* Vectorize the SLP permutations in NODE as specified
|
||||
in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
|
||||
child number and lane number.
|
||||
Interleaving of two two-lane two-child SLP subtrees (not supported):
|
||||
[ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
|
||||
A blend of two four-lane two-child SLP subtrees:
|
||||
[ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
|
||||
Highpart of a four-lane one-child SLP subtree (not supported):
|
||||
[ { 0, 2 }, { 0, 3 } ]
|
||||
Where currently only a subset is supported by code generating below. */
|
||||
|
||||
static bool
|
||||
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
|
||||
slp_tree node, stmt_vector_for_cost *cost_vec)
|
||||
{
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
lane_permutation_t &perm = SLP_TREE_LANE_PERMUTATION (node);
|
||||
int nperms = vectorizable_slp_permutation_1 (vinfo, gsi, node, perm,
|
||||
SLP_TREE_CHILDREN (node),
|
||||
dump_enabled_p ());
|
||||
if (nperms < 0)
|
||||
return false;
|
||||
|
||||
if (!gsi)
|
||||
record_stmt_cost (cost_vec, nperms, vec_perm, node, vectype, 0, vect_body);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue