tree-optimization/102176 - locally compute participating SLP stmts

This performs local re-computation of participating scalar stmts
in BB vectorization subgraphs to allow precise computation of
liveness of scalar stmts after vectorization and thus precise
costing.  This treats all extern defs as live but continues
to optimistically handle scalar defs that we think we can handle
by lane-extraction even though that can still fail late during
code-generation.

2021-09-02  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/102176
	* tree-vect-slp.c (vect_slp_gather_vectorized_scalar_stmts):
	New function.
	(vect_bb_slp_scalar_cost): Use the computed set of
	vectorized scalar stmts instead of relying on the out-of-date
	and not accurate PURE_SLP_STMT.
	(vect_bb_vectorization_profitable_p): Compute the set
	of vectorized scalar stmts.
This commit is contained in:
Richard Biener 2021-09-02 14:48:10 +02:00
parent 66bba4dc26
commit a3fb781d4b

View file

@ -5104,6 +5104,42 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo)
}
}
/* Compute the set of scalar stmts participating in internal and external
nodes. */
static void
vect_slp_gather_vectorized_scalar_stmts (vec_info *vinfo, slp_tree node,
hash_set<slp_tree> &visited,
hash_set<stmt_vec_info> &vstmts,
hash_set<stmt_vec_info> &estmts)
{
int i;
stmt_vec_info stmt_info;
slp_tree child;
if (visited.add (node))
return;
if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
{
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
vstmts.add (stmt_info);
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (child)
vect_slp_gather_vectorized_scalar_stmts (vinfo, child, visited,
vstmts, estmts);
}
else
for (tree def : SLP_TREE_SCALAR_OPS (node))
{
stmt_vec_info def_stmt = vinfo->lookup_def (def);
if (def_stmt)
estmts.add (def_stmt);
}
}
/* Compute the scalar cost of the SLP node NODE and its children
and return it. Do not account defs that are marked in LIFE and
update LIFE according to uses of NODE. */
@ -5112,6 +5148,7 @@ static void
vect_bb_slp_scalar_cost (vec_info *vinfo,
slp_tree node, vec<bool, va_heap> *life,
stmt_vector_for_cost *cost_vec,
hash_set<stmt_vec_info> &vectorized_scalar_stmts,
hash_set<slp_tree> &visited)
{
unsigned i;
@ -5148,8 +5185,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
{
stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
if (!use_stmt_info
|| !PURE_SLP_STMT
(vect_stmt_to_vectorize (use_stmt_info)))
|| !vectorized_scalar_stmts.contains (use_stmt_info))
{
(*life)[i] = true;
break;
@ -5212,7 +5248,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
subtree_life.safe_splice (*life);
}
vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
visited);
vectorized_scalar_stmts, visited);
subtree_life.truncate (0);
}
}
@ -5254,11 +5290,33 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
SLP_INSTANCE_TREE (instance), visited);
}
/* Compute the set of scalar stmts we know will go away 'locally' when
vectorizing. This used to be tracked with just PURE_SLP_STMT but that's
not accurate for nodes promoted extern late or for scalar stmts that
are used both in extern defs and in vectorized defs. */
hash_set<stmt_vec_info> vectorized_scalar_stmts;
hash_set<stmt_vec_info> scalar_stmts_in_externs;
hash_set<slp_tree> visited;
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
vect_slp_gather_vectorized_scalar_stmts (bb_vinfo,
SLP_INSTANCE_TREE (instance),
visited,
vectorized_scalar_stmts,
scalar_stmts_in_externs);
for (stmt_vec_info rstmt : SLP_INSTANCE_ROOT_STMTS (instance))
vectorized_scalar_stmts.add (rstmt);
}
/* Scalar stmts used as defs in external nodes need to be preseved, so
remove them from vectorized_scalar_stmts. */
for (stmt_vec_info stmt : scalar_stmts_in_externs)
vectorized_scalar_stmts.remove (stmt);
/* Calculate scalar cost and sum the cost for the vector stmts
previously collected. */
stmt_vector_for_cost scalar_costs = vNULL;
stmt_vector_for_cost vector_costs = vNULL;
hash_set<slp_tree> visited;
visited.empty ();
FOR_EACH_VEC_ELT (slp_instances, i, instance)
{
auto_vec<bool, 20> life;
@ -5271,7 +5329,8 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
SLP_INSTANCE_ROOT_STMTS (instance)[0], 0, vect_body);
vect_bb_slp_scalar_cost (bb_vinfo,
SLP_INSTANCE_TREE (instance),
&life, &scalar_costs, visited);
&life, &scalar_costs, vectorized_scalar_stmts,
visited);
vector_costs.safe_splice (instance->cost_vec);
instance->cost_vec.release ();
}