tree-optimization/96043 - BB vectorization costing improvement
This makes the BB vectorizer cost independent SLP subgraphs separately. While on pristine trunk and for x86_64 I failed to distill a testcase where the vectorizer would think _any_ basic-block vectorization opportunity is not profitable I do have pending work that would make the cost savings of a profitable opportunity make another independently not profitable opportunity vectorized. 2020-09-08 Richard Biener <rguenther@suse.de> PR tree-optimization/96043 * tree-vectorizer.h (_slp_instance::cost_vec): New. (_slp_instance::subgraph_entries): Likewise. (BB_VINFO_TARGET_COST_DATA): Remove. * tree-vect-slp.c (vect_free_slp_instance): Free cost_vec and subgraph_entries. (vect_analyze_slp_instance): Initialize them. (vect_slp_analyze_operations): Defer passing costs to the target, instead record them in the SLP graph entry. (get_ultimate_leader): New helper for graph partitioning. (vect_bb_partition_graph_r): Likewise. (vect_bb_partition_graph): New function to partition the SLP graph into independently costable parts. (vect_bb_vectorization_profitable_p): Adjust to work on a subgraph. (vect_bb_vectorization_profitable_p): New wrapper, discarding non-profitable vectorization of subgraphs. (vect_slp_analyze_bb_1): Call vect_bb_partition_graph before costing. * gcc.dg/vect/costmodel/x86_64/costmodel-pr69297.c: Adjust.
This commit is contained in:
parent
484af18ee1
commit
47ddf4c7b1
3 changed files with 194 additions and 19 deletions
|
@ -74,10 +74,28 @@ foo (int* diff)
|
|||
d[13] = m[12] - m[13];
|
||||
d[14] = m[14] + m[15];
|
||||
d[15] = m[15] - m[14];
|
||||
/* The following obviously profitable part should not make
|
||||
the former unprofitable one profitable. */
|
||||
diff[16 + 16] = diff[16];
|
||||
diff[17 + 16] = diff[17];
|
||||
diff[18 + 16] = diff[18];
|
||||
diff[19 + 16] = diff[19];
|
||||
diff[20 + 16] = diff[20];
|
||||
diff[21 + 16] = diff[21];
|
||||
diff[22 + 16] = diff[22];
|
||||
diff[23 + 16] = diff[23];
|
||||
diff[24 + 16] = diff[24];
|
||||
diff[25 + 16] = diff[25];
|
||||
diff[26 + 16] = diff[26];
|
||||
diff[27 + 16] = diff[27];
|
||||
diff[28 + 16] = diff[28];
|
||||
diff[29 + 16] = diff[29];
|
||||
diff[30 + 16] = diff[30];
|
||||
diff[31 + 16] = diff[31];
|
||||
for (k=0; k<16; k++)
|
||||
satd += abs(d[k]);
|
||||
return satd;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "vectorization is not profitable" "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing SLP tree" 1 "slp1" } } */
|
||||
|
|
|
@ -126,6 +126,8 @@ vect_free_slp_instance (slp_instance instance, bool final_p)
|
|||
{
|
||||
vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
|
||||
SLP_INSTANCE_LOADS (instance).release ();
|
||||
instance->subgraph_entries.release ();
|
||||
instance->cost_vec.release ();
|
||||
free (instance);
|
||||
}
|
||||
|
||||
|
@ -2263,6 +2265,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
SLP_INSTANCE_LOADS (new_instance) = vNULL;
|
||||
SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
|
||||
new_instance->reduc_phis = NULL;
|
||||
new_instance->cost_vec = vNULL;
|
||||
new_instance->subgraph_entries = vNULL;
|
||||
|
||||
vect_gather_slp_loads (new_instance, node);
|
||||
if (dump_enabled_p ())
|
||||
|
@ -3133,8 +3137,8 @@ vect_slp_analyze_operations (vec_info *vinfo)
|
|||
visited.add (*x);
|
||||
i++;
|
||||
|
||||
add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
|
||||
cost_vec.release ();
|
||||
/* Remember the SLP graph entry cost for later. */
|
||||
instance->cost_vec = cost_vec;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3142,18 +3146,108 @@ vect_slp_analyze_operations (vec_info *vinfo)
|
|||
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
|
||||
{
|
||||
hash_set<stmt_vec_info> svisited;
|
||||
stmt_vector_for_cost cost_vec;
|
||||
cost_vec.create (2);
|
||||
for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
|
||||
vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
|
||||
instance, &cost_vec, svisited);
|
||||
add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
|
||||
cost_vec.release ();
|
||||
instance, &instance->cost_vec, svisited);
|
||||
}
|
||||
|
||||
return !vinfo->slp_instances.is_empty ();
|
||||
}
|
||||
|
||||
/* Get the SLP instance leader from INSTANCE_LEADER thereby transitively
|
||||
closing the eventual chain. */
|
||||
|
||||
static slp_instance
|
||||
get_ultimate_leader (slp_instance instance,
|
||||
hash_map<slp_instance, slp_instance> &instance_leader)
|
||||
{
|
||||
auto_vec<slp_instance *, 8> chain;
|
||||
slp_instance *tem;
|
||||
while (*(tem = instance_leader.get (instance)) != instance)
|
||||
{
|
||||
chain.safe_push (tem);
|
||||
instance = *tem;
|
||||
}
|
||||
while (!chain.is_empty ())
|
||||
*chain.pop () = instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
/* Worker of vect_bb_partition_graph, recurse on NODE. */
|
||||
|
||||
static void
|
||||
vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
|
||||
slp_instance instance, slp_tree node,
|
||||
hash_map<stmt_vec_info, slp_instance> &stmt_to_instance,
|
||||
hash_map<slp_instance, slp_instance> &instance_leader)
|
||||
{
|
||||
stmt_vec_info stmt_info;
|
||||
unsigned i;
|
||||
bool all = true;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
||||
{
|
||||
bool existed_p;
|
||||
slp_instance &stmt_instance
|
||||
= stmt_to_instance.get_or_insert (stmt_info, &existed_p);
|
||||
if (!existed_p)
|
||||
all = false;
|
||||
else if (stmt_instance != instance)
|
||||
{
|
||||
/* If we're running into a previously marked stmt make us the
|
||||
leader of the current ultimate leader. This keeps the
|
||||
leader chain acyclic and works even when the current instance
|
||||
connects two previously independent graph parts. */
|
||||
slp_instance stmt_leader
|
||||
= get_ultimate_leader (stmt_instance, instance_leader);
|
||||
if (stmt_leader != instance)
|
||||
instance_leader.put (stmt_leader, instance);
|
||||
}
|
||||
stmt_instance = instance;
|
||||
}
|
||||
/* If not all stmts had been visited we have to recurse on children. */
|
||||
if (all)
|
||||
return;
|
||||
|
||||
slp_tree child;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
||||
vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
|
||||
instance_leader);
|
||||
}
|
||||
|
||||
/* Partition the SLP graph into pieces that can be costed independently. */
|
||||
|
||||
static void
|
||||
vect_bb_partition_graph (bb_vec_info bb_vinfo)
|
||||
{
|
||||
DUMP_VECT_SCOPE ("vect_bb_partition_graph");
|
||||
|
||||
/* First walk the SLP graph assigning each involved scalar stmt a
|
||||
corresponding SLP graph entry and upon visiting a previously
|
||||
marked stmt, make the stmts leader the current SLP graph entry. */
|
||||
hash_map<stmt_vec_info, slp_instance> stmt_to_instance;
|
||||
hash_map<slp_instance, slp_instance> instance_leader;
|
||||
slp_instance instance;
|
||||
for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
|
||||
{
|
||||
instance_leader.put (instance, instance);
|
||||
vect_bb_partition_graph_r (bb_vinfo,
|
||||
instance, SLP_INSTANCE_TREE (instance),
|
||||
stmt_to_instance, instance_leader);
|
||||
}
|
||||
|
||||
/* Then collect entries to each independent subgraph. */
|
||||
for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
|
||||
{
|
||||
slp_instance leader = get_ultimate_leader (instance, instance_leader);
|
||||
leader->subgraph_entries.safe_push (instance);
|
||||
if (dump_enabled_p ()
|
||||
&& leader != instance)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"instance %p is leader of %p\n",
|
||||
leader, instance);
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute the scalar cost of the SLP node NODE and its children
|
||||
and return it. Do not account defs that are marked in LIFE and
|
||||
|
@ -3261,18 +3355,22 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
|
|||
}
|
||||
}
|
||||
|
||||
/* Check if vectorization of the basic block is profitable. */
|
||||
/* Check if vectorization of the basic block is profitable for the
|
||||
subgraph denoted by SLP_INSTANCES. */
|
||||
|
||||
static bool
|
||||
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
||||
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
|
||||
vec<slp_instance> slp_instances)
|
||||
{
|
||||
vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
|
||||
slp_instance instance;
|
||||
int i;
|
||||
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
|
||||
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
|
||||
|
||||
/* Calculate scalar cost. */
|
||||
void *vect_target_cost_data = init_cost (NULL);
|
||||
|
||||
/* Calculate scalar cost and sum the cost for the vector stmts
|
||||
previously collected. */
|
||||
stmt_vector_for_cost scalar_costs;
|
||||
scalar_costs.create (0);
|
||||
hash_set<slp_tree> visited;
|
||||
|
@ -3284,22 +3382,25 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
|||
vect_bb_slp_scalar_cost (bb_vinfo,
|
||||
SLP_INSTANCE_TREE (instance),
|
||||
&life, &scalar_costs, visited);
|
||||
add_stmt_costs (bb_vinfo, vect_target_cost_data, &instance->cost_vec);
|
||||
instance->cost_vec.release ();
|
||||
}
|
||||
/* Unset visited flag. */
|
||||
stmt_info_for_cost *si;
|
||||
FOR_EACH_VEC_ELT (scalar_costs, i, si)
|
||||
gimple_set_visited (si->stmt_info->stmt, false);
|
||||
|
||||
void *target_cost_data = init_cost (NULL);
|
||||
add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
|
||||
void *scalar_target_cost_data = init_cost (NULL);
|
||||
add_stmt_costs (bb_vinfo, scalar_target_cost_data, &scalar_costs);
|
||||
scalar_costs.release ();
|
||||
unsigned dummy;
|
||||
finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
|
||||
destroy_cost_data (target_cost_data);
|
||||
finish_cost (scalar_target_cost_data, &dummy, &scalar_cost, &dummy);
|
||||
destroy_cost_data (scalar_target_cost_data);
|
||||
|
||||
/* Complete the target-specific cost calculation. */
|
||||
finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
|
||||
/* Complete the target-specific vector cost calculation. */
|
||||
finish_cost (vect_target_cost_data, &vec_prologue_cost,
|
||||
&vec_inside_cost, &vec_epilogue_cost);
|
||||
destroy_cost_data (vect_target_cost_data);
|
||||
|
||||
vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
|
||||
|
||||
|
@ -3324,6 +3425,54 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* For each SLP subgraph determine profitability and remove parts not so.
|
||||
Returns true if any profitable to vectorize subgraph remains. */
|
||||
|
||||
static bool
|
||||
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
|
||||
{
|
||||
slp_instance instance;
|
||||
unsigned i;
|
||||
|
||||
auto_vec<slp_instance> subgraphs (BB_VINFO_SLP_INSTANCES (bb_vinfo).length ());
|
||||
FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
|
||||
if (!instance->subgraph_entries.is_empty ())
|
||||
subgraphs.quick_push (instance);
|
||||
BB_VINFO_SLP_INSTANCES (bb_vinfo).truncate (0);
|
||||
for (i = 0; i < subgraphs.length ();)
|
||||
{
|
||||
instance = subgraphs[i];
|
||||
if (!vect_bb_vectorization_profitable_p (bb_vinfo,
|
||||
instance->subgraph_entries))
|
||||
{
|
||||
/* ??? We need to think of providing better dump/opt-report
|
||||
locations here. */
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: vectorization is not "
|
||||
"profitable.\n");
|
||||
}
|
||||
slp_instance entry;
|
||||
unsigned j;
|
||||
FOR_EACH_VEC_ELT (instance->subgraph_entries, j, entry)
|
||||
if (entry != instance)
|
||||
vect_free_slp_instance (entry, false);
|
||||
vect_free_slp_instance (instance, false);
|
||||
subgraphs.ordered_remove (i);
|
||||
}
|
||||
else
|
||||
{
|
||||
slp_instance entry;
|
||||
unsigned j;
|
||||
FOR_EACH_VEC_ELT (instance->subgraph_entries, j, entry)
|
||||
BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (entry);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
return !BB_VINFO_SLP_INSTANCES (bb_vinfo).is_empty ();
|
||||
}
|
||||
|
||||
/* Find any vectorizable constructors and add them to the grouped_store
|
||||
array. */
|
||||
|
||||
|
@ -3465,6 +3614,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
|
|||
return false;
|
||||
}
|
||||
|
||||
vect_bb_partition_graph (bb_vinfo);
|
||||
|
||||
/* Cost model: check if the vectorization is worthwhile. */
|
||||
if (!unlimited_cost_model (NULL)
|
||||
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
|
||||
|
|
|
@ -183,6 +183,13 @@ public:
|
|||
|
||||
/* The SLP node containing the reduction PHIs. */
|
||||
slp_tree reduc_phis;
|
||||
|
||||
/* Vector cost of this entry to the SLP graph. */
|
||||
stmt_vector_for_cost cost_vec;
|
||||
|
||||
/* If this instance is the main entry of a subgraph the set of
|
||||
entries into the same subgraph, including itself. */
|
||||
vec<_slp_instance *> subgraph_entries;
|
||||
} *slp_instance;
|
||||
|
||||
|
||||
|
@ -913,7 +920,6 @@ public:
|
|||
#define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
|
||||
#define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
|
||||
#define BB_VINFO_DDRS(B) (B)->shared->ddrs
|
||||
#define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data
|
||||
|
||||
static inline bb_vec_info
|
||||
vec_info_for_bb (basic_block bb)
|
||||
|
|
Loading…
Add table
Reference in a new issue