Refactor SLP permute opt propagation
This rewrites the SLP permute opt propagation to elide the visited bit for an incoming permute of -1 as well as allowing the initial propagation to take more than one iteration before starting on materialization. As we still lack propagation in the reverse direction I've added gcc.dg/vect/bb-slp-71.c and a stopgap to restrict "any" permute handling to the supported cases. 2021-06-29 Richard Biener <rguenther@suse.de> * tree-vect-slp.c (slpg_vertex::visited): Remove. (vect_slp_perms_eq): Handle -1 permutes. (vect_optimize_slp): Rewrite permute propagation. * gcc.dg/vect/pr67790.c: Un-XFAIL. * gcc.dg/vect/bb-slp-71.c: New testcase.
This commit is contained in:
parent
53fd7544af
commit
2dfc0f2203
3 changed files with 120 additions and 54 deletions
32
gcc/testsuite/gcc.dg/vect/bb-slp-71.c
Normal file
32
gcc/testsuite/gcc.dg/vect/bb-slp-71.c
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* { dg-do run } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
int a[4], b[4];
|
||||
|
||||
void __attribute__((noipa))
|
||||
foo(int x, int y)
|
||||
{
|
||||
int tem0 = x + 1;
|
||||
int tem1 = y + 2;
|
||||
int tem2 = x + 3;
|
||||
int tem3 = y + 4;
|
||||
a[0] = tem0 + b[1];
|
||||
a[1] = tem1 + b[0];
|
||||
a[2] = tem2 + b[2];
|
||||
a[3] = tem3 + b[3];
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
b[0] = 10;
|
||||
b[1] = 14;
|
||||
b[2] = 18;
|
||||
b[3] = 22;
|
||||
foo (-1, -3);
|
||||
if (a[0] != 14 || a[1] != 9 || a[2] != 20 || a[3] != 23)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
|
@ -38,4 +38,4 @@ int main()
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */
|
||||
|
|
|
@ -3470,12 +3470,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
|||
struct slpg_vertex
|
||||
{
|
||||
slpg_vertex (slp_tree node_)
|
||||
: node (node_), visited (0), perm_out (0), materialize (0) {}
|
||||
: node (node_), perm_out (-1), materialize (0) {}
|
||||
|
||||
int get_perm_in () const { return materialize ? materialize : perm_out; }
|
||||
|
||||
slp_tree node;
|
||||
unsigned visited : 1;
|
||||
/* The permutation on the outgoing lanes (towards SLP parents). */
|
||||
int perm_out;
|
||||
/* The permutation that is applied by this node. perm_out is
|
||||
|
@ -3567,7 +3566,8 @@ vect_slp_perms_eq (const vec<vec<unsigned> > &perms,
|
|||
int perm_a, int perm_b)
|
||||
{
|
||||
return (perm_a == perm_b
|
||||
|| (perms[perm_a].length () == perms[perm_b].length ()
|
||||
|| (perm_a != -1 && perm_b != -1
|
||||
&& perms[perm_a].length () == perms[perm_b].length ()
|
||||
&& memcmp (&perms[perm_a][0], &perms[perm_b][0],
|
||||
sizeof (unsigned) * perms[perm_a].length ()) == 0));
|
||||
}
|
||||
|
@ -3614,7 +3614,7 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
/* Leafs do not change across iterations. Note leafs also double
|
||||
as entries to the reverse graph. */
|
||||
if (!slpg->vertices[idx].succ)
|
||||
vertices[idx].visited = 1;
|
||||
vertices[idx].perm_out = 0;
|
||||
/* Loads are the only thing generating permutes. */
|
||||
if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
|
||||
continue;
|
||||
|
@ -3668,12 +3668,17 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
|
||||
/* Propagate permutes along the graph and compute materialization points. */
|
||||
bool changed;
|
||||
bool do_materialization = false;
|
||||
unsigned iteration = 0;
|
||||
do
|
||||
{
|
||||
changed = false;
|
||||
++iteration;
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"SLP optimize iteration %d\n", iteration);
|
||||
|
||||
for (i = vertices.length (); i > 0 ; --i)
|
||||
{
|
||||
int idx = ipo[i-1];
|
||||
|
@ -3685,19 +3690,21 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
|| SLP_TREE_DEF_TYPE (node) == vect_constant_def)
|
||||
continue;
|
||||
|
||||
vertices[idx].visited = 1;
|
||||
|
||||
/* We still eventually have failed backedge SLP nodes in the
|
||||
graph, those are only cancelled when analyzing operations.
|
||||
Simply treat them as transparent ops, propagating permutes
|
||||
through them. */
|
||||
if (SLP_TREE_DEF_TYPE (node) == vect_internal_def)
|
||||
{
|
||||
/* We do not handle stores with a permutation. */
|
||||
/* We do not handle stores with a permutation, so all
|
||||
incoming permutes must have been materialized. */
|
||||
stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (node);
|
||||
if (STMT_VINFO_DATA_REF (rep)
|
||||
&& DR_IS_WRITE (STMT_VINFO_DATA_REF (rep)))
|
||||
continue;
|
||||
{
|
||||
vertices[idx].perm_out = 0;
|
||||
continue;
|
||||
}
|
||||
/* We cannot move a permute across an operation that is
|
||||
not independent on lanes. Note this is an explicit
|
||||
negative list since that's much shorter than the respective
|
||||
|
@ -3710,63 +3717,82 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
case CFN_COMPLEX_MUL:
|
||||
case CFN_COMPLEX_MUL_CONJ:
|
||||
case CFN_VEC_ADDSUB:
|
||||
vertices[idx].perm_out = 0;
|
||||
continue;
|
||||
default:;
|
||||
}
|
||||
}
|
||||
|
||||
int perm = -1;
|
||||
for (graph_edge *succ = slpg->vertices[idx].succ;
|
||||
succ; succ = succ->succ_next)
|
||||
{
|
||||
int succ_idx = succ->dest;
|
||||
/* Handle unvisited nodes optimistically. */
|
||||
/* ??? But for constants once we want to handle non-bijective
|
||||
permutes we have to verify the permute, when unifying lanes,
|
||||
will not unify different constants. For example see
|
||||
gcc.dg/vect/bb-slp-14.c for a case that would break. */
|
||||
if (!vertices[succ_idx].visited)
|
||||
continue;
|
||||
int succ_perm = vertices[succ_idx].perm_out;
|
||||
if (perm == -1)
|
||||
perm = succ_perm;
|
||||
else if (succ_perm == 0)
|
||||
{
|
||||
perm = 0;
|
||||
break;
|
||||
}
|
||||
else if (!vect_slp_perms_eq (perms, perm, succ_perm))
|
||||
{
|
||||
perm = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (perm == -1)
|
||||
int perm;
|
||||
if (!slpg->vertices[idx].succ)
|
||||
/* Pick up pre-computed leaf values. */
|
||||
perm = vertices[idx].perm_out;
|
||||
else if (!vect_slp_perms_eq (perms, perm,
|
||||
vertices[idx].get_perm_in ()))
|
||||
else
|
||||
{
|
||||
if (iteration > 1)
|
||||
/* Make sure we eventually converge. */
|
||||
gcc_checking_assert (perm == 0);
|
||||
if (perm == 0)
|
||||
perm = -1;
|
||||
bool all_constant = true;
|
||||
for (graph_edge *succ = slpg->vertices[idx].succ;
|
||||
succ; succ = succ->succ_next)
|
||||
{
|
||||
vertices[idx].perm_out = 0;
|
||||
vertices[idx].materialize = 0;
|
||||
int succ_idx = succ->dest;
|
||||
slp_tree succ_node = vertices[succ_idx].node;
|
||||
if (SLP_TREE_DEF_TYPE (succ_node) != vect_external_def
|
||||
&& SLP_TREE_DEF_TYPE (succ_node) != vect_constant_def)
|
||||
all_constant = false;
|
||||
int succ_perm = vertices[succ_idx].perm_out;
|
||||
/* Handle unvisited (and constant) nodes optimistically. */
|
||||
/* ??? But for constants once we want to handle
|
||||
non-bijective permutes we have to verify the permute,
|
||||
when unifying lanes, will not unify different constants.
|
||||
For example see gcc.dg/vect/bb-slp-14.c for a case
|
||||
that would break. */
|
||||
if (succ_perm == -1)
|
||||
continue;
|
||||
if (perm == -1)
|
||||
perm = succ_perm;
|
||||
else if (succ_perm == 0)
|
||||
{
|
||||
perm = 0;
|
||||
break;
|
||||
}
|
||||
else if (!vect_slp_perms_eq (perms, perm, succ_perm))
|
||||
{
|
||||
perm = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* We still lack a forward propagation of materializations
|
||||
and thus only allow "any" permutes on constant or external
|
||||
nodes which we handle during materialization by looking
|
||||
at SLP children. So avoid having internal "any" permutes
|
||||
for now, see gcc.dg/vect/bb-slp-71.c for a testcase that
|
||||
breaks when removing this restriction. */
|
||||
if (perm == -1 && all_constant)
|
||||
perm = 0;
|
||||
|
||||
if (!vect_slp_perms_eq (perms, perm,
|
||||
vertices[idx].get_perm_in ()))
|
||||
{
|
||||
/* Make sure we eventually converge. */
|
||||
gcc_checking_assert (vertices[idx].get_perm_in () == -1
|
||||
|| perm == 0);
|
||||
if (perm == 0)
|
||||
{
|
||||
vertices[idx].perm_out = 0;
|
||||
vertices[idx].materialize = 0;
|
||||
}
|
||||
if (!vertices[idx].materialize)
|
||||
vertices[idx].perm_out = perm;
|
||||
changed = true;
|
||||
}
|
||||
if (!vertices[idx].materialize)
|
||||
vertices[idx].perm_out = perm;
|
||||
changed = true;
|
||||
}
|
||||
|
||||
if (perm == 0)
|
||||
/* Elide pruning at materialization points in the first
|
||||
iteration phase. */
|
||||
if (!do_materialization)
|
||||
continue;
|
||||
|
||||
/* Elide pruning at materialization points in the first
|
||||
iteration so every node was visited once at least. */
|
||||
if (iteration == 1)
|
||||
if (perm == 0 || perm == -1)
|
||||
continue;
|
||||
|
||||
/* Decide on permute materialization. Look whether there's
|
||||
|
@ -3784,8 +3810,8 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
for (graph_edge *pred = slpg->vertices[idx].pred;
|
||||
pred; pred = pred->pred_next)
|
||||
{
|
||||
gcc_checking_assert (vertices[pred->src].visited);
|
||||
int pred_perm = vertices[pred->src].get_perm_in ();
|
||||
gcc_checking_assert (pred_perm != -1);
|
||||
if (!vect_slp_perms_eq (perms, perm, pred_perm))
|
||||
{
|
||||
all_preds_permuted = false;
|
||||
|
@ -3800,8 +3826,16 @@ vect_optimize_slp (vec_info *vinfo)
|
|||
vertices[idx].perm_out = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the initial propagation converged, switch on materialization
|
||||
and re-propagate. */
|
||||
if (!changed && !do_materialization)
|
||||
{
|
||||
do_materialization = true;
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
while (changed || iteration == 1);
|
||||
while (changed);
|
||||
|
||||
/* Materialize. */
|
||||
for (i = 0; i < vertices.length (); ++i)
|
||||
|
|
Loading…
Add table
Reference in a new issue