tree-vectorizer.h (_slp_tree::ops): New member.
2019-10-21 Richard Biener <rguenther@suse.de> * tree-vectorizer.h (_slp_tree::ops): New member. (SLP_TREE_SCALAR_OPS): New. (vect_get_slp_defs): Adjust prototype. * tree-vect-slp.c (vect_free_slp_tree): Release SLP_TREE_SCALAR_OPS. (vect_create_new_slp_node): Initialize it. New overload for initializing by an operands array. (_slp_oprnd_info::ops): New member. (vect_create_oprnd_info): Initialize it. (vect_free_oprnd_info): Release it. (vect_get_and_check_slp_defs): Populate the operands array. Do not swap operands in the IL when not necessary. (vect_build_slp_tree_2): Build SLP nodes for invariant operands. Record SLP_TREE_SCALAR_OPS for all invariant nodes. Also swap operands in the operands array. Do not swap operands in the IL. (vect_slp_rearrange_stmts): Re-arrange SLP_TREE_SCALAR_OPS as well. (vect_gather_slp_loads): Fix. (vect_detect_hybrid_slp_stmts): Likewise. (vect_slp_analyze_node_operations_1): Search for a internal def child for computing reduction SLP_TREE_NUMBER_OF_VEC_STMTS. (vect_slp_analyze_node_operations): Skip ops-only stmts for the def-type push/pop dance. (vect_get_constant_vectors): Compute number_of_vectors here. Use SLP_TREE_SCALAR_OPS and simplify greatly. (vect_get_slp_vect_defs): Use gimple_get_lhs also for PHIs. (vect_get_slp_defs): Simplify greatly. * tree-vect-loop.c (vectorize_fold_left_reduction): Simplify. (vect_transform_reduction): Likewise. * tree-vect-stmts.c (vect_get_vec_defs): Simplify. (vectorizable_call): Likewise. (vectorizable_operation): Likewise. (vectorizable_load): Likewise. (vectorizable_condition): Likewise. (vectorizable_comparison): Likewise. From-SVN: r277241
This commit is contained in:
parent
ae7f3143a3
commit
30c0d1e3cf
5 changed files with 207 additions and 281 deletions
|
@ -1,3 +1,41 @@
|
|||
2019-10-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* tree-vectorizer.h (_slp_tree::ops): New member.
|
||||
(SLP_TREE_SCALAR_OPS): New.
|
||||
(vect_get_slp_defs): Adjust prototype.
|
||||
* tree-vect-slp.c (vect_free_slp_tree): Release
|
||||
SLP_TREE_SCALAR_OPS.
|
||||
(vect_create_new_slp_node): Initialize it. New overload for
|
||||
initializing by an operands array.
|
||||
(_slp_oprnd_info::ops): New member.
|
||||
(vect_create_oprnd_info): Initialize it.
|
||||
(vect_free_oprnd_info): Release it.
|
||||
(vect_get_and_check_slp_defs): Populate the operands array.
|
||||
Do not swap operands in the IL when not necessary.
|
||||
(vect_build_slp_tree_2): Build SLP nodes for invariant operands.
|
||||
Record SLP_TREE_SCALAR_OPS for all invariant nodes. Also
|
||||
swap operands in the operands array. Do not swap operands in
|
||||
the IL.
|
||||
(vect_slp_rearrange_stmts): Re-arrange SLP_TREE_SCALAR_OPS as well.
|
||||
(vect_gather_slp_loads): Fix.
|
||||
(vect_detect_hybrid_slp_stmts): Likewise.
|
||||
(vect_slp_analyze_node_operations_1): Search for a internal
|
||||
def child for computing reduction SLP_TREE_NUMBER_OF_VEC_STMTS.
|
||||
(vect_slp_analyze_node_operations): Skip ops-only stmts for
|
||||
the def-type push/pop dance.
|
||||
(vect_get_constant_vectors): Compute number_of_vectors here.
|
||||
Use SLP_TREE_SCALAR_OPS and simplify greatly.
|
||||
(vect_get_slp_vect_defs): Use gimple_get_lhs also for PHIs.
|
||||
(vect_get_slp_defs): Simplify greatly.
|
||||
* tree-vect-loop.c (vectorize_fold_left_reduction): Simplify.
|
||||
(vect_transform_reduction): Likewise.
|
||||
* tree-vect-stmts.c (vect_get_vec_defs): Simplify.
|
||||
(vectorizable_call): Likewise.
|
||||
(vectorizable_operation): Likewise.
|
||||
(vectorizable_load): Likewise.
|
||||
(vectorizable_condition): Likewise.
|
||||
(vectorizable_comparison): Likewise.
|
||||
|
||||
2019-10-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/92161
|
||||
|
|
|
@ -5312,10 +5312,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
|
|||
if (slp_node)
|
||||
{
|
||||
auto_vec<vec<tree> > vec_defs (2);
|
||||
auto_vec<tree> sops(2);
|
||||
sops.quick_push (ops[0]);
|
||||
sops.quick_push (ops[1]);
|
||||
vect_get_slp_defs (sops, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
|
||||
vec_defs[0].release ();
|
||||
vec_defs[1].release ();
|
||||
|
@ -6484,16 +6481,8 @@ vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
/* Get vec defs for all the operands except the reduction index,
|
||||
ensuring the ordering of the ops in the vector is kept. */
|
||||
auto_vec<tree, 3> slp_ops;
|
||||
auto_vec<vec<tree>, 3> vec_defs;
|
||||
|
||||
slp_ops.quick_push (ops[0]);
|
||||
slp_ops.quick_push (ops[1]);
|
||||
if (op_type == ternary_op)
|
||||
slp_ops.quick_push (ops[2]);
|
||||
|
||||
vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
|
||||
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds0.safe_splice (vec_defs[0]);
|
||||
vec_defs[0].release ();
|
||||
vec_oprnds1.safe_splice (vec_defs[1]);
|
||||
|
|
|
@ -79,6 +79,7 @@ vect_free_slp_tree (slp_tree node, bool final_p)
|
|||
|
||||
SLP_TREE_CHILDREN (node).release ();
|
||||
SLP_TREE_SCALAR_STMTS (node).release ();
|
||||
SLP_TREE_SCALAR_OPS (node).release ();
|
||||
SLP_TREE_VEC_STMTS (node).release ();
|
||||
SLP_TREE_LOAD_PERMUTATION (node).release ();
|
||||
|
||||
|
@ -122,6 +123,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
|||
|
||||
node = XNEW (struct _slp_tree);
|
||||
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
|
||||
SLP_TREE_SCALAR_OPS (node) = vNULL;
|
||||
SLP_TREE_VEC_STMTS (node).create (0);
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
||||
SLP_TREE_CHILDREN (node).create (nops);
|
||||
|
@ -138,6 +140,28 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
|||
return node;
|
||||
}
|
||||
|
||||
/* Create an SLP node for OPS. */
|
||||
|
||||
static slp_tree
|
||||
vect_create_new_slp_node (vec<tree> ops)
|
||||
{
|
||||
slp_tree node;
|
||||
|
||||
node = XNEW (struct _slp_tree);
|
||||
SLP_TREE_SCALAR_STMTS (node) = vNULL;
|
||||
SLP_TREE_SCALAR_OPS (node) = ops;
|
||||
SLP_TREE_VEC_STMTS (node).create (0);
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
||||
SLP_TREE_CHILDREN (node) = vNULL;
|
||||
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
||||
SLP_TREE_TWO_OPERATORS (node) = false;
|
||||
SLP_TREE_DEF_TYPE (node) = vect_external_def;
|
||||
node->refcnt = 1;
|
||||
node->max_nunits = 1;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
/* This structure is used in creation of an SLP tree. Each instance
|
||||
corresponds to the same operand in a group of scalar stmts in an SLP
|
||||
|
@ -146,6 +170,8 @@ typedef struct _slp_oprnd_info
|
|||
{
|
||||
/* Def-stmts for the operands. */
|
||||
vec<stmt_vec_info> def_stmts;
|
||||
/* Operands. */
|
||||
vec<tree> ops;
|
||||
/* Information about the first statement, its vector def-type, type, the
|
||||
operand itself in case it's constant, and an indication if it's a pattern
|
||||
stmt. */
|
||||
|
@ -170,6 +196,7 @@ vect_create_oprnd_info (int nops, int group_size)
|
|||
{
|
||||
oprnd_info = XNEW (struct _slp_oprnd_info);
|
||||
oprnd_info->def_stmts.create (group_size);
|
||||
oprnd_info->ops.create (group_size);
|
||||
oprnd_info->first_dt = vect_uninitialized_def;
|
||||
oprnd_info->first_op_type = NULL_TREE;
|
||||
oprnd_info->first_pattern = false;
|
||||
|
@ -192,6 +219,7 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
|
|||
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
|
||||
{
|
||||
oprnd_info->def_stmts.release ();
|
||||
oprnd_info->ops.release ();
|
||||
XDELETE (oprnd_info);
|
||||
}
|
||||
|
||||
|
@ -449,12 +477,15 @@ again:
|
|||
{
|
||||
case vect_constant_def:
|
||||
case vect_external_def:
|
||||
oprnd_info->def_stmts.quick_push (NULL);
|
||||
oprnd_info->ops.quick_push (oprnd);
|
||||
break;
|
||||
|
||||
case vect_reduction_def:
|
||||
case vect_induction_def:
|
||||
case vect_internal_def:
|
||||
oprnd_info->def_stmts.quick_push (def_stmt_info);
|
||||
oprnd_info->ops.quick_push (oprnd);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -484,6 +515,8 @@ again:
|
|||
|
||||
if (first_op_cond)
|
||||
{
|
||||
/* To get rid of this swapping we have to move the stmt code
|
||||
to the SLP tree as well (and gather it here per stmt). */
|
||||
gassign *stmt = as_a <gassign *> (stmt_info->stmt);
|
||||
tree cond = gimple_assign_rhs1 (stmt);
|
||||
enum tree_code code = TREE_CODE (cond);
|
||||
|
@ -508,10 +541,8 @@ again:
|
|||
}
|
||||
else
|
||||
{
|
||||
unsigned int op = commutative_op + first_op_idx;
|
||||
swap_ssa_operands (stmt_info->stmt,
|
||||
gimple_op_ptr (stmt_info->stmt, op),
|
||||
gimple_op_ptr (stmt_info->stmt, op + 1));
|
||||
/* Commutative ops need not reflect swapping, ops are in
|
||||
the SLP tree. */
|
||||
}
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
|
@ -1221,10 +1252,24 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
unsigned old_tree_size = this_tree_size;
|
||||
unsigned int j;
|
||||
|
||||
if (oprnd_info->first_dt == vect_uninitialized_def)
|
||||
{
|
||||
/* COND_EXPR have one too many eventually if the condition
|
||||
is a SSA name. */
|
||||
gcc_assert (i == 3 && nops == 4);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (oprnd_info->first_dt != vect_internal_def
|
||||
&& oprnd_info->first_dt != vect_reduction_def
|
||||
&& oprnd_info->first_dt != vect_induction_def)
|
||||
continue;
|
||||
{
|
||||
slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
|
||||
SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
|
||||
oprnd_info->ops = vNULL;
|
||||
children.safe_push (invnode);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
|
||||
group_size, &this_max_nunits,
|
||||
|
@ -1233,7 +1278,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
{
|
||||
/* If we have all children of child built up from scalars then just
|
||||
throw that away and build it up this node from scalars. */
|
||||
if (!SLP_TREE_CHILDREN (child).is_empty ()
|
||||
if (is_a <bb_vec_info> (vinfo)
|
||||
&& !SLP_TREE_CHILDREN (child).is_empty ()
|
||||
/* ??? Rejecting patterns this way doesn't work. We'd have to
|
||||
do extra work to cancel the pattern so the uses see the
|
||||
scalar version. */
|
||||
|
@ -1258,6 +1304,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
"scalars instead\n");
|
||||
oprnd_info->def_stmts = vNULL;
|
||||
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
||||
SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
||||
oprnd_info->ops = vNULL;
|
||||
++this_tree_size;
|
||||
children.safe_push (child);
|
||||
continue;
|
||||
|
@ -1290,7 +1338,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
this_tree_size++;
|
||||
child = vect_create_new_slp_node (oprnd_info->def_stmts);
|
||||
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
||||
SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
||||
children.safe_push (child);
|
||||
oprnd_info->ops = vNULL;
|
||||
oprnd_info->def_stmts = vNULL;
|
||||
continue;
|
||||
}
|
||||
|
@ -1370,6 +1420,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
{
|
||||
std::swap (oprnds_info[0]->def_stmts[j],
|
||||
oprnds_info[1]->def_stmts[j]);
|
||||
std::swap (oprnds_info[0]->ops[j],
|
||||
oprnds_info[1]->ops[j]);
|
||||
if (dump_enabled_p ())
|
||||
dump_printf (MSG_NOTE, "%d ", j);
|
||||
}
|
||||
|
@ -1382,34 +1434,10 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
tem, npermutes,
|
||||
&this_tree_size, bst_map)) != NULL)
|
||||
{
|
||||
/* ... so if successful we can apply the operand swapping
|
||||
to the GIMPLE IL. This is necessary because for example
|
||||
vect_get_slp_defs uses operand indexes and thus expects
|
||||
canonical operand order. This is also necessary even
|
||||
if we end up building the operand from scalars as
|
||||
we'll continue to process swapped operand two. */
|
||||
for (j = 0; j < group_size; ++j)
|
||||
gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
|
||||
for (j = 0; j < group_size; ++j)
|
||||
if (matches[j] == !swap_not_matching)
|
||||
{
|
||||
gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
|
||||
/* Avoid swapping operands twice. */
|
||||
if (gimple_plf (stmt, GF_PLF_1))
|
||||
continue;
|
||||
swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
|
||||
gimple_assign_rhs2_ptr (stmt));
|
||||
gimple_set_plf (stmt, GF_PLF_1, true);
|
||||
}
|
||||
/* Verify we swap all duplicates or none. */
|
||||
if (flag_checking)
|
||||
for (j = 0; j < group_size; ++j)
|
||||
gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
|
||||
== (matches[j] == !swap_not_matching));
|
||||
|
||||
/* If we have all children of child built up from scalars then
|
||||
just throw that away and build it up this node from scalars. */
|
||||
if (!SLP_TREE_CHILDREN (child).is_empty ()
|
||||
if (is_a <bb_vec_info> (vinfo)
|
||||
&& !SLP_TREE_CHILDREN (child).is_empty ()
|
||||
/* ??? Rejecting patterns this way doesn't work. We'd have
|
||||
to do extra work to cancel the pattern so the uses see the
|
||||
scalar version. */
|
||||
|
@ -1435,6 +1463,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
|||
"scalars instead\n");
|
||||
oprnd_info->def_stmts = vNULL;
|
||||
SLP_TREE_DEF_TYPE (child) = vect_external_def;
|
||||
SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
|
||||
oprnd_info->ops = vNULL;
|
||||
++this_tree_size;
|
||||
children.safe_push (child);
|
||||
continue;
|
||||
|
@ -1577,8 +1607,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
|||
vec<unsigned> permutation,
|
||||
hash_set<slp_tree> &visited)
|
||||
{
|
||||
stmt_vec_info stmt_info;
|
||||
vec<stmt_vec_info> tmp_stmts;
|
||||
unsigned int i;
|
||||
slp_tree child;
|
||||
|
||||
|
@ -1588,15 +1616,30 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
|||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
vect_slp_rearrange_stmts (child, group_size, permutation, visited);
|
||||
|
||||
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
||||
tmp_stmts.create (group_size);
|
||||
tmp_stmts.quick_grow_cleared (group_size);
|
||||
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
||||
tmp_stmts[permutation[i]] = stmt_info;
|
||||
|
||||
SLP_TREE_SCALAR_STMTS (node).release ();
|
||||
SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
|
||||
if (SLP_TREE_SCALAR_STMTS (node).exists ())
|
||||
{
|
||||
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
||||
vec<stmt_vec_info> tmp_stmts;
|
||||
tmp_stmts.create (group_size);
|
||||
tmp_stmts.quick_grow (group_size);
|
||||
stmt_vec_info stmt_info;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
||||
tmp_stmts[permutation[i]] = stmt_info;
|
||||
SLP_TREE_SCALAR_STMTS (node).release ();
|
||||
SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
|
||||
}
|
||||
if (SLP_TREE_SCALAR_OPS (node).exists ())
|
||||
{
|
||||
gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
|
||||
vec<tree> tmp_ops;
|
||||
tmp_ops.create (group_size);
|
||||
tmp_ops.quick_grow (group_size);
|
||||
tree op;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
|
||||
tmp_ops[permutation[i]] = op;
|
||||
SLP_TREE_SCALAR_OPS (node).release ();
|
||||
SLP_TREE_SCALAR_OPS (node) = tmp_ops;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1682,9 +1725,10 @@ vect_gather_slp_loads (slp_instance inst, slp_tree node,
|
|||
|
||||
if (SLP_TREE_CHILDREN (node).length () == 0)
|
||||
{
|
||||
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
||||
return;
|
||||
stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
|
||||
if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
|
||||
&& STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||
if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
|
||||
SLP_INSTANCE_LOADS (inst).safe_push (node);
|
||||
}
|
||||
|
@ -2346,7 +2390,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
|
|||
|
||||
if (!only_edge)
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_external_def
|
||||
&& SLP_TREE_DEF_TYPE (child) != vect_constant_def)
|
||||
vect_detect_hybrid_slp_stmts (child, i, stype, visited);
|
||||
}
|
||||
|
||||
|
@ -2533,8 +2578,15 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
|
|||
VF divided by the number of elements in a vector. */
|
||||
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
||||
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
|
||||
{
|
||||
for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
|
||||
if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
|
||||
{
|
||||
SLP_TREE_NUMBER_OF_VEC_STMTS (node)
|
||||
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
poly_uint64 vf;
|
||||
|
@ -2598,25 +2650,31 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
|||
auto_vec<vect_def_type, 4> dt;
|
||||
dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
|
||||
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
||||
dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
|
||||
|
||||
/* Push SLP node def-type to stmt operands. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
|
||||
&& SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
||||
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
||||
= SLP_TREE_DEF_TYPE (child);
|
||||
|
||||
/* Check everything worked out. */
|
||||
bool res = true;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
{
|
||||
if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
||||
!= SLP_TREE_DEF_TYPE (child))
|
||||
res = false;
|
||||
}
|
||||
else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
|
||||
res = false;
|
||||
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
||||
{
|
||||
if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
|
||||
{
|
||||
if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
||||
!= SLP_TREE_DEF_TYPE (child))
|
||||
res = false;
|
||||
}
|
||||
else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
|
||||
!= dt[j])
|
||||
res = false;
|
||||
}
|
||||
if (!res && dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: same operand with different "
|
||||
|
@ -2628,7 +2686,8 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
|
|||
|
||||
/* Restore def-types. */
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
|
||||
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
|
||||
if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
|
||||
STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -3279,53 +3338,46 @@ duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
|
|||
|
||||
/* For constant and loop invariant defs of SLP_NODE this function returns
|
||||
(vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
|
||||
OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
|
||||
scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
|
||||
REDUC_INDEX is the index of the reduction operand in the statements, unless
|
||||
it is -1. */
|
||||
OP_NODE determines the node for the operand containing the scalar
|
||||
operands. */
|
||||
|
||||
static void
|
||||
vect_get_constant_vectors (tree op, slp_tree slp_node,
|
||||
vec<tree> *vec_oprnds,
|
||||
unsigned int op_num, unsigned int number_of_vectors)
|
||||
vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
|
||||
vec<tree> *vec_oprnds)
|
||||
{
|
||||
vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
|
||||
stmt_vec_info stmt_vinfo = stmts[0];
|
||||
stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
|
||||
vec_info *vinfo = stmt_vinfo->vinfo;
|
||||
gimple *stmt = stmt_vinfo->stmt;
|
||||
unsigned HOST_WIDE_INT nunits;
|
||||
tree vec_cst;
|
||||
unsigned j, number_of_places_left_in_vector;
|
||||
tree vector_type;
|
||||
tree vop;
|
||||
int group_size = stmts.length ();
|
||||
int group_size = op_node->ops.length ();
|
||||
unsigned int vec_num, i;
|
||||
unsigned number_of_copies = 1;
|
||||
vec<tree> voprnds;
|
||||
voprnds.create (number_of_vectors);
|
||||
bool constant_p, is_store;
|
||||
bool constant_p;
|
||||
tree neutral_op = NULL;
|
||||
enum tree_code code = gimple_expr_code (stmt);
|
||||
gimple_seq ctor_seq = NULL;
|
||||
auto_vec<tree, 16> permute_results;
|
||||
|
||||
/* ??? SLP analysis should compute the vector type for the
|
||||
constant / invariant and store it in the SLP node. */
|
||||
tree op = op_node->ops[0];
|
||||
/* Check if vector type is a boolean vector. */
|
||||
tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
|
||||
if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
|
||||
&& vect_mask_constant_operand_p (stmt_vinfo))
|
||||
vector_type
|
||||
= build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
|
||||
= build_same_sized_truth_vector_type (stmt_vectype);
|
||||
else
|
||||
vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
|
||||
|
||||
if (STMT_VINFO_DATA_REF (stmt_vinfo))
|
||||
{
|
||||
is_store = true;
|
||||
op = gimple_assign_rhs1 (stmt);
|
||||
}
|
||||
else
|
||||
is_store = false;
|
||||
|
||||
gcc_assert (op);
|
||||
unsigned int number_of_vectors
|
||||
= vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
|
||||
* TYPE_VECTOR_SUBPARTS (stmt_vectype),
|
||||
vector_type);
|
||||
vec_oprnds->create (number_of_vectors);
|
||||
auto_vec<tree> voprnds (number_of_vectors);
|
||||
|
||||
/* NUMBER_OF_COPIES is the number of times we need to use the same values in
|
||||
created vectors. It is greater than 1 if unrolling is performed.
|
||||
|
@ -3357,56 +3409,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
bool place_after_defs = false;
|
||||
for (j = 0; j < number_of_copies; j++)
|
||||
{
|
||||
for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
|
||||
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
|
||||
{
|
||||
stmt = stmt_vinfo->stmt;
|
||||
if (is_store)
|
||||
op = gimple_assign_rhs1 (stmt);
|
||||
else
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case COND_EXPR:
|
||||
{
|
||||
tree cond = gimple_assign_rhs1 (stmt);
|
||||
if (TREE_CODE (cond) == SSA_NAME)
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
else if (op_num == 0 || op_num == 1)
|
||||
op = TREE_OPERAND (cond, op_num);
|
||||
else
|
||||
{
|
||||
if (op_num == 2)
|
||||
op = gimple_assign_rhs2 (stmt);
|
||||
else
|
||||
op = gimple_assign_rhs3 (stmt);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case CALL_EXPR:
|
||||
op = gimple_call_arg (stmt, op_num);
|
||||
break;
|
||||
|
||||
case LSHIFT_EXPR:
|
||||
case RSHIFT_EXPR:
|
||||
case LROTATE_EXPR:
|
||||
case RROTATE_EXPR:
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
/* Unlike the other binary operators, shifts/rotates have
|
||||
the shift count being int, instead of the same type as
|
||||
the lhs, so make sure the scalar is the right type if
|
||||
we are dealing with vectors of
|
||||
long long/long/short/char. */
|
||||
if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
|
||||
op = fold_convert (TREE_TYPE (vector_type), op);
|
||||
break;
|
||||
|
||||
default:
|
||||
op = gimple_op (stmt, op_num + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create 'vect_ = {op0,op1,...,opn}'. */
|
||||
number_of_places_left_in_vector--;
|
||||
tree orig_op = op;
|
||||
|
@ -3476,7 +3480,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
vec_cst = gimple_build_vector (&ctor_seq, &elts);
|
||||
else
|
||||
{
|
||||
if (vec_oprnds->is_empty ())
|
||||
if (permute_results.is_empty ())
|
||||
duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
|
||||
elts, number_of_vectors,
|
||||
permute_results);
|
||||
|
@ -3520,8 +3524,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
vec_oprnds->quick_push (vop);
|
||||
}
|
||||
|
||||
voprnds.release ();
|
||||
|
||||
/* In case that VF is greater than the unrolling factor needed for the SLP
|
||||
group of stmts, NUMBER_OF_VECTORS to be created is greater than
|
||||
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
|
||||
|
@ -3552,25 +3554,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
|
|||
static void
|
||||
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
|
||||
{
|
||||
tree vec_oprnd;
|
||||
stmt_vec_info vec_def_stmt_info;
|
||||
unsigned int i;
|
||||
|
||||
gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
|
||||
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
|
||||
{
|
||||
gcc_assert (vec_def_stmt_info);
|
||||
if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
|
||||
vec_oprnd = gimple_phi_result (vec_def_phi);
|
||||
else
|
||||
vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
|
||||
vec_oprnds->quick_push (vec_oprnd);
|
||||
}
|
||||
vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
|
||||
}
|
||||
|
||||
|
||||
/* Get vectorized definitions for SLP_NODE.
|
||||
/* Get N vectorized definitions for SLP_NODE.
|
||||
If the scalar definitions are loop invariants or constants, collect them and
|
||||
call vect_get_constant_vectors() to create vector stmts.
|
||||
Otherwise, the def-stmts must be already vectorized and the vectorized stmts
|
||||
|
@ -3578,91 +3572,26 @@ vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
|
|||
vect_get_slp_vect_defs () to retrieve them. */
|
||||
|
||||
void
|
||||
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
|
||||
vec<vec<tree> > *vec_oprnds)
|
||||
vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
|
||||
{
|
||||
int number_of_vects = 0, i;
|
||||
unsigned int child_index = 0;
|
||||
HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
|
||||
slp_tree child = NULL;
|
||||
vec<tree> vec_defs;
|
||||
tree oprnd;
|
||||
bool vectorized_defs;
|
||||
if (n == -1U)
|
||||
n = SLP_TREE_CHILDREN (slp_node).length ();
|
||||
|
||||
stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
|
||||
FOR_EACH_VEC_ELT (ops, i, oprnd)
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
{
|
||||
slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
|
||||
|
||||
vec<tree> vec_defs = vNULL;
|
||||
|
||||
/* For each operand we check if it has vectorized definitions in a child
|
||||
node or we need to create them (for invariants and constants). We
|
||||
check if the LHS of the first stmt of the next child matches OPRND.
|
||||
If it does, we found the correct child. Otherwise, we call
|
||||
vect_get_constant_vectors (), and not advance CHILD_INDEX in order
|
||||
to check this child node for the next operand. */
|
||||
vectorized_defs = false;
|
||||
if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
|
||||
{
|
||||
child = SLP_TREE_CHILDREN (slp_node)[child_index];
|
||||
|
||||
/* We have to check both pattern and original def, if available. */
|
||||
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
||||
{
|
||||
stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
|
||||
stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
|
||||
tree first_def_op;
|
||||
|
||||
if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
|
||||
first_def_op = gimple_phi_result (first_def);
|
||||
else
|
||||
first_def_op = gimple_get_lhs (first_def_info->stmt);
|
||||
if (operand_equal_p (oprnd, first_def_op, 0)
|
||||
|| (related
|
||||
&& operand_equal_p (oprnd,
|
||||
gimple_get_lhs (related->stmt), 0)))
|
||||
{
|
||||
/* The number of vector defs is determined by the number of
|
||||
vector statements in the node from which we get those
|
||||
statements. */
|
||||
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
|
||||
vectorized_defs = true;
|
||||
child_index++;
|
||||
}
|
||||
}
|
||||
else
|
||||
child_index++;
|
||||
}
|
||||
|
||||
if (!vectorized_defs)
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
||||
/* Number of vector stmts was calculated according to LHS in
|
||||
vect_schedule_slp_instance (), fix it by replacing LHS with
|
||||
RHS, if necessary. See vect_get_smallest_scalar_type () for
|
||||
details. */
|
||||
vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
|
||||
&rhs_size_unit);
|
||||
if (rhs_size_unit != lhs_size_unit)
|
||||
{
|
||||
number_of_vects *= rhs_size_unit;
|
||||
number_of_vects /= lhs_size_unit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate memory for vectorized defs. */
|
||||
vec_defs = vNULL;
|
||||
vec_defs.create (number_of_vects);
|
||||
|
||||
/* For reduction defs we call vect_get_constant_vectors (), since we are
|
||||
looking for initial loop invariant values. */
|
||||
if (vectorized_defs)
|
||||
/* The defs are already vectorized. */
|
||||
vect_get_slp_vect_defs (child, &vec_defs);
|
||||
node or we need to create them (for invariants and constants). */
|
||||
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
||||
{
|
||||
vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
|
||||
vect_get_slp_vect_defs (child, &vec_defs);
|
||||
}
|
||||
else
|
||||
/* Build vectors from scalar defs. */
|
||||
vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
|
||||
number_of_vects);
|
||||
vect_get_constant_vectors (child, slp_node, &vec_defs);
|
||||
|
||||
vec_oprnds->quick_push (vec_defs);
|
||||
}
|
||||
|
|
|
@ -1726,16 +1726,8 @@ vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
|
|||
{
|
||||
if (slp_node)
|
||||
{
|
||||
int nops = (op1 == NULL_TREE) ? 1 : 2;
|
||||
auto_vec<tree> ops (nops);
|
||||
auto_vec<vec<tree> > vec_defs (nops);
|
||||
|
||||
ops.quick_push (op0);
|
||||
if (op1)
|
||||
ops.quick_push (op1);
|
||||
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs);
|
||||
|
||||
auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
|
||||
vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
|
||||
*vec_oprnds0 = vec_defs[0];
|
||||
if (op1)
|
||||
*vec_oprnds1 = vec_defs[1];
|
||||
|
@ -3473,9 +3465,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
auto_vec<vec<tree> > vec_defs (nargs);
|
||||
vec<tree> vec_oprnds0;
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
vargs[i] = gimple_call_arg (stmt, i);
|
||||
vect_get_slp_defs (vargs, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds0 = vec_defs[0];
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
|
@ -3647,9 +3637,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
auto_vec<vec<tree> > vec_defs (nargs);
|
||||
vec<tree> vec_oprnds0;
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
vargs.quick_push (gimple_call_arg (stmt, i));
|
||||
vect_get_slp_defs (vargs, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds0 = vec_defs[0];
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
|
@ -6195,12 +6183,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
if (slp_node)
|
||||
{
|
||||
auto_vec<tree> ops(3);
|
||||
ops.quick_push (op0);
|
||||
ops.quick_push (op1);
|
||||
ops.quick_push (op2);
|
||||
auto_vec<vec<tree> > vec_defs(3);
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds0 = vec_defs[0];
|
||||
vec_oprnds1 = vec_defs[1];
|
||||
vec_oprnds2 = vec_defs[2];
|
||||
|
@ -9271,10 +9255,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
if (slp_node)
|
||||
{
|
||||
auto_vec<tree> ops (1);
|
||||
auto_vec<vec<tree> > vec_defs (1);
|
||||
ops.quick_push (mask);
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_mask = vec_defs[0][0];
|
||||
}
|
||||
else
|
||||
|
@ -10046,19 +10028,8 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
if (slp_node)
|
||||
{
|
||||
auto_vec<tree, 4> ops;
|
||||
auto_vec<vec<tree>, 4> vec_defs;
|
||||
|
||||
if (masked)
|
||||
ops.safe_push (cond_expr);
|
||||
else
|
||||
{
|
||||
ops.safe_push (cond_expr0);
|
||||
ops.safe_push (cond_expr1);
|
||||
}
|
||||
ops.safe_push (then_clause);
|
||||
ops.safe_push (else_clause);
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds3 = vec_defs.pop ();
|
||||
vec_oprnds2 = vec_defs.pop ();
|
||||
if (!masked)
|
||||
|
@ -10461,12 +10432,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
if (slp_node)
|
||||
{
|
||||
auto_vec<tree, 2> ops;
|
||||
auto_vec<vec<tree>, 2> vec_defs;
|
||||
|
||||
ops.safe_push (rhs1);
|
||||
ops.safe_push (rhs2);
|
||||
vect_get_slp_defs (ops, slp_node, &vec_defs);
|
||||
vect_get_slp_defs (slp_node, &vec_defs);
|
||||
vec_oprnds1 = vec_defs.pop ();
|
||||
vec_oprnds0 = vec_defs.pop ();
|
||||
if (swap_p)
|
||||
|
|
|
@ -120,6 +120,8 @@ struct _slp_tree {
|
|||
vec<slp_tree> children;
|
||||
/* A group of scalar stmts to be vectorized together. */
|
||||
vec<stmt_vec_info> stmts;
|
||||
/* A group of scalar operands to be vectorized together. */
|
||||
vec<tree> ops;
|
||||
/* Load permutation relative to the stores, NULL if there is no
|
||||
permutation. */
|
||||
vec<unsigned> load_permutation;
|
||||
|
@ -171,6 +173,7 @@ public:
|
|||
|
||||
#define SLP_TREE_CHILDREN(S) (S)->children
|
||||
#define SLP_TREE_SCALAR_STMTS(S) (S)->stmts
|
||||
#define SLP_TREE_SCALAR_OPS(S) (S)->ops
|
||||
#define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts
|
||||
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
|
||||
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
|
||||
|
@ -1750,7 +1753,7 @@ extern void vect_schedule_slp (vec_info *);
|
|||
extern opt_result vect_analyze_slp (vec_info *, unsigned);
|
||||
extern bool vect_make_slp_decision (loop_vec_info);
|
||||
extern void vect_detect_hybrid_slp (loop_vec_info);
|
||||
extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *);
|
||||
extern void vect_get_slp_defs (slp_tree, vec<vec<tree> > *, unsigned n = -1U);
|
||||
extern bool vect_slp_bb (basic_block);
|
||||
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
|
||||
extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
|
||||
|
|
Loading…
Add table
Reference in a new issue