re PR tree-optimization/60510 (SLP blocks loop vectorization (with reduction))
2017-07-03 Richard Biener <rguenther@suse.de> PR tree-optimization/60510 * tree-vect-loop.c (vect_create_epilog_for_reduction): Pass in the scalar reduction PHI and use it. (vectorizable_reduction): Properly guard the single_defuse_cycle path for non-SLP reduction chains where we cannot use it. Rework reduc_def/index and vector type deduction. Rework vector operand gathering during reduction op code-gen. * tree-vect-slp.c (vect_analyze_slp): For failed SLP reduction chains dissolve the chain and leave it to non-SLP reduction handling. * gfortran.dg/vect/pr60510.f: New testcase. From-SVN: r249919
This commit is contained in:
parent
9607b014b2
commit
6b5e165bd8
5 changed files with 161 additions and 71 deletions
|
@ -1,3 +1,16 @@
|
|||
2017-07-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/60510
|
||||
* tree-vect-loop.c (vect_create_epilog_for_reduction): Pass in
|
||||
the scalar reduction PHI and use it.
|
||||
(vectorizable_reduction): Properly guard the single_defuse_cycle
|
||||
path for non-SLP reduction chains where we cannot use it.
|
||||
Rework reduc_def/index and vector type deduction. Rework
|
||||
vector operand gathering during reduction op code-gen.
|
||||
* tree-vect-slp.c (vect_analyze_slp): For failed SLP reduction
|
||||
chains dissolve the chain and leave it to non-SLP reduction
|
||||
handling.
|
||||
|
||||
2017-07-03 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* tree-data-ref.h (dr_alignment): Declare.
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2017-07-03 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/60510
|
||||
* gfortran.dg/vect/pr60510.f: New testcase.
|
||||
|
||||
2017-07-03 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
|
||||
|
||||
* gcc.target/i386/mvc6.c: Require ifunc support.
|
||||
|
|
29
gcc/testsuite/gfortran.dg/vect/pr60510.f
Normal file
29
gcc/testsuite/gfortran.dg/vect/pr60510.f
Normal file
|
@ -0,0 +1,29 @@
|
|||
! { dg-do run }
|
||||
! { dg-additional-options "-fno-inline -ffast-math" }
|
||||
subroutine foo(a,x,y,n)
|
||||
implicit none
|
||||
integer n,i
|
||||
|
||||
real*8 y(n),x(n),a
|
||||
|
||||
do i=1,n
|
||||
a=a+x(i)*y(i)+x(i)
|
||||
enddo
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
program test
|
||||
real*8 x(1024),y(1024),a
|
||||
do i=1,1024
|
||||
x(i) = i
|
||||
y(i) = i+1
|
||||
enddo
|
||||
call foo(a,x,y,1024)
|
||||
if (a.ne.359488000.0) call abort()
|
||||
end
|
||||
! If there's no longer a reduction chain detected this doesn't test what
|
||||
! it was supposed to test, vectorizing a reduction chain w/o SLP.
|
||||
! { dg-final { scan-tree-dump "reduction chain" "vect" } }
|
||||
! We should vectorize the reduction in foo and the induction in test.
|
||||
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } }
|
|
@ -4313,6 +4313,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
|||
|
||||
static void
|
||||
vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
gimple *reduc_def_stmt,
|
||||
int ncopies, enum tree_code reduc_code,
|
||||
vec<gimple *> reduction_phis,
|
||||
int reduc_index, bool double_reduc,
|
||||
|
@ -4401,9 +4402,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
|||
{
|
||||
/* Get at the scalar def before the loop, that defines the initial value
|
||||
of the reduction variable. */
|
||||
tree reduction_op = get_reduction_op (stmt, reduc_index);
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (reduction_op);
|
||||
initial_def = PHI_ARG_DEF_FROM_EDGE (def_stmt,
|
||||
gimple *def_stmt;
|
||||
initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
|
||||
loop_preheader_edge (loop));
|
||||
vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt);
|
||||
vec_initial_def = get_initial_def_for_reduction (stmt, initial_def,
|
||||
|
@ -5581,19 +5581,17 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
tree scalar_type;
|
||||
bool is_simple_use;
|
||||
gimple *orig_stmt;
|
||||
stmt_vec_info orig_stmt_info;
|
||||
stmt_vec_info orig_stmt_info = NULL;
|
||||
int i;
|
||||
int ncopies;
|
||||
int epilog_copies;
|
||||
stmt_vec_info prev_stmt_info, prev_phi_info;
|
||||
bool single_defuse_cycle = false;
|
||||
tree reduc_def = NULL_TREE;
|
||||
gimple *new_stmt = NULL;
|
||||
int j;
|
||||
tree ops[3];
|
||||
enum vect_def_type dts[3];
|
||||
bool nested_cycle = false, found_nested_cycle_def = false;
|
||||
gimple *reduc_def_stmt = NULL;
|
||||
bool double_reduc = false;
|
||||
basic_block def_bb;
|
||||
struct loop * def_stmt_loop, *outer_loop = NULL;
|
||||
|
@ -5601,6 +5599,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
gimple *def_arg_stmt;
|
||||
auto_vec<tree> vec_oprnds0;
|
||||
auto_vec<tree> vec_oprnds1;
|
||||
auto_vec<tree> vec_oprnds2;
|
||||
auto_vec<tree> vect_defs;
|
||||
auto_vec<gimple *> phis;
|
||||
int vec_num;
|
||||
|
@ -5643,8 +5642,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
|
||||
if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
|
||||
reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
|
||||
if (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt)) <= vect_used_only_live)
|
||||
single_defuse_cycle = true;
|
||||
|
||||
gcc_assert (is_gimple_assign (reduc_stmt));
|
||||
for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
|
||||
|
@ -5666,6 +5663,17 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
/ TYPE_VECTOR_SUBPARTS (vectype_in));
|
||||
|
||||
use_operand_p use_p;
|
||||
gimple *use_stmt;
|
||||
if (ncopies > 1
|
||||
&& (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt))
|
||||
<= vect_used_only_live)
|
||||
&& single_imm_use (gimple_phi_result (stmt), &use_p, &use_stmt)
|
||||
&& (use_stmt == reduc_stmt
|
||||
|| (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt))
|
||||
== reduc_stmt)))
|
||||
single_defuse_cycle = true;
|
||||
|
||||
/* Create the destination vector */
|
||||
scalar_dest = gimple_assign_lhs (reduc_stmt);
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
||||
|
@ -5769,10 +5777,6 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
/* The default is that the reduction variable is the last in statement. */
|
||||
int reduc_index = op_type - 1;
|
||||
if (code == MINUS_EXPR)
|
||||
reduc_index = 0;
|
||||
|
||||
if (code == COND_EXPR && slp_node)
|
||||
return false;
|
||||
|
@ -5792,22 +5796,30 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
The last use is the reduction variable. In case of nested cycle this
|
||||
assumption is not true: we use reduc_index to record the index of the
|
||||
reduction variable. */
|
||||
gimple *reduc_def_stmt = NULL;
|
||||
int reduc_index = -1;
|
||||
for (i = 0; i < op_type; i++)
|
||||
{
|
||||
if (i == reduc_index)
|
||||
continue;
|
||||
|
||||
/* The condition of COND_EXPR is checked in vectorizable_condition(). */
|
||||
if (i == 0 && code == COND_EXPR)
|
||||
continue;
|
||||
|
||||
is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
|
||||
&def_stmt, &dts[i], &tem);
|
||||
if (!vectype_in)
|
||||
vectype_in = tem;
|
||||
gcc_assert (is_simple_use);
|
||||
|
||||
dt = dts[i];
|
||||
gcc_assert (is_simple_use);
|
||||
if (dt == vect_reduction_def)
|
||||
{
|
||||
reduc_def_stmt = def_stmt;
|
||||
reduc_index = i;
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!vectype_in)
|
||||
vectype_in = tem;
|
||||
}
|
||||
|
||||
if (dt != vect_internal_def
|
||||
&& dt != vect_external_def
|
||||
&& dt != vect_constant_def
|
||||
|
@ -5836,22 +5848,29 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
}
|
||||
|
||||
is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
|
||||
&def_stmt, &dts[reduc_index], &tem);
|
||||
if (!vectype_in)
|
||||
vectype_in = tem;
|
||||
gcc_assert (is_simple_use);
|
||||
if (!found_nested_cycle_def)
|
||||
reduc_def_stmt = def_stmt;
|
||||
vectype_in = vectype_out;
|
||||
|
||||
if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
|
||||
/* When vectorizing a reduction chain w/o SLP the reduction PHI is not
|
||||
directly used in stmt. */
|
||||
if (reduc_index == -1)
|
||||
{
|
||||
if (orig_stmt)
|
||||
reduc_def_stmt = STMT_VINFO_REDUC_DEF (orig_stmt_info);
|
||||
else
|
||||
reduc_def_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
|
||||
}
|
||||
|
||||
if (! reduc_def_stmt || gimple_code (reduc_def_stmt) != GIMPLE_PHI)
|
||||
return false;
|
||||
|
||||
dt = dts[reduc_index];
|
||||
if (!(dt == vect_reduction_def
|
||||
|| dt == vect_nested_cycle
|
||||
|| ((dt == vect_internal_def || dt == vect_external_def
|
||||
|| dt == vect_constant_def || dt == vect_induction_def)
|
||||
if (!(reduc_index == -1
|
||||
|| dts[reduc_index] == vect_reduction_def
|
||||
|| dts[reduc_index] == vect_nested_cycle
|
||||
|| ((dts[reduc_index] == vect_internal_def
|
||||
|| dts[reduc_index] == vect_external_def
|
||||
|| dts[reduc_index] == vect_constant_def
|
||||
|| dts[reduc_index] == vect_induction_def)
|
||||
&& nested_cycle && found_nested_cycle_def)))
|
||||
{
|
||||
/* For pattern recognized stmts, orig_stmt might be a reduction,
|
||||
|
@ -6249,9 +6268,17 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
(i.e. we generate VF/2 results in a single register).
|
||||
In this case for each copy we get the vector def for the reduction variable
|
||||
from the vectorized reduction operation generated in the previous iteration.
|
||||
*/
|
||||
|
||||
if (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
|
||||
This only works when we see both the reduction PHI and its only consumer
|
||||
in vectorizable_reduction and there are no intermediate stmts
|
||||
participating. */
|
||||
use_operand_p use_p;
|
||||
gimple *use_stmt;
|
||||
if (ncopies > 1
|
||||
&& (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
|
||||
&& single_imm_use (gimple_phi_result (reduc_def_stmt), &use_p, &use_stmt)
|
||||
&& (use_stmt == stmt
|
||||
|| STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) == stmt))
|
||||
{
|
||||
single_defuse_cycle = true;
|
||||
epilog_copies = 1;
|
||||
|
@ -6267,8 +6294,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
vec_num = 1;
|
||||
vec_oprnds0.create (1);
|
||||
vec_oprnds1.create (1);
|
||||
if (op_type == ternary_op)
|
||||
vec_oprnds1.create (1);
|
||||
vec_oprnds2.create (1);
|
||||
}
|
||||
|
||||
phis.create (vec_num);
|
||||
|
@ -6321,65 +6349,66 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
auto_vec<tree, 3> slp_ops;
|
||||
auto_vec<vec<tree>, 3> vec_defs;
|
||||
|
||||
slp_ops.quick_push (reduc_index == 0 ? NULL : ops[0]);
|
||||
slp_ops.quick_push (reduc_index == 1 ? NULL : ops[1]);
|
||||
slp_ops.quick_push (ops[0]);
|
||||
slp_ops.quick_push (ops[1]);
|
||||
if (op_type == ternary_op)
|
||||
slp_ops.quick_push (reduc_index == 2 ? NULL : ops[2]);
|
||||
slp_ops.quick_push (ops[2]);
|
||||
|
||||
vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
|
||||
|
||||
vec_oprnds0.safe_splice (vec_defs[reduc_index == 0 ? 1 : 0]);
|
||||
vec_defs[reduc_index == 0 ? 1 : 0].release ();
|
||||
vec_oprnds0.safe_splice (vec_defs[0]);
|
||||
vec_defs[0].release ();
|
||||
vec_oprnds1.safe_splice (vec_defs[1]);
|
||||
vec_defs[1].release ();
|
||||
if (op_type == ternary_op)
|
||||
{
|
||||
vec_oprnds1.safe_splice (vec_defs[reduc_index == 2 ? 1 : 2]);
|
||||
vec_defs[reduc_index == 2 ? 1 : 2].release ();
|
||||
vec_oprnds2.safe_splice (vec_defs[2]);
|
||||
vec_defs[2].release ();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_oprnds0.quick_push
|
||||
(vect_get_vec_def_for_operand (ops[!reduc_index], stmt));
|
||||
(vect_get_vec_def_for_operand (ops[0], stmt));
|
||||
vec_oprnds1.quick_push
|
||||
(vect_get_vec_def_for_operand (ops[1], stmt));
|
||||
if (op_type == ternary_op)
|
||||
vec_oprnds1.quick_push
|
||||
(vect_get_vec_def_for_operand (reduc_index == 0
|
||||
? ops[2] : ops[1], stmt));
|
||||
vec_oprnds2.quick_push
|
||||
(vect_get_vec_def_for_operand (ops[2], stmt));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!slp_node)
|
||||
{
|
||||
vec_oprnds0[0]
|
||||
= vect_get_vec_def_for_stmt_copy (dts[!reduc_index],
|
||||
vec_oprnds0[0]);
|
||||
if (op_type == ternary_op)
|
||||
vec_oprnds1[0]
|
||||
= vect_get_vec_def_for_stmt_copy (dts[reduc_index == 0
|
||||
? 2 : 1],
|
||||
vec_oprnds1[0]);
|
||||
}
|
||||
gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
|
||||
|
||||
if (single_defuse_cycle)
|
||||
reduc_def = gimple_assign_lhs (new_stmt);
|
||||
if (single_defuse_cycle && reduc_index == 0)
|
||||
vec_oprnds0[0] = gimple_assign_lhs (new_stmt);
|
||||
else
|
||||
vec_oprnds0[0]
|
||||
= vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]);
|
||||
if (single_defuse_cycle && reduc_index == 1)
|
||||
vec_oprnds1[0] = gimple_assign_lhs (new_stmt);
|
||||
else
|
||||
vec_oprnds1[0]
|
||||
= vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]);
|
||||
if (op_type == ternary_op)
|
||||
{
|
||||
if (single_defuse_cycle && reduc_index == 2)
|
||||
vec_oprnds2[0] = gimple_assign_lhs (new_stmt);
|
||||
else
|
||||
vec_oprnds2[0]
|
||||
= vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
|
||||
{
|
||||
if (slp_node)
|
||||
reduc_def = PHI_RESULT (phis[i]);
|
||||
else
|
||||
{
|
||||
if (!single_defuse_cycle || j == 0)
|
||||
reduc_def = PHI_RESULT (new_phi);
|
||||
}
|
||||
|
||||
tree vop[3] = { def0, NULL_TREE, NULL_TREE };
|
||||
tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
|
||||
if (op_type == ternary_op)
|
||||
vop[1] = vec_oprnds1[i];
|
||||
for (int k = 2; k > reduc_index; --k)
|
||||
vop[k] = vop[k - 1];
|
||||
vop[reduc_index] = reduc_def;
|
||||
vop[2] = vec_oprnds2[i];
|
||||
|
||||
new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
new_stmt = gimple_build_assign (new_temp, code,
|
||||
|
@ -6411,7 +6440,8 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
|||
if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
|
||||
vect_defs[0] = gimple_assign_lhs (*vec_stmt);
|
||||
|
||||
vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
|
||||
vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
|
||||
epilog_copies,
|
||||
epilog_reduc_code, phis, reduc_index,
|
||||
double_reduc, slp_node);
|
||||
|
||||
|
|
|
@ -2121,7 +2121,20 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
|||
FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
|
||||
if (! vect_analyze_slp_instance (vinfo, first_element,
|
||||
max_tree_size))
|
||||
return false;
|
||||
{
|
||||
/* Dissolve reduction chain group. */
|
||||
gimple *next, *stmt = first_element;
|
||||
while (stmt)
|
||||
{
|
||||
stmt_vec_info vinfo = vinfo_for_stmt (stmt);
|
||||
next = GROUP_NEXT_ELEMENT (vinfo);
|
||||
GROUP_FIRST_ELEMENT (vinfo) = NULL;
|
||||
GROUP_NEXT_ELEMENT (vinfo) = NULL;
|
||||
stmt = next;
|
||||
}
|
||||
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
|
||||
= vect_internal_def;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find SLP sequences starting from groups of reductions. */
|
||||
|
|
Loading…
Add table
Reference in a new issue