Vectorization of first-order recurrences

The following picks up the prototype by Ju-Zhe Zhong for vectorizing
first-order recurrences.  This solves two TSVC missed-optimization PRs.

There's a new scalar cycle def kind, vect_first_order_recurrence,
and its handling of the backedge value vectorization is complicated
by the fact that the vectorized value isn't the PHI but instead
a (series of) permute(s) shifting in the recurring value from the
previous iteration.  I've implemented this by creating both the
single vectorized PHI and the series of permutes when vectorizing
the scalar PHI, but leaving the backedge values in both unassigned.
The backedge values are (for the testcases) computed by a load,
which is also the place after which the permutes are inserted.
That placement also restricts the cases we can handle (without
resorting to code motion).
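
To make the scheme concrete, here is a hand-written C sketch of the
transformation (illustration only, not vectorizer output; the names
vprev/vcur/vshift are made up): the vectorized PHI carries the previous
iteration's vector of a[] values and a permute shifts its last lane in
front of the freshly loaded vector, which is exactly the value the
scalar 't' would have held.

/* Hand-written illustration of the first-order recurrence scheme at
   VF = 4, with the vector operations spelled out as scalar lanes.
   'vprev' plays the role of the vectorized PHI; the shift corresponds
   to the VEC_PERM inserted after the load.  */
#include <stdlib.h>

#define N 64
#define VF 4

int a[N], b[N], b_ref[N];

int
main (void)
{
  for (int i = 0; i < N; ++i)
    a[i] = (i * i) % 31;

  /* Scalar first-order recurrence: b[i] = a[i] - t; t = a[i];  */
  int t = 7;
  for (int i = 0; i < N; ++i)
    {
      b_ref[i] = a[i] - t;
      t = a[i];
    }

  int vprev[VF] = { 7, 7, 7, 7 };   /* splat of the initial value  */
  for (int i = 0; i < N; i += VF)
    {
      int vcur[VF], vshift[VF];
      for (int l = 0; l < VF; ++l)
        vcur[l] = a[i + l];
      /* vec_perm (vprev, vcur, { VF-1, VF, VF+1, VF+2 })  */
      vshift[0] = vprev[VF - 1];
      for (int l = 1; l < VF; ++l)
        vshift[l] = vcur[l - 1];
      for (int l = 0; l < VF; ++l)
        b[i + l] = vcur[l] - vshift[l];
      for (int l = 0; l < VF; ++l)
        vprev[l] = vcur[l];   /* becomes the backedge value of the PHI  */
    }

  for (int i = 0; i < N; ++i)
    if (b[i] != b_ref[i])
      abort ();
  return 0;
}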

I added both costing and SLP handling, though SLP handling is
restricted to the case where a single vectorized PHI is enough.
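
As a rough sketch of that restriction (my own illustration, not code
from the patch): for an SLP group of dist recurrences in a vector of
nunits lanes the permute pulls the last dist lanes from the previous
vector and the first nunits - dist lanes from the current one, and a
single vector PHI can only make progress while dist * 2 <= nunits.

/* Illustration only: print the permute selector a recurrence group of
   DIST lanes would use with NUNITS lanes per vector.  Indices
   0..NUNITS-1 select from the previous vector, NUNITS..2*NUNITS-1
   from the current one.  */
#include <stdio.h>

static void
print_sel (int nunits, int dist)
{
  if (dist * 2 > nunits)
    {
      printf ("nunits=%d dist=%d: would need more than one vector PHI\n",
              nunits, dist);
      return;
    }
  printf ("nunits=%d dist=%d: {", nunits, dist);
  for (int i = 0; i < nunits; ++i)
    printf (" %d", nunits - dist + i);
  printf (" }\n");
}

int
main (void)
{
  print_sel (4, 1);   /* single recurrence, e.g. V4SI: { 3 4 5 6 }  */
  print_sel (8, 2);   /* two-lane SLP group: { 6 7 8 9 10 11 12 13 }  */
  print_sel (4, 4);   /* group too wide for one vector  */
  return 0;
}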

Missing is epilogue handling: while prologue peeling would
be handled transparently by adjusting iv_phi_p, the epilogue
case doesn't work by just inserting a scalar LC PHI since
that a) keeps the scalar load live and b) uses the wrong load;
it has to be the last one, much like when we'd vectorize the
LC PHI as a live operation.  Unfortunately LIVE
compute/analysis happens too early, before we decide on
peeling.  When using fully masked loop vectorization,
vect-recurr-6.c works as expected though.
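
To show what correct epilogue handling would have to provide (again a
hand-written sketch, not something the patch generates): the scalar
remainder loop has to continue the recurrence from the last a[] element
the vector loop loaded, much like what vectorizing the LC PHI as a
live operation would give us.

/* Hand-written sketch of the missing epilogue handling: the vector
   main loop covers the first n/VF*VF elements, and the scalar
   epilogue must continue the recurrence from the last element the
   vector loop loaded.  */
#include <stdlib.h>

#define VF 4

static void
foo_with_epilogue (int *a, int *b, int t0, int n)
{
  int vec_end = n / VF * VF;
  int last = t0;

  /* "Vector" main loop, written as scalar lanes for clarity.  */
  for (int i = 0; i < vec_end; i += VF)
    {
      for (int l = 0; l < VF; ++l)
        b[i + l] = a[i + l] - (l == 0 ? last : a[i + l - 1]);
      last = a[i + VF - 1];   /* value the epilogue must start from  */
    }

  /* Scalar epilogue: t must be 'last', not t0.  */
  int t = last;
  for (int i = vec_end; i < n; ++i)
    {
      b[i] = a[i] - t;
      t = a[i];
    }
}

int
main (void)
{
  int a[63], b[63], b_ref[63];
  for (int i = 0; i < 63; ++i)
    a[i] = (i * 7) % 13;
  int t = 5;
  for (int i = 0; i < 63; ++i)
    {
      b_ref[i] = a[i] - t;
      t = a[i];
    }
  foo_with_epilogue (a, b, 5, 63);
  for (int i = 0; i < 63; ++i)
    if (b[i] != b_ref[i])
      abort ();
  return 0;
}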

I have tested this on x86_64 for now, but since epilogue
handling is missing there are probably no practical cases.
My prototype WHILE_ULT AVX512 patch can handle vect-recurr-6.c
just fine, but I didn't feel like running SPEC within SDE, nor
is the WHILE_ULT patch complete enough.

	PR tree-optimization/99409
	PR tree-optimization/99394
	* tree-vectorizer.h (vect_def_type::vect_first_order_recurrence): Add.
	(stmt_vec_info_type::recurr_info_type): Likewise.
	(vectorizable_recurr): New function.
	* tree-vect-loop.cc (vect_phi_first_order_recurrence_p): New
	function.
	(vect_analyze_scalar_cycles_1): Look for first order
	recurrences.
	(vect_analyze_loop_operations): Handle them.
	(vect_transform_loop): Likewise.
	(vectorizable_recurr): New function.
	(maybe_set_vectorized_backedge_value): Handle the backedge value
	setting in the first order recurrence PHI and the permutes.
	* tree-vect-stmts.cc (vect_analyze_stmt): Handle first order
	recurrences.
	(vect_transform_stmt): Likewise.
	(vect_is_simple_use): Likewise.
	(vect_is_simple_use): Likewise.
	* tree-vect-slp.cc (vect_get_and_check_slp_defs): Likewise.
	(vect_build_slp_tree_2): Likewise.
	(vect_schedule_scc): Handle the backedge value setting in the
	first order recurrence PHI and the permutes.

	* gcc.dg/vect/vect-recurr-1.c: New testcase.
	* gcc.dg/vect/vect-recurr-2.c: Likewise.
	* gcc.dg/vect/vect-recurr-3.c: Likewise.
	* gcc.dg/vect/vect-recurr-4.c: Likewise.
	* gcc.dg/vect/vect-recurr-5.c: Likewise.
	* gcc.dg/vect/vect-recurr-6.c: Likewise.
	* gcc.dg/vect/tsvc/vect-tsvc-s252.c: Un-XFAIL.
	* gcc.dg/vect/tsvc/vect-tsvc-s254.c: Likewise.
	* gcc.dg/vect/tsvc/vect-tsvc-s291.c: Likewise.

Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Richard Biener 2022-10-06 13:56:09 +02:00
parent acdb24166d
commit 46a8e017d0
13 changed files with 560 additions and 30 deletions

gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c

@@ -40,4 +40,4 @@ int main (int argc, char **argv)
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */

gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c

@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */

gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c

@@ -39,4 +39,4 @@ int main (int argc, char **argv)
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-1.c

@@ -0,0 +1,38 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
{
int t = *c;
for (int i = 0; i < 64; ++i)
{
b[i] = a[i] - t;
t = a[i];
}
}
int a[64], b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c);
for (int i = 1; i < 64; ++i)
if (b[i] != a[i] - a[i-1])
abort ();
if (b[0] != -7)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-2.c

@@ -0,0 +1,39 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
{
int t = *c;
for (int i = 0; i < 64; ++i)
{
b[i] = a[i] - t;
t = a[i];
}
}
int a[64];
short b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c);
for (int i = 1; i < 64; ++i)
if (b[i] != a[i] - a[i-1])
abort ();
if (b[0] != -7)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-3.c

@@ -0,0 +1,39 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, signed char * __restrict__ b, int * __restrict__ c)
{
int t = *c;
for (int i = 0; i < 64; ++i)
{
b[i] = a[i] - t;
t = a[i];
}
}
int a[64];
signed char b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c);
for (int i = 1; i < 64; ++i)
if (b[i] != a[i] - a[i-1])
abort ();
if (b[0] != -7)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-4.c

@@ -0,0 +1,42 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
{
int t1 = *c;
int t2 = *c;
for (int i = 0; i < 64; i+=2)
{
b[i] = a[i] - t1;
t1 = a[i];
b[i+1] = a[i+1] - t2;
t2 = a[i+1];
}
}
int a[64], b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c);
for (int i = 2; i < 64; i+=2)
if (b[i] != a[i] - a[i-2]
|| b[i+1] != a[i+1] - a[i-1])
abort ();
if (b[0] != -7 || b[1] != -6)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-5.c

@@ -0,0 +1,43 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
{
int t1 = *c;
int t2 = *c;
for (int i = 0; i < 64; i+=2)
{
b[i] = a[i] - t1;
t1 = a[i];
b[i+1] = a[i+1] - t2;
t2 = a[i+1];
}
}
int a[64];
short b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c);
for (int i = 2; i < 64; i+=2)
if (b[i] != a[i] - a[i-2]
|| b[i+1] != a[i+1] - a[i-1])
abort ();
if (b[0] != -7 || b[1] != -6)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */

gcc/testsuite/gcc.dg/vect/vect-recurr-6.c

@@ -0,0 +1,39 @@
/* { dg-do run } */
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
void __attribute__((noipa))
foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c, int n)
{
int t = *c;
for (int i = 0; i < n; ++i)
{
b[i] = a[i] - t;
t = a[i];
}
}
int a[64], b[64];
int
main ()
{
check_vect ();
for (int i = 0; i < 64; ++i)
{
a[i] = i;
__asm__ volatile ("" ::: "memory");
}
int c = 7;
foo (a, b, &c, 63);
for (int i = 1; i < 63; ++i)
if (b[i] != a[i] - a[i-1])
abort ();
if (b[0] != -7)
abort ();
return 0;
}
/* ??? We miss epilogue handling for first order recurrences. */
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_fully_masked } } } */

gcc/tree-vect-loop.cc

@@ -529,6 +529,45 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
return false;
}
/* Returns true if Phi is a first-order recurrence. A first-order
recurrence is a non-reduction recurrence relation in which the value of
the recurrence in the current loop iteration equals a value defined in
the previous iteration. */
static bool
vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
gphi *phi)
{
/* Ensure the loop latch definition is from within the loop. */
edge latch = loop_latch_edge (loop);
tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch);
if (TREE_CODE (ldef) != SSA_NAME
|| SSA_NAME_IS_DEFAULT_DEF (ldef)
|| !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef))))
return false;
tree def = gimple_phi_result (phi);
/* Ensure every use_stmt of the phi node is dominated by the latch
definition. */
imm_use_iterator imm_iter;
use_operand_p use_p;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
if (!is_gimple_debug (USE_STMT (use_p))
&& (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p)
|| !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef),
USE_STMT (use_p))))
return false;
/* First-order recurrence autovectorization needs a shuffle vector. */
tree scalar_type = TREE_TYPE (def);
tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
if (!vectype)
return false;
return true;
}
/* Function vect_analyze_scalar_cycles_1.
Examine the cross iteration def-use cycles of scalar variables
@@ -666,6 +705,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
}
}
}
else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence;
else
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1810,7 +1851,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
|| STMT_VINFO_LIVE_P (stmt_info))
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
&& STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
/* A scalar-dependence cycle that we don't support. */
return opt_result::failure_at (phi,
"not vectorized:"
@@ -1831,6 +1873,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
&& ! PURE_SLP_STMT (stmt_info))
ok = vectorizable_reduction (loop_vinfo,
stmt_info, NULL, NULL, &cost_vec);
else if ((STMT_VINFO_DEF_TYPE (stmt_info)
== vect_first_order_recurrence)
&& ! PURE_SLP_STMT (stmt_info))
ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL,
&cost_vec);
}
/* SLP PHIs are tested by vect_slp_analyze_node_operations. */
@@ -8290,6 +8337,178 @@ vectorizable_phi (vec_info *,
return true;
}
/* Vectorizes first order recurrences. An overview of the transformation
is described below. Suppose we have the following loop.
int t = 0;
for (int i = 0; i < n; ++i)
{
b[i] = a[i] - t;
t = a[i];
}
There is a first-order recurrence on 'a'. For this loop, the scalar IR
looks (simplified) like:
scalar.preheader:
init = 0;
scalar.body:
i = PHI <0(scalar.preheader), i+1(scalar.body)>
_2 = PHI <init(scalar.preheader), _1(scalar.body)>
_1 = a[i]
b[i] = _1 - _2
if (i < n) goto scalar.body
In this example, _2 is a recurrence because its value depends on the
previous iteration. We vectorize this as (VF = 4)
vector.preheader:
vect_init = vect_cst(..., ..., ..., 0)
vector.body
i = PHI <0(vector.preheader), i+4(vector.body)>
vect_1 = PHI <vect_init(vector.preheader), v2(vector.body)>
vect_2 = a[i, i+1, i+2, i+3];
vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 })
b[i, i+1, i+2, i+3] = vect_2 - vect_3
if (..) goto vector.body
In this function, vectorizable_recurr, we code generate both the
vector PHI node and the permute since those together compute the
vectorized value of the scalar PHI. We do not yet have the
backedge value to fill in there nor into the vec_perm. Those
are filled in maybe_set_vectorized_backedge_value and
vect_schedule_scc.
TODO: Since the scalar loop does not have a use of the recurrence
outside of the loop the natural way to implement peeling via
vectorizing the live value doesn't work. For now peeling of loops
with a recurrence is not implemented. For SLP the supported cases
are restricted to those requiring a single vector recurrence PHI. */
bool
vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
gimple **vec_stmt, slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
if (!loop_vinfo || !is_a<gphi *> (stmt_info->stmt))
return false;
gphi *phi = as_a<gphi *> (stmt_info->stmt);
/* So far we only support first-order recurrence auto-vectorization. */
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
return false;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned ncopies;
if (slp_node)
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
else
ncopies = vect_get_num_copies (loop_vinfo, vectype);
poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1;
/* We need to be able to make progress with a single vector. */
if (maybe_gt (dist * 2, nunits))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"first order recurrence exceeds half of "
"a vector\n");
return false;
}
/* First-order recurrence autovectorization needs to handle permutation
with indices = [nunits-1, nunits, nunits+1, ...]. */
vec_perm_builder sel (nunits, 1, 3);
for (int i = 0; i < 3; ++i)
sel.quick_push (nunits - dist + i);
vec_perm_indices indices (sel, 2, nunits);
if (!vec_stmt) /* transformation not required. */
{
if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
indices))
return false;
if (slp_node)
{
/* We eventually need to set a vector type on invariant
arguments. */
unsigned j;
slp_tree child;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
if (!vect_maybe_update_slp_op_vectype
(child, SLP_TREE_VECTYPE (slp_node)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"incompatible vector types for "
"invariants\n");
return false;
}
}
/* The recurrence costs the initialization vector and one permute
for each copy. */
unsigned prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec,
stmt_info, 0, vect_prologue);
unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
stmt_info, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"vectorizable_recurr: inside_cost = %d, "
"prologue_cost = %d .\n", inside_cost,
prologue_cost);
STMT_VINFO_TYPE (stmt_info) = recurr_info_type;
return true;
}
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
basic_block bb = gimple_bb (phi);
tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
tree vec_init = build_vector_from_val (vectype, preheader);
vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL);
/* Create the vectorized first-order PHI node. */
tree vec_dest = vect_get_new_vect_var (vectype,
vect_simple_var, "vec_recur_");
gphi *new_phi = create_phi_node (vec_dest, bb);
add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION);
/* Insert shuffles for the first-order recurrence autovectorization:
result = VEC_PERM <vec_recur, vect_1, index[nunits-1, nunits, ...]>. */
tree perm = vect_gen_perm_mask_checked (vectype, indices);
/* Insert the required permute after the latch definition. The
second and later operands are tentative and will be updated when we have
vectorized the latch definition. */
edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
gimple_stmt_iterator gsi2
= gsi_for_stmt (SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, le)));
gsi_next (&gsi2);
for (unsigned i = 0; i < ncopies; ++i)
{
vec_dest = make_ssa_name (vectype);
gassign *vperm
= gimple_build_assign (vec_dest, VEC_PERM_EXPR,
i == 0 ? gimple_phi_result (new_phi) : NULL,
NULL, perm);
vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2);
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (vperm);
else
STMT_VINFO_VEC_STMTS (stmt_info).safe_push (vperm);
}
if (!slp_node)
*vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
return true;
}
/* Return true if VECTYPE represents a vector that requires lowering
by the vector lowering pass. */
@@ -10242,27 +10461,53 @@ maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
imm_use_iterator iter;
use_operand_p use_p;
FOR_EACH_IMM_USE_FAST (use_p, iter, def)
if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
&& (phi_info = loop_vinfo->lookup_stmt (phi))
&& STMT_VINFO_RELEVANT_P (phi_info)
&& VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
{
gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p));
if (!phi)
continue;
if (!(gimple_bb (phi)->loop_father->header == gimple_bb (phi)
&& (phi_info = loop_vinfo->lookup_stmt (phi))
&& STMT_VINFO_RELEVANT_P (phi_info)))
continue;
loop_p loop = gimple_bb (phi)->loop_father;
edge e = loop_latch_edge (loop);
if (PHI_ARG_DEF_FROM_EDGE (phi, e) != def)
continue;
if (VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
&& STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
&& STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
{
loop_p loop = gimple_bb (phi)->loop_father;
edge e = loop_latch_edge (loop);
if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
{
vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
gcc_assert (phi_defs.length () == latch_defs.length ());
for (unsigned i = 0; i < phi_defs.length (); ++i)
add_phi_arg (as_a <gphi *> (phi_defs[i]),
gimple_get_lhs (latch_defs[i]), e,
gimple_phi_arg_location (phi, e->dest_idx));
}
vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
gcc_assert (phi_defs.length () == latch_defs.length ());
for (unsigned i = 0; i < phi_defs.length (); ++i)
add_phi_arg (as_a <gphi *> (phi_defs[i]),
gimple_get_lhs (latch_defs[i]), e,
gimple_phi_arg_location (phi, e->dest_idx));
}
else if (STMT_VINFO_DEF_TYPE (phi_info) == vect_first_order_recurrence)
{
/* For first order recurrences we have to update both uses of
the latch definition, the one in the PHI node and the one
in the generated VEC_PERM_EXPR. */
vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
gcc_assert (phi_defs.length () == latch_defs.length ());
tree phidef = gimple_assign_rhs1 (phi_defs[0]);
gphi *vphi = as_a <gphi *> (SSA_NAME_DEF_STMT (phidef));
for (unsigned i = 0; i < phi_defs.length (); ++i)
{
gassign *perm = as_a <gassign *> (phi_defs[i]);
if (i > 0)
gimple_assign_set_rhs1 (perm, gimple_get_lhs (latch_defs[i-1]));
gimple_assign_set_rhs2 (perm, gimple_get_lhs (latch_defs[i]));
update_stmt (perm);
}
add_phi_arg (vphi, gimple_get_lhs (latch_defs.last ()), e,
gimple_phi_arg_location (phi, e->dest_idx));
}
}
}
/* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
@@ -10671,6 +10916,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
&& ! PURE_SLP_STMT (stmt_info))
{
@@ -10696,7 +10942,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence)
&& ! PURE_SLP_STMT (stmt_info))
maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
}

gcc/tree-vect-slp.cc

@@ -693,6 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
case vect_reduction_def:
case vect_induction_def:
case vect_nested_cycle:
case vect_first_order_recurrence:
break;
default:
@@ -1732,7 +1733,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
}
else if (def_type == vect_reduction_def
|| def_type == vect_double_reduction_def
|| def_type == vect_nested_cycle)
|| def_type == vect_nested_cycle
|| def_type == vect_first_order_recurrence)
{
/* Else def types have to match. */
stmt_vec_info other_info;
@@ -1746,7 +1748,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
}
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Reduction initial values are not explicitely represented. */
if (!nested_in_vect_loop_p (loop, stmt_info))
if (def_type != vect_first_order_recurrence
&& !nested_in_vect_loop_p (loop, stmt_info))
skip_args[loop_preheader_edge (loop)->dest_idx] = true;
/* Reduction chain backedge defs are filled manually.
??? Need a better way to identify a SLP reduction chain PHI.
@@ -9210,11 +9213,34 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
child = SLP_TREE_CHILDREN (phi_node)[dest_idx];
if (!child || SLP_TREE_DEF_TYPE (child) != vect_internal_def)
continue;
unsigned n = SLP_TREE_VEC_STMTS (phi_node).length ();
/* Simply fill all args. */
for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
vect_get_slp_vect_def (child, i),
e, gimple_phi_arg_location (phi, dest_idx));
if (STMT_VINFO_DEF_TYPE (SLP_TREE_REPRESENTATIVE (phi_node))
!= vect_first_order_recurrence)
for (unsigned i = 0; i < n; ++i)
add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
vect_get_slp_vect_def (child, i),
e, gimple_phi_arg_location (phi, dest_idx));
else
{
/* Unless it is a first order recurrence which needs
args filled in for both the PHI node and the permutes. */
gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[0];
gimple *rphi = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (perm));
add_phi_arg (as_a <gphi *> (rphi),
vect_get_slp_vect_def (child, n - 1),
e, gimple_phi_arg_location (phi, dest_idx));
for (unsigned i = 0; i < n; ++i)
{
gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[i];
if (i > 0)
gimple_assign_set_rhs1 (perm,
vect_get_slp_vect_def (child, i - 1));
gimple_assign_set_rhs2 (perm,
vect_get_slp_vect_def (child, i));
update_stmt (perm);
}
}
}
}
}

gcc/tree-vect-stmts.cc

@@ -11176,6 +11176,7 @@ vect_analyze_stmt (vec_info *vinfo,
break;
case vect_induction_def:
case vect_first_order_recurrence:
gcc_assert (!bb_vinfo);
break;
@@ -11234,7 +11235,9 @@ vect_analyze_stmt (vec_info *vinfo,
|| vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
cost_vec)
|| vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
stmt_info, NULL, node));
stmt_info, NULL, node)
|| vectorizable_recurr (as_a <loop_vec_info> (vinfo),
stmt_info, NULL, node, cost_vec));
else
{
if (bb_vinfo)
@@ -11404,6 +11407,12 @@ vect_transform_stmt (vec_info *vinfo,
gcc_assert (done);
break;
case recurr_info_type:
done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
stmt_info, &vec_stmt, slp_node, NULL);
gcc_assert (done);
break;
case phi_info_type:
done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
gcc_assert (done);
@@ -11804,6 +11813,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
case vect_nested_cycle:
dump_printf (MSG_NOTE, "nested cycle\n");
break;
case vect_first_order_recurrence:
dump_printf (MSG_NOTE, "first order recurrence\n");
break;
case vect_unknown_def_type:
dump_printf (MSG_NOTE, "unknown\n");
break;
@@ -11852,7 +11864,8 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
|| *dt == vect_induction_def
|| *dt == vect_reduction_def
|| *dt == vect_double_reduction_def
|| *dt == vect_nested_cycle)
|| *dt == vect_nested_cycle
|| *dt == vect_first_order_recurrence)
{
*vectype = STMT_VINFO_VECTYPE (def_stmt_info);
gcc_assert (*vectype != NULL_TREE);

gcc/tree-vectorizer.h

@@ -65,6 +65,7 @@ enum vect_def_type {
vect_reduction_def,
vect_double_reduction_def,
vect_nested_cycle,
vect_first_order_recurrence,
vect_unknown_def_type
};
@@ -1027,6 +1028,7 @@ enum stmt_vec_info_type {
cycle_phi_info_type,
lc_phi_info_type,
phi_info_type,
recurr_info_type,
loop_exit_ctrl_vec_info_type
};
@@ -2331,6 +2333,8 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
gimple **, slp_tree);
extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
stmt_vector_for_cost *);
extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
gimple **, slp_tree, stmt_vector_for_cost *);
extern bool vect_emulated_vector_p (tree);
extern bool vect_can_vectorize_without_simd_p (tree_code);
extern bool vect_can_vectorize_without_simd_p (code_helper);