diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
index f1302b60ae5..83eaa7a8ff5 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s252.c
@@ -40,4 +40,4 @@ int main (int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
index bdc8a01e2a5..06e9b0a849d 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s254.c
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
index 0b474c2e81a..91cdc121095 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s291.c
@@ -39,4 +39,4 @@ int main (int argc, char **argv)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c
new file mode 100644
index 00000000000..6eb59fdf854
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
+{
+  int t = *c;
+  for (int i = 0; i < 64; ++i)
+    {
+      b[i] = a[i] - t;
+      t = a[i];
+    }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c);
+  for (int i = 1; i < 64; ++i)
+    if (b[i] != a[i] - a[i-1])
+      abort ();
+  if (b[0] != -7)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c
new file mode 100644
index 00000000000..97efaaa38bc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
+{
+  int t = *c;
+  for (int i = 0; i < 64; ++i)
+    {
+      b[i] = a[i] - t;
+      t = a[i];
+    }
+}
+
+int a[64];
+short b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c);
+  for (int i = 1; i < 64; ++i)
+    if (b[i] != a[i] - a[i-1])
+      abort ();
+  if (b[0] != -7)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c
new file mode 100644
index 00000000000..621a5d8a257
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-3.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, signed char * __restrict__ b, int * __restrict__ c)
+{
+  int t = *c;
+  for (int i = 0; i < 64; ++i)
+    {
+      b[i] = a[i] - t;
+      t = a[i];
+    }
+}
+
+int a[64];
+signed char b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c);
+  for (int i = 1; i < 64; ++i)
+    if (b[i] != a[i] - a[i-1])
+      abort ();
+  if (b[0] != -7)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c
new file mode 100644
index 00000000000..f6dbc494a62
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-4.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c)
+{
+  int t1 = *c;
+  int t2 = *c;
+  for (int i = 0; i < 64; i+=2)
+    {
+      b[i] = a[i] - t1;
+      t1 = a[i];
+      b[i+1] = a[i+1] - t2;
+      t2 = a[i+1];
+    }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c);
+  for (int i = 2; i < 64; i+=2)
+    if (b[i] != a[i] - a[i-2]
+        || b[i+1] != a[i+1] - a[i-1])
+      abort ();
+  if (b[0] != -7 || b[1] != -6)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c
new file mode 100644
index 00000000000..19c56df9e83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-5.c
@@ -0,0 +1,43 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, short * __restrict__ b, int * __restrict__ c)
+{
+  int t1 = *c;
+  int t2 = *c;
+  for (int i = 0; i < 64; i+=2)
+    {
+      b[i] = a[i] - t1;
+      t1 = a[i];
+      b[i+1] = a[i+1] - t2;
+      t2 = a[i+1];
+    }
+}
+
+int a[64];
+short b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c);
+  for (int i = 2; i < 64; i+=2)
+    if (b[i] != a[i] - a[i-2]
+        || b[i+1] != a[i+1] - a[i-1])
+      abort ();
+  if (b[0] != -7 || b[1] != -6)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c b/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c
new file mode 100644
index 00000000000..e7712680853
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-recurr-6.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+
+void __attribute__((noipa))
+foo (int * __restrict__ a, int * __restrict__ b, int * __restrict__ c, int n)
+{
+  int t = *c;
+  for (int i = 0; i < n; ++i)
+    {
+      b[i] = a[i] - t;
+      t = a[i];
+    }
+}
+
+int a[64], b[64];
+
+int
+main ()
+{
+  check_vect ();
+  for (int i = 0; i < 64; ++i)
+    {
+      a[i] = i;
+      __asm__ volatile ("" ::: "memory");
+    }
+  int c = 7;
+  foo (a, b, &c, 63);
+  for (int i = 1; i < 63; ++i)
+    if (b[i] != a[i] - a[i-1])
+      abort ();
+  if (b[0] != -7)
+    abort ();
+  return 0;
+}
+
+/* ??? We miss epilogue handling for first order recurrences.  */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_fully_masked } } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 98a943d8a4b..63e86540d12 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -529,6 +529,45 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info loop_vinfo, gphi *phi)
   return false;
 }
 
+/* Returns true if Phi is a first-order recurrence.  A first-order
+   recurrence is a non-reduction recurrence relation in which the value of
+   the recurrence in the current loop iteration equals a value defined in
+   the previous iteration.  */
+
+static bool
+vect_phi_first_order_recurrence_p (loop_vec_info loop_vinfo, class loop *loop,
+                                   gphi *phi)
+{
+  /* Ensure the loop latch definition is from within the loop.  */
+  edge latch = loop_latch_edge (loop);
+  tree ldef = PHI_ARG_DEF_FROM_EDGE (phi, latch);
+  if (TREE_CODE (ldef) != SSA_NAME
+      || SSA_NAME_IS_DEFAULT_DEF (ldef)
+      || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (ldef))))
+    return false;
+
+  tree def = gimple_phi_result (phi);
+
+  /* Ensure every use_stmt of the phi node is dominated by the latch
+     definition.  */
+  imm_use_iterator imm_iter;
+  use_operand_p use_p;
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
+    if (!is_gimple_debug (USE_STMT (use_p))
+        && (SSA_NAME_DEF_STMT (ldef) == USE_STMT (use_p)
+            || !vect_stmt_dominates_stmt_p (SSA_NAME_DEF_STMT (ldef),
+                                            USE_STMT (use_p))))
+      return false;
+
+  /* First-order recurrence autovectorization needs shuffle vector.  */
+  tree scalar_type = TREE_TYPE (def);
+  tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
+  if (!vectype)
+    return false;
+
+  return true;
+}
+
 /* Function vect_analyze_scalar_cycles_1.
 
    Examine the cross iteration def-use cycles of scalar variables
@@ -666,6 +705,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
                 }
             }
         }
+      else if (vect_phi_first_order_recurrence_p (loop_vinfo, loop, phi))
+        STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_first_order_recurrence;
       else if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1810,7 +1851,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
           if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
                || STMT_VINFO_LIVE_P (stmt_info))
-              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
+              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def
+              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
             /* A scalar-dependence cycle that we don't support.  */
             return opt_result::failure_at (phi,
                                            "not vectorized:"
@@ -1831,6 +1873,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
               && ! PURE_SLP_STMT (stmt_info))
            ok = vectorizable_reduction (loop_vinfo, stmt_info, NULL, NULL,
                                         &cost_vec);
+          else if ((STMT_VINFO_DEF_TYPE (stmt_info)
+                    == vect_first_order_recurrence)
+                   && ! PURE_SLP_STMT (stmt_info))
+            ok = vectorizable_recurr (loop_vinfo, stmt_info, NULL, NULL,
+                                      &cost_vec);
         }
 
       /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
@@ -8290,6 +8337,178 @@ vectorizable_phi (vec_info *,
   return true;
 }
 
+/* Vectorizes first order recurrences.  An overview of the transformation
+   is described below.  Suppose we have the following loop.
+
+     int t = 0;
+     for (int i = 0; i < n; ++i)
+       {
+         b[i] = a[i] - t;
+         t = a[i];
+       }
+
+   There is a first-order recurrence on 'a'.  For this loop, the scalar IR
+   looks (simplified) like:
+
+     scalar.preheader:
+       init = 0;
+
+     scalar.body:
+       i = PHI <0(scalar.preheader), i+1(scalar.body)>
+       _2 = PHI <init(scalar.preheader), _1(scalar.body)>
+       _1 = a[i]
+       b[i] = _1 - _2
+       if (i < n) goto scalar.body
+
+   In this example, _2 is a recurrence because its value depends on the
+   previous iteration.  We vectorize this as (VF = 4)
+
+     vector.preheader:
+       vect_init = vect_cst(..., ..., ..., 0)
+
+     vector.body
+       i = PHI <0(vector.preheader), i+4(vector.body)>
+       vect_1 = PHI <vect_init(vector.preheader), vect_2(vector.body)>
+       vect_2 = a[i, i+1, i+2, i+3];
+       vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 })
+       b[i, i+1, i+2, i+3] = vect_2 - vect_3
+       if (..) goto vector.body
+
+   In this function, vectorizable_recurr, we code generate both the
+   vector PHI node and the permute since those together compute the
+   vectorized value of the scalar PHI.  We do not yet have the
+   backedge value to fill in there nor into the vec_perm.  Those
+   are filled in maybe_set_vectorized_backedge_value and
+   vect_schedule_scc.
+
+   TODO:  Since the scalar loop does not have a use of the recurrence
+   outside of the loop the natural way to implement peeling via
+   vectorizing the live value doesn't work.  For now peeling of loops
+   with a recurrence is not implemented.  For SLP the supported cases
+   are restricted to those requiring a single vector recurrence PHI.  */

+bool
+vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
+                     gimple **vec_stmt, slp_tree slp_node,
+                     stmt_vector_for_cost *cost_vec)
+{
+  if (!loop_vinfo || !is_a <gphi *> (stmt_info->stmt))
+    return false;
+
+  gphi *phi = as_a <gphi *> (stmt_info->stmt);
+
+  /* So far we only support first-order recurrence auto-vectorization.  */
+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_first_order_recurrence)
+    return false;
+
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  unsigned ncopies;
+  if (slp_node)
+    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+  else
+    ncopies = vect_get_num_copies (loop_vinfo, vectype);
+  poly_int64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  unsigned dist = slp_node ? SLP_TREE_LANES (slp_node) : 1;
+  /* We need to be able to make progress with a single vector.  */
+  if (maybe_gt (dist * 2, nunits))
+    {
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                         "first order recurrence exceeds half of "
+                         "a vector\n");
+      return false;
+    }
+
+  /* First-order recurrence autovectorization needs to handle permutation
+     with indices = [nunits-1, nunits, nunits+1, ...].  */
+  vec_perm_builder sel (nunits, 1, 3);
+  for (int i = 0; i < 3; ++i)
+    sel.quick_push (nunits - dist + i);
+  vec_perm_indices indices (sel, 2, nunits);
+
+  if (!vec_stmt) /* transformation not required.  */
+    {
+      if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
+                                 indices))
+        return false;
+
+      if (slp_node)
+        {
+          /* We eventually need to set a vector type on invariant
+             arguments.  */
+          unsigned j;
+          slp_tree child;
+          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child)
+            if (!vect_maybe_update_slp_op_vectype
+                  (child, SLP_TREE_VECTYPE (slp_node)))
+              {
+                if (dump_enabled_p ())
+                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                   "incompatible vector types for "
+                                   "invariants\n");
+                return false;
+              }
+        }
+      /* The recurrence costs the initialization vector and one permute
+         for each copy.  */
+      unsigned prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec,
+                                                 stmt_info, 0, vect_prologue);
+      unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
+                                               stmt_info, 0, vect_body);
+      if (dump_enabled_p ())
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "vectorizable_recurr: inside_cost = %d, "
+                         "prologue_cost = %d .\n", inside_cost,
+                         prologue_cost);
+
+      STMT_VINFO_TYPE (stmt_info) = recurr_info_type;
+      return true;
+    }
+
+  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+  basic_block bb = gimple_bb (phi);
+  tree preheader = PHI_ARG_DEF_FROM_EDGE (phi, pe);
+  tree vec_init = build_vector_from_val (vectype, preheader);
+  vec_init = vect_init_vector (loop_vinfo, stmt_info, vec_init, vectype, NULL);
+
+  /* Create the vectorized first-order PHI node.  */
+  tree vec_dest = vect_get_new_vect_var (vectype,
+                                         vect_simple_var, "vec_recur_");
+  gphi *new_phi = create_phi_node (vec_dest, bb);
+  add_phi_arg (new_phi, vec_init, pe, UNKNOWN_LOCATION);
+
+  /* Insert shuffles for the first-order recurrence autovectorization.
+       result = VEC_PERM <vect_1, vect_2, index[nunits-1, nunits, ...]>  */
+  tree perm = vect_gen_perm_mask_checked (vectype, indices);
+
+  /* Insert the required permute after the latch definition.  The
+     second and later operands are tentative and will be updated when we have
+     vectorized the latch definition.  */
+  edge le = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo));
+  gimple_stmt_iterator gsi2
+    = gsi_for_stmt (SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, le)));
+  gsi_next (&gsi2);
+
+  for (unsigned i = 0; i < ncopies; ++i)
+    {
+      vec_dest = make_ssa_name (vectype);
+      gassign *vperm
+        = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
+                               i == 0 ? gimple_phi_result (new_phi) : NULL,
+                               NULL, perm);
+      vect_finish_stmt_generation (loop_vinfo, stmt_info, vperm, &gsi2);
+
+      if (slp_node)
+        SLP_TREE_VEC_STMTS (slp_node).quick_push (vperm);
+      else
+        STMT_VINFO_VEC_STMTS (stmt_info).safe_push (vperm);
+    }
+
+  if (!slp_node)
+    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
+  return true;
+}
+
 /* Return true if VECTYPE represents a vector that requires lowering
    by the vector lowering pass.  */
@@ -10242,27 +10461,53 @@ maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
   imm_use_iterator iter;
   use_operand_p use_p;
   FOR_EACH_IMM_USE_FAST (use_p, iter, def)
-    if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
-      if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
-          && (phi_info = loop_vinfo->lookup_stmt (phi))
-          && STMT_VINFO_RELEVANT_P (phi_info)
-          && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+    {
+      gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p));
+      if (!phi)
+        continue;
+      if (!(gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+            && (phi_info = loop_vinfo->lookup_stmt (phi))
+            && STMT_VINFO_RELEVANT_P (phi_info)))
+        continue;
+      loop_p loop = gimple_bb (phi)->loop_father;
+      edge e = loop_latch_edge (loop);
+      if (PHI_ARG_DEF_FROM_EDGE (phi, e) != def)
+        continue;
+
+      if (VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
          && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
          && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
         {
-          loop_p loop = gimple_bb (phi)->loop_father;
-          edge e = loop_latch_edge (loop);
-          if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
-            {
-              vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
-              vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
-              gcc_assert (phi_defs.length () == latch_defs.length ());
-              for (unsigned i = 0; i < phi_defs.length (); ++i)
-                add_phi_arg (as_a <gphi *> (phi_defs[i]),
-                             gimple_get_lhs (latch_defs[i]), e,
-                             gimple_phi_arg_location (phi, e->dest_idx));
-            }
+          vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+          vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+          gcc_assert (phi_defs.length () == latch_defs.length ());
+          for (unsigned i = 0; i < phi_defs.length (); ++i)
+            add_phi_arg (as_a <gphi *> (phi_defs[i]),
+                         gimple_get_lhs (latch_defs[i]), e,
+                         gimple_phi_arg_location (phi, e->dest_idx));
         }
+      else if (STMT_VINFO_DEF_TYPE (phi_info) == vect_first_order_recurrence)
+        {
+          /* For first order recurrences we have to update both uses of
+             the latch definition, the one in the PHI node and the one
+             in the generated VEC_PERM_EXPR.  */
+          vec<gimple *> &phi_defs = STMT_VINFO_VEC_STMTS (phi_info);
+          vec<gimple *> &latch_defs = STMT_VINFO_VEC_STMTS (def_stmt_info);
+          gcc_assert (phi_defs.length () == latch_defs.length ());
+          tree phidef = gimple_assign_rhs1 (phi_defs[0]);
+          gphi *vphi = as_a <gphi *> (SSA_NAME_DEF_STMT (phidef));
+          for (unsigned i = 0; i < phi_defs.length (); ++i)
+            {
+              gassign *perm = as_a <gassign *> (phi_defs[i]);
+              if (i > 0)
+                gimple_assign_set_rhs1 (perm, gimple_get_lhs (latch_defs[i-1]));
+              gimple_assign_set_rhs2 (perm, gimple_get_lhs (latch_defs[i]));
+              update_stmt (perm);
+            }
+          add_phi_arg (vphi, gimple_get_lhs (latch_defs.last ()), e,
+                       gimple_phi_arg_location (phi, e->dest_idx));
+        }
+    }
 }
 
 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
@@ -10671,6 +10916,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
             && ! PURE_SLP_STMT (stmt_info))
           {
@@ -10696,7 +10942,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
+             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_first_order_recurrence)
             && ! PURE_SLP_STMT (stmt_info))
           maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
       }
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index af27fd56e02..e54414f6bef 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -693,6 +693,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
         case vect_reduction_def:
         case vect_induction_def:
         case vect_nested_cycle:
+        case vect_first_order_recurrence:
           break;
 
         default:
@@ -1732,7 +1733,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
             }
           else if (def_type == vect_reduction_def
                    || def_type == vect_double_reduction_def
-                   || def_type == vect_nested_cycle)
+                   || def_type == vect_nested_cycle
+                   || def_type == vect_first_order_recurrence)
             {
               /* Else def types have to match.  */
               stmt_vec_info other_info;
@@ -1746,7 +1748,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
             }
           class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
           /* Reduction initial values are not explicitely represented.  */
-          if (!nested_in_vect_loop_p (loop, stmt_info))
+          if (def_type != vect_first_order_recurrence
+              && !nested_in_vect_loop_p (loop, stmt_info))
             skip_args[loop_preheader_edge (loop)->dest_idx] = true;
           /* Reduction chain backedge defs are filled manually.
              ??? Need a better way to identify a SLP reduction chain PHI.
@@ -9210,11 +9213,34 @@ vect_schedule_scc (vec_info *vinfo, slp_tree node, slp_instance instance,
           child = SLP_TREE_CHILDREN (phi_node)[dest_idx];
           if (!child || SLP_TREE_DEF_TYPE (child) != vect_internal_def)
             continue;
+          unsigned n = SLP_TREE_VEC_STMTS (phi_node).length ();
           /* Simply fill all args.  */
-          for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
-            add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
-                         vect_get_slp_vect_def (child, i),
-                         e, gimple_phi_arg_location (phi, dest_idx));
+          if (STMT_VINFO_DEF_TYPE (SLP_TREE_REPRESENTATIVE (phi_node))
+              != vect_first_order_recurrence)
+            for (unsigned i = 0; i < n; ++i)
+              add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
+                           vect_get_slp_vect_def (child, i),
+                           e, gimple_phi_arg_location (phi, dest_idx));
+          else
+            {
+              /* Unless it is a first order recurrence which needs
+                 args filled in for both the PHI node and the permutes.  */
+              gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[0];
+              gimple *rphi = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (perm));
+              add_phi_arg (as_a <gphi *> (rphi),
+                           vect_get_slp_vect_def (child, n - 1),
+                           e, gimple_phi_arg_location (phi, dest_idx));
+              for (unsigned i = 0; i < n; ++i)
+                {
+                  gimple *perm = SLP_TREE_VEC_STMTS (phi_node)[i];
+                  if (i > 0)
+                    gimple_assign_set_rhs1 (perm,
+                                            vect_get_slp_vect_def (child, i - 1));
+                  gimple_assign_set_rhs2 (perm,
+                                          vect_get_slp_vect_def (child, i));
+                  update_stmt (perm);
+                }
+            }
         }
     }
 }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c8d1efc45e5..4e0d75e0d75 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11176,6 +11176,7 @@ vect_analyze_stmt (vec_info *vinfo,
           break;
 
         case vect_induction_def:
+        case vect_first_order_recurrence:
           gcc_assert (!bb_vinfo);
           break;
 
@@ -11234,7 +11235,9 @@ vect_analyze_stmt (vec_info *vinfo,
           || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                       cost_vec)
           || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
-                                  stmt_info, NULL, node));
+                                  stmt_info, NULL, node)
+          || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
+                                  stmt_info, NULL, node, cost_vec));
   else
     {
       if (bb_vinfo)
@@ -11404,6 +11407,12 @@ vect_transform_stmt (vec_info *vinfo,
       gcc_assert (done);
       break;
 
+    case recurr_info_type:
+      done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
+                                  stmt_info, &vec_stmt, slp_node, NULL);
+      gcc_assert (done);
+      break;
+
     case phi_info_type:
      done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
      gcc_assert (done);
      break;
@@ -11804,6 +11813,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
        case vect_nested_cycle:
          dump_printf (MSG_NOTE, "nested cycle\n");
          break;
+       case vect_first_order_recurrence:
+         dump_printf (MSG_NOTE, "first order recurrence\n");
+         break;
        case vect_unknown_def_type:
          dump_printf (MSG_NOTE, "unknown\n");
          break;
@@ -11852,7 +11864,8 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
          || *dt == vect_induction_def
          || *dt == vect_reduction_def
          || *dt == vect_double_reduction_def
-         || *dt == vect_nested_cycle)
+         || *dt == vect_nested_cycle
+         || *dt == vect_first_order_recurrence)
        {
          *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
          gcc_assert (*vectype != NULL_TREE);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 4870c754499..016961da851 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -65,6 +65,7 @@ enum vect_def_type {
   vect_reduction_def,
   vect_double_reduction_def,
   vect_nested_cycle,
+  vect_first_order_recurrence,
   vect_unknown_def_type
 };
 
@@ -1027,6 +1028,7 @@ enum stmt_vec_info_type {
   cycle_phi_info_type,
   lc_phi_info_type,
   phi_info_type,
+  recurr_info_type,
   loop_exit_ctrl_vec_info_type
 };
 
@@ -2331,6 +2333,8 @@ extern bool vectorizable_lc_phi (loop_vec_info, stmt_vec_info,
                                  gimple **, slp_tree);
 extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree,
                               stmt_vector_for_cost *);
+extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info,
+                                 gimple **, slp_tree, stmt_vector_for_cost *);
 extern bool vect_emulated_vector_p (tree);
 extern bool vect_can_vectorize_without_simd_p (tree_code);
 extern bool vect_can_vectorize_without_simd_p (code_helper);
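
Note (not part of the patch above): the following standalone C program is a minimal sketch that emulates, with plain arrays, the VF = 4 transformation described in the comment before vectorizable_recurr, and checks it against the scalar recurrence.  All names in it (scalar_recurr, vector_recurr, N, VF) are invented for this illustration; it assumes only a C99 compiler and N divisible by VF, so the missing-epilogue case noted in vect-recurr-6.c does not arise.  The shifted vector is built exactly like vec_perm (vect_1, vect_2, { 3, 4, 5, 6 }) in that comment: lane 0 takes the last lane of the previous iteration's vector (the recurrence PHI), lanes 1..3 take lanes 0..2 of the current load.

/* Illustration only: emulate the permute-based first-order recurrence
   vectorization and compare it against the scalar loop.  */

#include <stdio.h>
#include <stdlib.h>

#define N 64
#define VF 4

/* Scalar form: b[i] = a[i] - t; t = a[i];  */
static void
scalar_recurr (const int *a, int *b, int t)
{
  for (int i = 0; i < N; ++i)
    {
      b[i] = a[i] - t;
      t = a[i];
    }
}

/* Emulated vector form: the recurrence PHI becomes a vector whose last
   lane carries the value from the previous iteration; each copy is a
   vec_perm (prev, cur, { VF-1, VF, VF+1, VF+2 }).  */
static void
vector_recurr (const int *a, int *b, int t)
{
  int prev[VF] = { 0, 0, 0, t };	/* vect_init: only the last lane is used.  */
  for (int i = 0; i < N; i += VF)
    {
      int cur[VF], shifted[VF];
      for (int l = 0; l < VF; ++l)	/* vect_2 = a[i, i+1, i+2, i+3]  */
	cur[l] = a[i + l];
      for (int l = 0; l < VF; ++l)	/* vect_3 = vec_perm (vect_1, vect_2, { 3, 4, 5, 6 })  */
	shifted[l] = l == 0 ? prev[VF - 1] : cur[l - 1];
      for (int l = 0; l < VF; ++l)	/* b[i, i+1, i+2, i+3] = vect_2 - vect_3  */
	b[i + l] = cur[l] - shifted[l];
      for (int l = 0; l < VF; ++l)	/* backedge value of the vector PHI  */
	prev[l] = cur[l];
    }
}

int
main (void)
{
  int a[N], b1[N], b2[N];
  for (int i = 0; i < N; ++i)
    a[i] = i * i - 3 * i;
  scalar_recurr (a, b1, 7);
  vector_recurr (a, b2, 7);
  for (int i = 0; i < N; ++i)
    if (b1[i] != b2[i])
      abort ();
  puts ("scalar and emulated vector recurrence agree");
  return 0;
}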