diff --git a/gcc/testsuite/gcc.dg/vect/slp-49.c b/gcc/testsuite/gcc.dg/vect/slp-49.c new file mode 100644 index 00000000000..3f53baf707b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-49.c @@ -0,0 +1,38 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +int a[1024]; + +void __attribute__((noipa)) +foo(int k) +{ + int j = 5; + for (int i = 0; i < 512; ++i) + { + a[2*i] = j; + a[2*i+1] = k; + j++; + k+=3; + } +} + +int +main() +{ + check_vect (); + + foo (17); + + for (int i = 0; i < 512; ++i) + { + if (a[2*i] != 5 + i + || a[2*i+1] != 17 + 3 * i) + __builtin_abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 105ea61ddbe..fcea28935bc 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -4443,34 +4443,6 @@ vect_model_reduction_cost (loop_vec_info loop_vinfo, } -/* Function vect_model_induction_cost. - - Models cost for induction operations. */ - -static void -vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies, - stmt_vector_for_cost *cost_vec) -{ - unsigned inside_cost, prologue_cost; - - if (PURE_SLP_STMT (stmt_info)) - return; - - /* loop cost for vec_loop. */ - inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); - - /* prologue cost for vec_init and vec_step. */ - prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "vect_model_induction_cost: inside_cost = %d, " - "prologue_cost = %d .\n", inside_cost, prologue_cost); -} - - /* Function get_initial_def_for_reduction @@ -7796,7 +7768,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (slp_node && !nunits.is_constant ()) { - /* The current SLP code creates the initial value element-by-element. 
*/ + /* The current SLP code creates the step value element-by-element. */ if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "SLP induction not supported for variable-length" @@ -7806,9 +7778,46 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (!vec_stmt) /* transformation not required. */ { + unsigned inside_cost = 0, prologue_cost = 0; + if (slp_node) + { + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } + /* loop cost for vec_loop. */ + inside_cost + = record_stmt_cost (cost_vec, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vector_stmt, stmt_info, 0, vect_body); + } + else /* if (!slp_node) */ + { + /* loop cost for vec_loop. */ + inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, + stmt_info, 0, vect_body); + /* prologue cost for vec_init and vec_step. */ + prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, + stmt_info, 0, vect_prologue); + } + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_model_induction_cost: inside_cost = %d, " + "prologue_cost = %d .\n", inside_cost, + prologue_cost); + STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_induction"); - vect_model_induction_cost (stmt_info, ncopies, cost_vec); return true; } @@ -7827,6 +7836,164 @@ vectorizable_induction (loop_vec_info loop_vinfo, tree step_vectype = get_same_sized_vectype (TREE_TYPE (step_expr), vectype); pe = loop_preheader_edge (iv_loop); + /* Find the first insertion point in the BB. 
*/ + basic_block bb = gimple_bb (phi); + si = gsi_after_labels (bb); + + /* For SLP induction we have to generate several IVs as for example + with group size 3 we need + [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] + [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ + if (slp_node) + { + /* Enforced above. */ + unsigned int const_nunits = nunits.to_constant (); + + /* The initial values are vectorized, but any lanes > group_size + need adjustment. */ + slp_tree init_node + = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; + + /* Gather steps. Since we do not vectorize inductions as + cycles we have to reconstruct the step from SCEV data. */ + unsigned group_size = SLP_TREE_LANES (slp_node); + tree *steps = XALLOCAVEC (tree, group_size); + stmt_vec_info phi_info; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) + steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); + + /* Now generate the IVs. */ + unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert ((const_nunits * nvects) % group_size == 0); + unsigned nivs = least_common_multiple (group_size, + const_nunits) / const_nunits; + unsigned lup_mul = (nvects * const_nunits) / group_size; + tree stept = TREE_TYPE (step_vectype); + tree lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, lup_mul, + UNSIGNED) + : build_int_cstu (stept, lup_mul)); + unsigned ivn; + auto_vec<tree> vec_steps; + for (ivn = 0; ivn < nivs; ++ivn) + { + tree_vector_builder elts (step_vectype, const_nunits, 1); + tree_vector_builder mul_elts (step_vectype, const_nunits, 1); + for (unsigned eltn = 0; eltn < const_nunits; ++eltn) + { + tree elt = steps[(ivn*const_nunits + eltn) % group_size]; + elts.quick_push (elt); + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) + ? 
build_real_from_wide (stept, + mul_elt, UNSIGNED) + : build_int_cstu (stept, mul_elt)); + } + stmts = NULL; + vec_step = gimple_build_vector (&stmts, &elts); + vec_step = gimple_convert (&stmts, step_vectype, vec_step); + vec_steps.safe_push (vec_step); + tree step_mul = gimple_build_vector (&stmts, &mul_elts); + if (stmts) + { + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + + /* Create the induction-phi that defines the induction-operand. */ + vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, + "vec_iv_"); + induction_phi = create_phi_node (vec_dest, iv_loop->header); + induc_def = PHI_RESULT (induction_phi); + + /* Create the iv update inside the loop */ + stmts = NULL; + tree up = gimple_build (&stmts, MULT_EXPR, step_vectype, + vec_step, lupdate_mul); + if (stmts) + { + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + stmts = NULL; + vec_def = gimple_convert (&stmts, step_vectype, induc_def); + vec_def = gimple_build (&stmts, + PLUS_EXPR, step_vectype, vec_def, up); + vec_def = gimple_convert (&stmts, vectype, vec_def); + gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), + UNKNOWN_LOCATION); + + vec_init = vect_get_slp_vect_def (init_node, ivn); + if (!integer_zerop (step_mul)) + { + stmts = NULL; + vec_def = gimple_convert (&stmts, step_vectype, vec_init); + up = gimple_build (&stmts, MULT_EXPR, step_vectype, + vec_step, step_mul); + vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + vec_def, up); + vec_init = gimple_convert (&stmts, vectype, vec_def); + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + + /* Set the arguments of the phi node: */ + add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); + + SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi); + } + + /* Re-use IVs when we can. 
We are generating further vector + stmts by adding VF' * stride to the IVs generated above. */ + if (ivn < nvects) + { + unsigned vfp + = least_common_multiple (group_size, const_nunits) / group_size; + tree lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, + vfp, UNSIGNED) + : build_int_cstu (stept, vfp)); + for (; ivn < nvects; ++ivn) + { + gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; + tree def = gimple_get_lhs (iv); + gimple_seq stmts = NULL; + if (ivn < 2*nivs) + { + vec_steps[ivn - nivs] + = gimple_build (&stmts, MULT_EXPR, step_vectype, + vec_steps[ivn - nivs], lupdate_mul); + if (stmts) + { + new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); + gcc_assert (!new_bb); + } + } + stmts = NULL; + def = gimple_convert (&stmts, step_vectype, def); + def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + def, vec_steps[ivn % nivs]); + def = gimple_convert (&stmts, vectype, def); + if (gimple_code (iv) == GIMPLE_PHI) + gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + else + { + gimple_stmt_iterator tgsi = gsi_for_stmt (iv); + gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); + } + SLP_TREE_VEC_STMTS (slp_node) + .quick_push (SSA_NAME_DEF_STMT (def)); + } + } + + return true; + } + init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (iv_loop)); @@ -7860,147 +8027,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, gcc_assert (!new_bb); } - /* Find the first insertion point in the BB. */ - basic_block bb = gimple_bb (phi); - si = gsi_after_labels (bb); - - /* For SLP induction we have to generate several IVs as for example - with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S] - [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform - [VF*S, VF*S, VF*S, VF*S] for all. */ - if (slp_node) - { - /* Enforced above. */ - unsigned int const_nunits = nunits.to_constant (); - - /* Generate [VF*S, VF*S, ... ]. 
*/ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) - { - expr = build_int_cst (integer_type_node, vf); - expr = fold_convert (TREE_TYPE (step_expr), expr); - } - else - expr = build_int_cst (TREE_TYPE (step_expr), vf); - new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (! CONSTANT_CLASS_P (new_name)) - new_name = vect_init_vector (loop_vinfo, stmt_info, new_name, - TREE_TYPE (step_expr), NULL); - new_vec = build_vector_from_val (step_vectype, new_name); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, NULL); - - /* Now generate the IVs. */ - unsigned group_size = SLP_TREE_LANES (slp_node); - unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - unsigned elts = const_nunits * nvects; - /* Compute the number of distinct IVs we need. First reduce - group_size if it is a multiple of const_nunits so we get - one IV for a group_size of 4 but const_nunits 2. */ - unsigned group_sizep = group_size; - if (group_sizep % const_nunits == 0) - group_sizep = group_sizep / const_nunits; - unsigned nivs = least_common_multiple (group_sizep, - const_nunits) / const_nunits; - gcc_assert (elts % group_size == 0); - tree elt = init_expr; - unsigned ivn; - for (ivn = 0; ivn < nivs; ++ivn) - { - tree_vector_builder elts (step_vectype, const_nunits, 1); - stmts = NULL; - for (unsigned eltn = 0; eltn < const_nunits; ++eltn) - { - if (ivn*const_nunits + eltn >= group_size - && (ivn * const_nunits + eltn) % group_size == 0) - elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt), - elt, step_expr); - elts.quick_push (elt); - } - vec_init = gimple_build_vector (&stmts, &elts); - vec_init = gimple_convert (&stmts, vectype, vec_init); - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } - - /* Create the induction-phi that defines the induction-operand. 
*/ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); - induction_phi = create_phi_node (vec_dest, iv_loop->header); - induc_def = PHI_RESULT (induction_phi); - - /* Create the iv update inside the loop */ - gimple_seq stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, induc_def); - vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, vec_step); - vec_def = gimple_convert (&stmts, vectype, vec_def); - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - - /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); - - SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi); - } - /* Fill up to the number of vectors we need for the whole group. */ - nivs = least_common_multiple (group_size, - const_nunits) / const_nunits; - for (; ivn < nivs; ++ivn) - SLP_TREE_VEC_STMTS (slp_node) - .quick_push (SLP_TREE_VEC_STMTS (slp_node)[0]); - - /* Re-use IVs when we can. */ - if (ivn < nvects) - { - unsigned vfp - = least_common_multiple (group_size, const_nunits) / group_size; - /* Generate [VF'*S, VF'*S, ... ]. */ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) - { - expr = build_int_cst (integer_type_node, vfp); - expr = fold_convert (TREE_TYPE (step_expr), expr); - } - else - expr = build_int_cst (TREE_TYPE (step_expr), vfp); - new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (! 
CONSTANT_CLASS_P (new_name)) - new_name = vect_init_vector (loop_vinfo, stmt_info, new_name, - TREE_TYPE (step_expr), NULL); - new_vec = build_vector_from_val (step_vectype, new_name); - vec_step = vect_init_vector (loop_vinfo, stmt_info, new_vec, - step_vectype, NULL); - for (; ivn < nvects; ++ivn) - { - gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; - tree def; - if (gimple_code (iv) == GIMPLE_PHI) - def = gimple_phi_result (iv); - else - def = gimple_assign_lhs (iv); - gimple_seq stmts = NULL; - def = gimple_convert (&stmts, step_vectype, def); - def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, def, vec_step); - def = gimple_convert (&stmts, vectype, def); - if (gimple_code (iv) == GIMPLE_PHI) - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - else - { - gimple_stmt_iterator tgsi = gsi_for_stmt (iv); - gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); - } - SLP_TREE_VEC_STMTS (slp_node) - .quick_push (SSA_NAME_DEF_STMT (def)); - } - } - - return true; - } - /* Create the vector that holds the initial_value of the induction. */ if (nested_in_vect_loop) { diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 45e33c031d2..63a59c0c8ed 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1441,20 +1441,14 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, return NULL; vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info); - /* Induction from different IVs is not supported. */ if (def_type == vect_induction_def) { - stmt_vec_info other_info; - FOR_EACH_VEC_ELT (stmts, i, other_info) - if (stmt_info != other_info) - return NULL; - - /* Induction PHIs are leafs. */ - (*tree_size)++; - node = vect_create_new_slp_node (node, stmts, nops); - SLP_TREE_VECTYPE (node) = vectype; - SLP_TREE_CHILDREN (node).quick_grow_cleared (nops); - return node; + /* Induction PHIs are not cycles but walk the initial + value. 
*/ + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + if (nested_in_vect_loop_p (loop, stmt_info)) + loop = loop->inner; + skip_args[loop_latch_edge (loop)->dest_idx] = true; } else if (def_type == vect_reduction_def || def_type == vect_double_reduction_def diff --git a/gcc/tree.c b/gcc/tree.c index 81f867ddded..739c288378a 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -2250,6 +2250,22 @@ build_real_from_int_cst (tree type, const_tree i) return v; } +/* Return a new REAL_CST node whose type is TYPE + and whose value is the integer value I which has sign SGN. */ + +tree +build_real_from_wide (tree type, const wide_int_ref &i, signop sgn) +{ + REAL_VALUE_TYPE d; + + /* Clear all bits of the real value type so that we can later do + bitwise comparisons to see if two values are the same. */ + memset (&d, 0, sizeof d); + + real_from_integer (&d, TYPE_MODE (type), i, sgn); + return build_real (type, d); +} + /* Return a newly constructed STRING_CST node whose value is the LEN characters at STR when STR is nonnull, or all zeros otherwise. Note that for a C string literal, LEN should include the trailing NUL. diff --git a/gcc/tree.h b/gcc/tree.h index 7f0aa5b8d1d..04e564c9798 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -4429,6 +4429,7 @@ extern tree build_constructor_from_vec (tree, const vec<tree, va_gc> *); extern tree build_constructor_va (tree, int, ...); extern tree build_clobber (tree); extern tree build_real_from_int_cst (tree, const_tree); +extern tree build_real_from_wide (tree, const wide_int_ref &, signop); extern tree build_complex (tree, tree, tree); extern tree build_complex_inf (tree, bool); extern tree build_each_one_cst (tree);