diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c new file mode 100644 index 00000000000..4bfc9880f9f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_126.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ + +#define N 1024 +unsigned vect_a[N]; +unsigned vect_b[N]; + +unsigned test4(unsigned x) +{ + unsigned ret = 0; + for (int i = 0; i < N; i++) + { + vect_b[i] = x + i; + if (vect_a[i] > x) + { + ret *= vect_a[i]; + return vect_a[i]; + } + vect_a[i] = x; + ret += vect_a[i] + vect_b[i]; + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c new file mode 100644 index 00000000000..67cb5d34a77 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_127.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ + +#ifndef N +#define N 800 +#endif +unsigned vect_a[N]; +unsigned vect_b[N]; + +unsigned test4(unsigned x) +{ + unsigned ret = 0; + for (int i = 0; i < N; i++) + { + vect_b[i] = x + i; + if (vect_a[i]*2 != x) + break; + vect_a[i] = x; + + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c new file mode 100644 index 00000000000..6d7fb920ec2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ 
+/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ + +#ifndef N +#define N 800 +#endif +unsigned vect_a[N]; +unsigned vect_b[N]; + +unsigned test4(unsigned x) +{ + unsigned ret = 0; + for (int i = 0; i < N; i+=2) + { + vect_b[i] = x + i; + vect_b[i+1] = x + i+1; + if (vect_a[i]*2 != x) + break; + if (vect_a[i+1]*2 != x) + break; + vect_a[i] = x; + vect_a[i+1] = x; + + } + return ret; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 025442aabc3..d1f1edc704c 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3256,6 +3256,9 @@ again: unsigned i, j; FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) { + if (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance)) != vect_internal_def) + continue; + stmt_vec_info vinfo; vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 83cb39fc214..16332e0b6d7 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3756,6 +3756,13 @@ vect_build_slp_instance (vec_info *vinfo, "Analyzing vectorizable constructor: %G\n", root_stmt_infos[0]->stmt); } + else if (kind == slp_inst_kind_gcond) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Analyzing vectorizable control flow: %G", + root_stmt_infos[0]->stmt); + } if (dump_enabled_p ()) { @@ -4827,6 +4834,80 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, bst_map, NULL, force_single_lane); } } + + /* Find SLP sequences starting from gconds. 
*/ + for (auto cond : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) + { + auto cond_info = loop_vinfo->lookup_stmt (cond); + + cond_info = vect_stmt_to_vectorize (cond_info); + vec<stmt_vec_info> roots = vNULL; + roots.safe_push (cond_info); + gimple *stmt = STMT_VINFO_STMT (cond_info); + tree args0 = gimple_cond_lhs (stmt); + tree args1 = gimple_cond_rhs (stmt); + + /* These should be enforced by cond lowering. */ + gcc_assert (gimple_cond_code (stmt) == NE_EXPR); + gcc_assert (zerop (args1)); + + /* An argument without a loop def will be codegened from vectorizing the + root gcond itself. As such we don't need to try to build an SLP tree + from them. It's highly likely that the resulting SLP tree here if both + arguments have a def will be incompatible, but we rely on it being split + later on. */ + if (auto varg = loop_vinfo->lookup_def (args0)) + { + vec<stmt_vec_info> stmts; + vec<tree> remain = vNULL; + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (varg)); + + vect_build_slp_instance (vinfo, slp_inst_kind_gcond, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL, force_single_lane); + } + else + { + /* Create a new SLP instance. */ + slp_instance new_instance = XNEW (class _slp_instance); + vec<tree> ops; + ops.create (1); + ops.quick_push (args0); + slp_tree invnode = vect_create_new_slp_node (ops); + SLP_TREE_DEF_TYPE (invnode) = vect_external_def; + SLP_INSTANCE_TREE (new_instance) = invnode; + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = 1; + SLP_INSTANCE_LOADS (new_instance) = vNULL; + SLP_INSTANCE_ROOT_STMTS (new_instance) = roots; + SLP_INSTANCE_REMAIN_DEFS (new_instance) = vNULL; + SLP_INSTANCE_KIND (new_instance) = slp_inst_kind_gcond; + new_instance->reduc_phis = NULL; + new_instance->cost_vec = vNULL; + new_instance->subgraph_entries = vNULL; + vinfo->slp_instances.safe_push (new_instance); + } + } + + /* Find and create slp instances for inductions that have been forced + live due to early break. 
*/ + edge latch_e = loop_latch_edge (LOOP_VINFO_LOOP (loop_vinfo)); + for (auto stmt_info : LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo)) + { + vec<stmt_vec_info> stmts; + vec<stmt_vec_info> roots = vNULL; + vec<tree> remain = vNULL; + gphi *lc_phi = as_a <gphi *> (STMT_VINFO_STMT (stmt_info)); + tree def = gimple_phi_arg_def_from_edge (lc_phi, latch_e); + stmt_vec_info lc_info = loop_vinfo->lookup_def (def); + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (lc_info)); + vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL, force_single_lane); + } } hash_set<slp_tree> visited_patterns; @@ -7242,8 +7323,9 @@ maybe_push_to_hybrid_worklist (vec_info *vinfo, } } } - /* No def means this is a loo_vect sink. */ - if (!any_def) + /* No def means this is a loop_vect sink. Gimple conditionals also don't have a + def but shouldn't be considered sinks. */ + if (!any_def && STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -8067,7 +8149,14 @@ vect_slp_analyze_operations (vec_info *vinfo) (SLP_INSTANCE_TREE (instance)))))) /* Check we can vectorize the reduction. */ || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc - && !vectorizable_bb_reduc_epilogue (instance, &cost_vec))) + && !vectorizable_bb_reduc_epilogue (instance, &cost_vec)) + /* Check we can vectorize the gcond. */ + || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_gcond + && !vectorizable_early_exit (vinfo, + SLP_INSTANCE_ROOT_STMTS (instance)[0], + NULL, NULL, + SLP_INSTANCE_TREE (instance), + &cost_vec))) { cost_vec.release (); slp_tree node = SLP_INSTANCE_TREE (instance); @@ -8697,6 +8786,8 @@ vect_slp_check_for_roots (bb_vec_info bb_vinfo) !gsi_end_p (gsi); gsi_next (&gsi)) { gassign *assign = dyn_cast <gassign *> (gsi_stmt (gsi)); + /* SLP discovery for BB early breaks could be started from here, once we + get that far. 
*/ if (!assign) continue; @@ -10924,7 +11015,7 @@ vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node) /* Vectorize the instance root. */ void -vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) +vectorize_slp_instance_root_stmt (vec_info *vinfo, slp_tree node, slp_instance instance) { gassign *rstmt = NULL; @@ -11028,6 +11119,21 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) update_stmt (gsi_stmt (rgsi)); return; } + else if (instance->kind == slp_inst_kind_gcond) + { + /* Only support a single root for now as we can't codegen CFG yet and so we + can't support lane > 1 at this time. */ + gcc_assert (instance->root_stmts.length () == 1); + auto root_stmt_info = instance->root_stmts[0]; + auto last_stmt = STMT_VINFO_STMT (root_stmt_info); + gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt); + gimple *vec_stmt = NULL; + gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ()); + bool res = vectorizable_early_exit (vinfo, root_stmt_info, &rgsi, + &vec_stmt, node, NULL); + gcc_assert (res); + return; + } else gcc_unreachable (); @@ -11246,7 +11352,7 @@ vect_schedule_slp (vec_info *vinfo, const vec<slp_instance> &slp_instances) vect_schedule_scc (vinfo, node, instance, scc_info, maxdfs, stack); if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ()) - vectorize_slp_instance_root_stmt (node, instance); + vectorize_slp_instance_root_stmt (vinfo, node, instance); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 4f6905f1541..9b14b96cb5a 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -411,6 +411,7 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, dump_printf_loc (MSG_NOTE, vect_location, "vec_stmt_relevant_p: induction forced for " "early break.\n"); + LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info); *live_p = true; } @@ -13016,7 +13017,7 @@ vectorizable_comparison (vec_info *vinfo, /* 
Check to see if the current early break given in STMT_INFO is valid for vectorization. */ -static bool +bool vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, gimple **vec_stmt, slp_tree slp_node, stmt_vector_for_cost *cost_vec) @@ -13040,8 +13041,13 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_op0; tree op0; enum vect_def_type dt0; - if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0, - &vectype)) + + /* Early break gcond kind SLP trees can be root only and have no children, + for instance in the case where the argument is an external. If that's + the case there is no operand to analyse use of. */ + if ((!slp_node || !SLP_TREE_CHILDREN (slp_node).is_empty ()) + && !vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0, + &vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -13049,16 +13055,30 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, return false; } + /* For SLP we don't want to use the type of the operands of the SLP node. When + vectorizing using SLP, slp_node will be the children of the gcond and we + want to use the type of the direct children which, since the gcond is root, + will be the current node, rather than a child node as vect_is_simple_use + assumes. 
*/ + if (slp_node) + vectype = SLP_TREE_VECTYPE (slp_node); + if (!vectype) return false; machine_mode mode = TYPE_MODE (vectype); - int ncopies; + int ncopies, vec_num; if (slp_node) - ncopies = 1; + { + ncopies = 1; + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + } else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + { + ncopies = vect_get_num_copies (loop_vinfo, vectype); + vec_num = 1; + } vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); @@ -13127,9 +13147,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, { if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, OPTIMIZE_FOR_SPEED)) - vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1); + vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, + vectype, 1); else - vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); + vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vectype, NULL); } return true; @@ -13143,9 +13165,18 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n"); - if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, - vec_stmt, slp_node, cost_vec)) - gcc_unreachable (); + /* For SLP we don't do codegen of the body starting from the gcond. The gconds are + roots and so by the time we get to them we have already codegened the SLP tree + and so we shouldn't try to do so again. The arguments have already been + vectorized. It's not very clean to do this here, but the masking code below is + complex and this keeps it all in one place to ease fixes and backports. Once we + drop the non-SLP loop vect or split vectorizable_* this can be simplified. 
*/ + if (!slp_node) + { + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + gcc_unreachable (); + } gimple *stmt = STMT_VINFO_STMT (stmt_info); basic_block cond_bb = gimple_bb (stmt); @@ -13177,8 +13208,8 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, for (unsigned i = 0; i < stmts.length (); i++) { tree stmt_mask - = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, - i); + = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies * vec_num, + vectype, i); stmt_mask = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask, stmts[i], &cond_gsi); @@ -13188,8 +13219,8 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, for (unsigned i = 0; i < stmts.length (); i++) { tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, - lens, ncopies, vectype, - stmts[i], i, 1); + lens, ncopies * vec_num, + vectype, stmts[i], i, 1); workset.quick_push (len_mask); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 2775d873ca4..11f921fbad8 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -296,7 +296,8 @@ enum slp_instance_kind { slp_inst_kind_reduc_group, slp_inst_kind_reduc_chain, slp_inst_kind_bb_reduc, - slp_inst_kind_ctor + slp_inst_kind_ctor, + slp_inst_kind_gcond }; /* SLP instance is a sequence of stmts in a loop that can be packed into @@ -1022,6 +1023,10 @@ public: /* Statements whose VUSES need updating if early break vectorization is to happen. */ auto_vec<gimple*> early_break_vuses; + + /* Record statements that are needed to be live for early break vectorization + but may not have an LC PHI node materialized yet in the exits. */ + auto_vec<stmt_vec_info> early_break_live_ivs; } *loop_vec_info; /* Access Functions. 
*/ @@ -1081,6 +1086,8 @@ public: #define LOOP_VINFO_EARLY_BRK_STORES(L) (L)->early_break_stores #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L) \ (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src) +#define LOOP_VINFO_EARLY_BREAKS_LIVE_IVS(L) \ + (L)->early_break_live_ivs #define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb #define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses #define LOOP_VINFO_LOOP_CONDS(L) (L)->conds @@ -2546,6 +2553,9 @@ extern bool vectorizable_phi (vec_info *, stmt_vec_info, gimple **, slp_tree, stmt_vector_for_cost *); extern bool vectorizable_recurr (loop_vec_info, stmt_vec_info, gimple **, slp_tree, stmt_vector_for_cost *); +extern bool vectorizable_early_exit (vec_info *, stmt_vec_info, + gimple_stmt_iterator *, gimple **, + slp_tree, stmt_vector_for_cost *); extern bool vect_emulated_vector_p (tree); extern bool vect_can_vectorize_without_simd_p (tree_code); extern bool vect_can_vectorize_without_simd_p (code_helper);