From de1389e24e8dc98b65bc8d40976172214ac4ecc0 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Sun, 8 Sep 2024 11:21:19 +0200 Subject: [PATCH] Fail vectorization when not using SLP and --param vect-force-slp == 1 The following adds --param vect-force-slp to enable the transition to full SLP. Full SLP is enforced during stmt analysis where it detects failed SLP discovery and at loop analysis time where it avoids analyzing a loop with SLP disabled. Failure to SLP causes vectorization to fail. * params.opt (vect-force-slp): New param, default 0. * doc/invoke.texi (--param vect-force-slp): Document. * tree-vect-loop.cc (vect_analyze_loop_2): When analyzing without SLP but --param vect-force-slp is 1 fail. * tree-vect-stmts.cc (vect_analyze_stmt): Fail vectorization for non-SLP stmts when --param vect-force-slp is 1. --- gcc/doc/invoke.texi | 3 +++ gcc/params.opt | 4 ++++ gcc/tree-vect-loop.cc | 6 ++++++ gcc/tree-vect-stmts.cc | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 511b8c8d311..b91fb9c9cca 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16955,6 +16955,9 @@ this parameter. The default value of this parameter is 50. @item vect-induction-float Enable loop vectorization of floating point inductions. +@item vect-force-slp +Force the use of SLP when vectorizing, fail if not possible. + @item vrp-block-limit Maximum number of basic blocks before VRP switches to a lower memory algorithm. diff --git a/gcc/params.opt b/gcc/params.opt index c17ba17b91b..949b4754498 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1198,6 +1198,10 @@ The maximum factor which the loop vectorizer applies to the cost of statements i Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 1) Param Optimization Enable loop vectorization of floating point inductions. 
+-param=vect-force-slp= +Common Joined UInteger Var(param_vect_force_slp) Init(0) IntegerRange(0, 1) Param Optimization +Force the use of SLP when vectorizing, fail if not possible. + -param=vrp-block-limit= Common Joined UInteger Var(param_vrp_block_limit) Init(150000) Optimization Param Maximum number of basic blocks before VRP switches to a fast model with less memory requirements. diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 62c7f90779f..d42694d1974 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2891,6 +2891,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, /* This is the point where we can re-start analysis with SLP forced off. */ start_over: + /* When we arrive here with SLP disabled and we are supposed + to use SLP for everything fail vectorization. */ + if (!slp && param_vect_force_slp) + return opt_result::failure_at (vect_location, + "may need non-SLP handling\n"); + /* Apply the suggested unrolling factor, this was determined by the backend during finish_cost the first time we ran the analyzis for this vector mode. */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b1353c91fce..495f45e40e6 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -13353,6 +13353,12 @@ vect_analyze_stmt (vec_info *vinfo, return opt_result::success (); } + /* When we arrive here with a non-SLP statement and we are supposed + to use SLP for everything fail vectorization. */ + if (!node && param_vect_force_slp) + return opt_result::failure_at (stmt_info->stmt, + "needs non-SLP handling\n"); + ok = true; if (!bb_vinfo && (STMT_VINFO_RELEVANT_P (stmt_info)