From 39e763ca876fba4449c0781f61115e063f481add Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 15 Nov 2024 10:04:23 +0100 Subject: [PATCH] tree-optimization/117606 - SLP and single element interleaving The following tries to reduce the amount of difference between SLP and non-SLP for single-element interleaving load classification. This fixes another fallout of --param vect-force-slp=1 PR tree-optimization/117606 * tree-vect-stmts.cc (get_group_load_store_type): For single element interleaving also fall back to VMAT_ELEMENTWISE if a left-over permutation isn't supported. --- gcc/tree-vect-stmts.cc | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index ab5ea038d1d..7ffee2c4cc8 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2082,8 +2082,9 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *memory_access_type = VMAT_CONTIGUOUS; /* If this is single-element interleaving with an element - distance that leaves unused vector loads around punt - we - at least create very sub-optimal code in that case (and + distance that leaves unused vector loads around fall back + to elementwise access if possible - we otherwise at least + create very sub-optimal code in that case (and blow up memory, see PR65518). */ if (loop_vinfo && single_element_p @@ -2110,6 +2111,28 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, } } + /* For single-element interleaving also fall back to elementwise + access in case we did not lower a permutation and cannot + code generate it. 
*/ + auto_vec<tree> temv; + unsigned n_perms; + if (loop_vinfo + && single_element_p + && SLP_TREE_LANES (slp_node) == 1 + && (*memory_access_type == VMAT_CONTIGUOUS + || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) + && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () + && !vect_transform_slp_perm_load + (loop_vinfo, slp_node, temv, NULL, + LOOP_VINFO_VECT_FACTOR (loop_vinfo), true, &n_perms)) + { + *memory_access_type = VMAT_ELEMENTWISE; + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "single-element interleaving permutation not " + "supported, using elementwise access\n"); + } + overrun_p = (loop_vinfo && gap != 0 && *memory_access_type != VMAT_ELEMENTWISE); if (overrun_p && vls_type != VLS_LOAD)