From 2f334a10bce0409c2cb4616496aafcb78f7db3d8 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 14 Apr 2025 12:44:02 +0200 Subject: [PATCH] tree-optimization/119757 - reject mixed mask/non-mask ldst SLP The following makes sure to not mix masked/non-masked stmts when forming a SLP node. PR tree-optimization/119757 * tree-vect-slp.cc (vect_build_slp_tree_1): Record and compare whether a stmt uses a mask. * gcc.dg/vect/pr119757.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr119757.c | 17 +++++++++++++++++ gcc/tree-vect-slp.cc | 24 ++++++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr119757.c diff --git a/gcc/testsuite/gcc.dg/vect/pr119757.c b/gcc/testsuite/gcc.dg/vect/pr119757.c new file mode 100644 index 00000000000..86442998628 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr119757.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ + +void base64_encode(const char *table64, + const char *inputbuff, int insize, + char * __restrict output) +{ + const unsigned char *in = (const unsigned char *)inputbuff; + + while(insize >= 3) { + *output++ = table64[ in[0] >> 2 ]; + *output++ = table64[ ((in[0] & 0x03) << 4) | (in[1] >> 4) ]; + *output++ = table64[ ((in[1] & 0x0F) << 2) | ((in[2] & 0xC0) >> 6) ]; + *output++ = table64[ in[2] & 0x3F ]; + insize -= 3; + in += 3; + } +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ecb4a6521de..19beeed8a3a 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1099,7 +1099,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, tree first_lhs = NULL_TREE; tree first_op1 = NULL_TREE; stmt_vec_info first_load = NULL, prev_first_load = NULL; - bool first_stmt_ldst_p = false; + bool first_stmt_ldst_p = false, first_stmt_ldst_masklen_p = false; bool first_stmt_phi_p = false; int first_reduc_idx = -1; bool maybe_soft_fail = false; @@ -1133,6 +1133,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, FOR_EACH_VEC_ELT (stmts, 
i, stmt_info) { bool ldst_p = false; + bool ldst_masklen_p = false; bool phi_p = false; code_helper rhs_code = ERROR_MARK; @@ -1195,17 +1196,22 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, else rhs_code = CALL_EXPR; - if (cfn == CFN_MASK_LOAD - || cfn == CFN_GATHER_LOAD - || cfn == CFN_MASK_GATHER_LOAD - || cfn == CFN_MASK_LEN_GATHER_LOAD - || cfn == CFN_SCATTER_STORE - || cfn == CFN_MASK_SCATTER_STORE - || cfn == CFN_MASK_LEN_SCATTER_STORE) + if (cfn == CFN_GATHER_LOAD + || cfn == CFN_SCATTER_STORE) ldst_p = true; + else if (cfn == CFN_MASK_LOAD + || cfn == CFN_MASK_GATHER_LOAD + || cfn == CFN_MASK_LEN_GATHER_LOAD + || cfn == CFN_MASK_SCATTER_STORE + || cfn == CFN_MASK_LEN_SCATTER_STORE) + { + ldst_p = true; + ldst_masklen_p = true; + } else if (cfn == CFN_MASK_STORE) { ldst_p = true; + ldst_masklen_p = true; rhs_code = CFN_MASK_STORE; } else if (cfn == CFN_GOMP_SIMD_LANE) @@ -1246,6 +1252,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, first_lhs = lhs; first_stmt_code = rhs_code; first_stmt_ldst_p = ldst_p; + first_stmt_ldst_masklen_p = ldst_masklen_p; first_stmt_phi_p = phi_p; first_reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); @@ -1364,6 +1371,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && (STMT_VINFO_GATHER_SCATTER_P (stmt_info) != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info))) || first_stmt_ldst_p != ldst_p + || (ldst_p && first_stmt_ldst_masklen_p != ldst_masklen_p) || first_stmt_phi_p != phi_p) { if (dump_enabled_p ())