diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 1c17149e1f0..70bd83005d7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -758,18 +758,15 @@ public:
     if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
       return res;
 
-    /* If the dividend is all zeros, fold to zero vector.  */
+    /* If the divisor is all ones, fold to dividend.  */
     tree op1 = gimple_call_arg (f.call, 1);
-    if (integer_zerop (op1))
-      return gimple_build_assign (f.lhs, op1);
-
-    /* If the divisor is all zeros, fold to zero vector.  */
-    tree pg = gimple_call_arg (f.call, 0);
     tree op2 = gimple_call_arg (f.call, 2);
-    if (integer_zerop (op2)
-	&& (f.pred != PRED_m
-	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
-      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+    if (integer_onep (op2))
+      return f.fold_active_lanes_to (op1);
+
+    /* If one of the operands is all zeros, fold to zero vector.  */
+    if (integer_zerop (op1) || integer_zerop (op2))
+      return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
     /* If the divisor is a uniform power of 2, fold to a shift
        instruction.  */
@@ -2024,20 +2021,21 @@ public:
     if (auto *res = f.fold_const_binary (MULT_EXPR))
       return res;
 
-    /* If one of the operands is all zeros, fold to zero vector.  */
+    /* If one of the operands is all ones, fold to other operand.  */
     tree op1 = gimple_call_arg (f.call, 1);
-    if (integer_zerop (op1))
-      return gimple_build_assign (f.lhs, op1);
-
-    tree pg = gimple_call_arg (f.call, 0);
     tree op2 = gimple_call_arg (f.call, 2);
-    if (integer_zerop (op2)
-	&& (f.pred != PRED_m
-	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
-      return gimple_build_assign (f.lhs, build_zero_cst (TREE_TYPE (f.lhs)));
+    if (integer_onep (op1))
+      return f.fold_active_lanes_to (op2);
+    if (integer_onep (op2))
+      return f.fold_active_lanes_to (op1);
+
+    /* If one of the operands is all zeros, fold to zero vector.  */
+    if (integer_zerop (op1) || integer_zerop (op2))
+      return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
     /* If one of the operands is a uniform power of 2, fold to a left shift
        by immediate.  */
+    tree pg = gimple_call_arg (f.call, 0);
     tree op1_cst = uniform_integer_cst_p (op1);
     tree op2_cst = uniform_integer_cst_p (op2);
     tree shift_op1, shift_op2;
@@ -2056,9 +2054,6 @@ public:
     else
       return NULL;
 
-    if (integer_onep (shift_op2))
-      return NULL;
-
     shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
 				  tree_log2 (shift_op2));
     function_instance instance ("svlsl", functions::svlsl,
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index e7c703c987e..41673745cfe 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3636,6 +3636,33 @@ gimple_folder::fold_const_binary (enum tree_code code)
   return NULL;
 }
 
+/* Fold the active lanes to X and set the inactive lanes according to the
+   predication.  Return the new statement.  */
+gimple *
+gimple_folder::fold_active_lanes_to (tree x)
+{
+  /* If predication is _x or the predicate is ptrue, fold to X.  */
+  if (pred == PRED_x
+      || is_ptrue (gimple_call_arg (call, 0), type_suffix (0).element_bytes))
+    return gimple_build_assign (lhs, x);
+
+  /* If the predication is _z or _m, calculate a vector that supplies the
+     values of inactive lanes (the first vector argument for m and a zero
+     vector for z).  */
+  tree vec_inactive;
+  if (pred == PRED_z)
+    vec_inactive = build_zero_cst (TREE_TYPE (lhs));
+  else
+    vec_inactive = gimple_call_arg (call, 1);
+  if (operand_equal_p (x, vec_inactive, 0))
+    return gimple_build_assign (lhs, x);
+
+  gimple_seq stmts = NULL;
+  tree pred = convert_pred (stmts, vector_type (0), 0);
+  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+  return gimple_build_assign (lhs, VEC_COND_EXPR, pred, x, vec_inactive);
+}
+
 /* Try to fold the call.  Return the new statement on success and null
    on failure.  */
 gimple *
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 645e56badbe..4cdc0541bdc 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -637,6 +637,7 @@ public:
   gimple *fold_to_ptrue ();
   gimple *fold_to_vl_pred (unsigned int);
   gimple *fold_const_binary (enum tree_code);
+  gimple *fold_active_lanes_to (tree);
 
   gimple *fold ();
 
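For illustration, here is a minimal sketch of what the new folds mean at the
source level (not part of the patch; the function names are made up for the
example, and the expected results mirror the test changes below):

#include <arm_sve.h>

/* _m with the dividend tied to the result: the inactive lanes already
   take their values from the dividend, so dividing by 1 folds to the
   dividend itself and no instruction remains.  */
svint32_t div_by_one_m (svbool_t pg, svint32_t x)
{
  return svdiv_n_s32_m (pg, x, 1);	/* folds to plain "x" */
}

/* _z predication: active lanes yield x, inactive lanes must be zero, so
   fold_active_lanes_to emits a VEC_COND_EXPR, which becomes a SEL against
   a zero vector instead of a MOVPRFX/MUL sequence.  */
svint32_t mul_by_one_z (svbool_t pg, svint32_t x)
{
  return svmul_s32_z (pg, svdup_s32 (1), x);	/* movi/mov #0 + sel */
}

Note that the svmul shift fold's early return for integer_onep (shift_op2)
became unreachable with this change, because multiplications by 1 are now
folded before the power-of-2 path is reached; the hunk above removes it.
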
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
index d5a23bf0726..a338b28805e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
@@ -57,7 +57,6 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t,
 
 /*
 ** div_1_s32_m_tied1:
-**	sel	z0\.s, p0, z0\.s, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
@@ -66,7 +65,7 @@ TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
 
 /*
 ** div_1_s32_m_untied:
-**	sel	z0\.s, p0, z1\.s, z1\.s
+**	mov	z0\.d, z1\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s32_m_untied, svint32_t,
@@ -217,9 +216,8 @@ TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t,
 
 /*
 ** div_1_s32_z_tied1:
-**	mov	(z[0-9]+\.s), #1
-**	movprfx	z0\.s, p0/z, z0\.s
-**	sdiv	z0\.s, p0/m, z0\.s, \1
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
@@ -228,9 +226,8 @@ TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
 
 /*
 ** div_1_s32_z_untied:
-**	mov	z0\.s, #1
-**	movprfx	z0\.s, p0/z, z0\.s
-**	sdivr	z0\.s, p0/m, z0\.s, z1\.s
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z1\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s32_z_untied, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
index cfed6f9c1b3..9b804d4af16 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c
@@ -57,7 +57,6 @@ TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t,
 
 /*
 ** div_1_s64_m_tied1:
-**	sel	z0\.d, p0, z0\.d, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t,
@@ -66,7 +65,7 @@ TEST_UNIFORM_Z (div_1_s64_m_tied1, svint64_t,
 
 /*
 ** div_1_s64_m_untied:
-**	sel	z0\.d, p0, z1\.d, z1\.d
+**	mov	z0\.d, z1\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s64_m_untied, svint64_t,
@@ -217,9 +216,8 @@ TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t,
 
 /*
 ** div_1_s64_z_tied1:
-**	mov	(z[0-9]+\.d), #1
-**	movprfx	z0\.d, p0/z, z0\.d
-**	sdiv	z0\.d, p0/m, z0\.d, \1
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t,
@@ -228,9 +226,8 @@ TEST_UNIFORM_Z (div_1_s64_z_tied1, svint64_t,
 
 /*
 ** div_1_s64_z_untied:
-**	mov	z0\.d, #1
-**	movprfx	z0\.d, p0/z, z0\.d
-**	sdivr	z0\.d, p0/m, z0\.d, z1\.d
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z1\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_s64_z_untied, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
index 9707664caf4..1e8d6104845 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c
@@ -57,7 +57,6 @@ TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t,
 
 /*
 ** div_1_u32_m_tied1:
-**	sel	z0\.s, p0, z0\.s, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t,
@@ -66,7 +65,7 @@ TEST_UNIFORM_Z (div_1_u32_m_tied1, svuint32_t,
 
 /*
 ** div_1_u32_m_untied:
-**	sel	z0\.s, p0, z1\.s, z1\.s
+**	mov	z0\.d, z1\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u32_m_untied, svuint32_t,
@@ -196,9 +195,8 @@ TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t,
 
 /*
 ** div_1_u32_z_tied1:
-**	mov	(z[0-9]+\.s), #1
-**	movprfx	z0\.s, p0/z, z0\.s
-**	udiv	z0\.s, p0/m, z0\.s, \1
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t,
@@ -207,9 +205,8 @@ TEST_UNIFORM_Z (div_1_u32_z_tied1, svuint32_t,
 
 /*
 ** div_1_u32_z_untied:
-**	mov	z0\.s, #1
-**	movprfx	z0\.s, p0/z, z0\.s
-**	udivr	z0\.s, p0/m, z0\.s, z1\.s
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z1\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u32_z_untied, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
index 5247ebdac7a..ab049affb63 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c
@@ -57,7 +57,6 @@ TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t,
 
 /*
 ** div_1_u64_m_tied1:
-**	sel	z0\.d, p0, z0\.d, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t,
@@ -66,7 +65,7 @@ TEST_UNIFORM_Z (div_1_u64_m_tied1, svuint64_t,
 
 /*
 ** div_1_u64_m_untied:
-**	sel	z0\.d, p0, z1\.d, z1\.d
+**	mov	z0\.d, z1\.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u64_m_untied, svuint64_t,
@@ -196,9 +195,8 @@ TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t,
 
 /*
 ** div_1_u64_z_tied1:
-**	mov	(z[0-9]+\.d), #1
-**	movprfx	z0\.d, p0/z, z0\.d
-**	udiv	z0\.d, p0/m, z0\.d, \1
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t,
@@ -207,9 +205,8 @@ TEST_UNIFORM_Z (div_1_u64_z_tied1, svuint64_t,
 
 /*
 ** div_1_u64_z_untied:
-**	mov	z0\.d, #1
-**	movprfx	z0\.d, p0/z, z0\.d
-**	udivr	z0\.d, p0/m, z0\.d, z1\.d
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z1\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (div_1_u64_z_untied, svuint64_t,
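The updated _z expectations accept the zero vector being materialized either
by an Advanced SIMD movi or by an SVE mov, which is what the pattern
"movi? [vdz]([0-9]+)..." captures. A sketch of the two shapes the div tests
above check (illustrative function names, not part of the patch):

#include <arm_sve.h>

/* Tied _z: x / 1 keeps x in the active lanes and zeros the rest, so a
   single SEL against a freshly zeroed vector suffices.  */
svint64_t div_one_z_tied (svbool_t pg, svint64_t x)
{
  return svdiv_n_s64_z (pg, x, 1);	/* movi/mov #0, then sel */
}

/* Untied _m: every lane, active or not, ends up as a copy of the
   dividend, so only a full-register move remains.  */
svint64_t div_one_m_untied (svbool_t pg, svint64_t x)
{
  return svdiv_n_s64_m (pg, x, 1);	/* mov z0.d, z1.d */
}
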
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
index 52e35dc7f95..a2a03aba408 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
@@ -114,13 +114,22 @@ TEST_UNIFORM_Z (mul_intminnop2_s16_m_tied1, svint16_t,
 
 /*
 ** mul_1_s16_m_tied1:
-**	sel	z0\.h, p0, z0\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s16_m_tied1, svint16_t,
 		z0 = svmul_n_s16_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_s16_m_tied2:
+**	mov	(z[0-9]+\.h), #1
+**	sel	z0\.h, p0, z0\.h, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_m_tied2, svint16_t,
+		z0 = svmul_s16_m (p0, svdup_s16 (1), z0),
+		z0 = svmul_m (p0, svdup_s16 (1), z0))
+
 /*
 ** mul_3_s16_m_tied1:
 **	mov	(z[0-9]+\.h), #3
@@ -305,15 +314,24 @@ TEST_UNIFORM_Z (mul_intminnop2_s16_z_tied1, svint16_t,
 
 /*
 ** mul_1_s16_z_tied1:
-**	mov	z31.h, #1
-**	movprfx	z0.h, p0/z, z0.h
-**	mul	z0.h, p0/m, z0.h, z31.h
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.h, p0, z0\.h, z\1.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s16_z_tied1, svint16_t,
 		z0 = svmul_n_s16_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_s16_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.h, p0, z0\.h, z\1.h
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_z_tied2, svint16_t,
+		z0 = svmul_s16_z (p0, svdup_s16 (1), z0),
+		z0 = svmul_z (p0, svdup_s16 (1), z0))
+
 /*
 ** mul_3_s16_z_tied1:
 **	mov	(z[0-9]+\.h), #3
@@ -486,6 +504,23 @@ TEST_UNIFORM_Z (mul_1_s16_x_tied1, svint16_t,
 		z0 = svmul_n_s16_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_s16_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_x_tied2, svint16_t,
+		z0 = svmul_s16_x (p0, svdup_s16 (1), z0),
+		z0 = svmul_x (p0, svdup_s16 (1), z0))
+
+/*
+** mul_1op1_s16_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s16_x_untied, svint16_t,
+		z0 = svmul_s16_x (p0, svdup_s16 (1), z1),
+		z0 = svmul_x (p0, svdup_s16 (1), z1))
+
 /*
 ** mul_3_s16_x_tied1:
 **	mul	z0\.h, z0\.h, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
index 0974038e67f..372b9f4a008 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
@@ -114,13 +114,22 @@ TEST_UNIFORM_Z (mul_intminnop2_s32_m_tied1, svint32_t,
 
 /*
 ** mul_1_s32_m_tied1:
-**	sel	z0\.s, p0, z0\.s, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s32_m_tied1, svint32_t,
 		z0 = svmul_n_s32_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_s32_m_tied2:
+**	mov	(z[0-9]+\.s), #1
+**	sel	z0\.s, p0, z0\.s, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_m_tied2, svint32_t,
+		z0 = svmul_s32_m (p0, svdup_s32 (1), z0),
+		z0 = svmul_m (p0, svdup_s32 (1), z0))
+
 /*
 ** mul_3_s32_m_tied1:
 **	mov	(z[0-9]+\.s), #3
@@ -305,15 +314,24 @@ TEST_UNIFORM_Z (mul_intminnop2_s32_z_tied1, svint32_t,
 
 /*
 ** mul_1_s32_z_tied1:
-**	mov	z31.s, #1
-**	movprfx	z0.s, p0/z, z0.s
-**	mul	z0.s, p0/m, z0.s, z31.s
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s32_z_tied1, svint32_t,
 		z0 = svmul_n_s32_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_s32_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_z_tied2, svint32_t,
+		z0 = svmul_s32_z (p0, svdup_s32 (1), z0),
+		z0 = svmul_z (p0, svdup_s32 (1), z0))
+
 /*
 ** mul_3_s32_z_tied1:
 **	mov	(z[0-9]+\.s), #3
@@ -486,6 +504,23 @@ TEST_UNIFORM_Z (mul_1_s32_x_tied1, svint32_t,
 		z0 = svmul_n_s32_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_s32_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_x_tied2, svint32_t,
+		z0 = svmul_s32_x (p0, svdup_s32 (1), z0),
+		z0 = svmul_x (p0, svdup_s32 (1), z0))
+
+/*
+** mul_1op1_s32_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s32_x_untied, svint32_t,
+		z0 = svmul_s32_x (p0, svdup_s32 (1), z1),
+		z0 = svmul_x (p0, svdup_s32 (1), z1))
+
 /*
 ** mul_3_s32_x_tied1:
 **	mul	z0\.s, z0\.s, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
index 537eb0eef0b..c638e254655 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
@@ -114,13 +114,22 @@ TEST_UNIFORM_Z (mul_intminnop2_s64_m_tied1, svint64_t,
 
 /*
 ** mul_1_s64_m_tied1:
-**	sel	z0\.d, p0, z0\.d, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s64_m_tied1, svint64_t,
 		z0 = svmul_n_s64_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_s64_m_tied2:
+**	mov	(z[0-9]+\.d), #1
+**	sel	z0\.d, p0, z0\.d, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_m_tied2, svint64_t,
+		z0 = svmul_s64_m (p0, svdup_s64 (1), z0),
+		z0 = svmul_m (p0, svdup_s64 (1), z0))
+
 /*
 ** mul_2_s64_m_tied1:
 **	lsl	z0\.d, p0/m, z0\.d, #1
@@ -314,15 +323,24 @@ TEST_UNIFORM_Z (mul_intminnop2_s64_z_tied1, svint64_t,
 
 /*
 ** mul_1_s64_z_tied1:
-**	mov	z31.d, #1
-**	movprfx	z0.d, p0/z, z0.d
-**	mul	z0.d, p0/m, z0.d, z31.d
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s64_z_tied1, svint64_t,
 		z0 = svmul_n_s64_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_s64_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_z_tied2, svint64_t,
+		z0 = svmul_s64_z (p0, svdup_s64 (1), z0),
+		z0 = svmul_z (p0, svdup_s64 (1), z0))
+
 /*
 ** mul_2_s64_z_tied1:
 **	movprfx	z0.d, p0/z, z0.d
@@ -505,6 +523,23 @@ TEST_UNIFORM_Z (mul_1_s64_x_tied1, svint64_t,
 		z0 = svmul_n_s64_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_s64_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_x_tied2, svint64_t,
+		z0 = svmul_s64_x (p0, svdup_s64 (1), z0),
+		z0 = svmul_x (p0, svdup_s64 (1), z0))
+
+/*
+** mul_1op1_s64_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s64_x_untied, svint64_t,
+		z0 = svmul_s64_x (p0, svdup_s64 (1), z1),
+		z0 = svmul_x (p0, svdup_s64 (1), z1))
+
 /*
 ** mul_2_s64_x_tied1:
 **	add	z0\.d, z0\.d, z0\.d
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
index 0def4bd4974..37a490ff611 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
@@ -114,13 +114,22 @@ TEST_UNIFORM_Z (mul_intminnop2_s8_m_tied1, svint8_t,
 
 /*
 ** mul_1_s8_m_tied1:
-**	sel	z0\.b, p0, z0\.b, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s8_m_tied1, svint8_t,
 		z0 = svmul_n_s8_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_s8_m_tied2:
+**	mov	(z[0-9]+)\.b, #1
+**	sel	z0\.b, p0, z0\.b, \1\.b
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_m_tied2, svint8_t,
+		z0 = svmul_s8_m (p0, svdup_s8 (1), z0),
+		z0 = svmul_m (p0, svdup_s8 (1), z0))
+
 /*
 ** mul_3_s8_m_tied1:
 **	mov	(z[0-9]+\.b), #3
@@ -305,15 +314,24 @@ TEST_UNIFORM_Z (mul_intminnop2_s8_z_tied1, svint8_t,
 
 /*
 ** mul_1_s8_z_tied1:
-**	mov	z31.b, #1
-**	movprfx	z0.b, p0/z, z0.b
-**	mul	z0.b, p0/m, z0.b, z31.b
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.b, p0, z0\.b, z\1.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_s8_z_tied1, svint8_t,
 		z0 = svmul_n_s8_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_s8_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.b, p0, z0\.b, z\1.b
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_z_tied2, svint8_t,
+		z0 = svmul_s8_z (p0, svdup_s8 (1), z0),
+		z0 = svmul_z (p0, svdup_s8 (1), z0))
+
 /*
 ** mul_3_s8_z_tied1:
 **	mov	(z[0-9]+\.b), #3
@@ -486,6 +504,23 @@ TEST_UNIFORM_Z (mul_1_s8_x_tied1, svint8_t,
 		z0 = svmul_n_s8_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_s8_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_x_tied2, svint8_t,
+		z0 = svmul_s8_x (p0, svdup_s8 (1), z0),
+		z0 = svmul_x (p0, svdup_s8 (1), z0))
+
+/*
+** mul_1op1_s8_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_s8_x_untied, svint8_t,
+		z0 = svmul_s8_x (p0, svdup_s8 (1), z1),
+		z0 = svmul_x (p0, svdup_s8 (1), z1))
+
 /*
 ** mul_3_s8_x_tied1:
 **	mul	z0\.b, z0\.b, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
index cc83123aacb..bdf6fcb98d6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
@@ -105,13 +105,22 @@ TEST_UNIFORM_Z (mul_maxpownop2_u16_m_tied1, svuint16_t,
 
 /*
 ** mul_1_u16_m_tied1:
-**	sel	z0\.h, p0, z0\.h, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u16_m_tied1, svuint16_t,
 		z0 = svmul_n_u16_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_u16_m_tied2:
+**	mov	(z[0-9]+\.h), #1
+**	sel	z0\.h, p0, z0\.h, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_m_tied2, svuint16_t,
+		z0 = svmul_u16_m (p0, svdup_u16 (1), z0),
+		z0 = svmul_m (p0, svdup_u16 (1), z0))
+
 /*
 ** mul_3_u16_m_tied1:
 **	mov	(z[0-9]+\.h), #3
@@ -286,15 +295,24 @@ TEST_UNIFORM_Z (mul_maxpownop2_u16_z_tied1, svuint16_t,
 
 /*
 ** mul_1_u16_z_tied1:
-**	mov	z31.h, #1
-**	movprfx	z0.h, p0/z, z0.h
-**	mul	z0.h, p0/m, z0.h, z31.h
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.h, p0, z0\.h, z\1.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u16_z_tied1, svuint16_t,
 		z0 = svmul_n_u16_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_u16_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.h, p0, z0\.h, z\1.h
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_z_tied2, svuint16_t,
+		z0 = svmul_u16_z (p0, svdup_u16 (1), z0),
+		z0 = svmul_z (p0, svdup_u16 (1), z0))
+
 /*
 ** mul_3_u16_z_tied1:
 **	mov	(z[0-9]+\.h), #3
@@ -458,6 +476,23 @@ TEST_UNIFORM_Z (mul_1_u16_x_tied1, svuint16_t,
 		z0 = svmul_n_u16_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_u16_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_x_tied2, svuint16_t,
+		z0 = svmul_u16_x (p0, svdup_u16 (1), z0),
+		z0 = svmul_x (p0, svdup_u16 (1), z0))
+
+/*
+** mul_1op1_u16_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u16_x_untied, svuint16_t,
+		z0 = svmul_u16_x (p0, svdup_u16 (1), z1),
+		z0 = svmul_x (p0, svdup_u16 (1), z1))
+
 /*
 ** mul_3_u16_x_tied1:
 **	mul	z0\.h, z0\.h, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
index 9d63731d019..a61e85fa12d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
@@ -105,13 +105,22 @@ TEST_UNIFORM_Z (mul_maxpownop2_u32_m_tied1, svuint32_t,
 
 /*
 ** mul_1_u32_m_tied1:
-**	sel	z0\.s, p0, z0\.s, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u32_m_tied1, svuint32_t,
 		z0 = svmul_n_u32_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_u32_m_tied2:
+**	mov	(z[0-9]+\.s), #1
+**	sel	z0\.s, p0, z0\.s, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_m_tied2, svuint32_t,
+		z0 = svmul_u32_m (p0, svdup_u32 (1), z0),
+		z0 = svmul_m (p0, svdup_u32 (1), z0))
+
 /*
 ** mul_3_u32_m_tied1:
 **	mov	(z[0-9]+\.s), #3
@@ -286,15 +295,24 @@ TEST_UNIFORM_Z (mul_maxpownop2_u32_z_tied1, svuint32_t,
 
 /*
 ** mul_1_u32_z_tied1:
-**	mov	z31.s, #1
-**	movprfx	z0.s, p0/z, z0.s
-**	mul	z0.s, p0/m, z0.s, z31.s
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u32_z_tied1, svuint32_t,
 		z0 = svmul_n_u32_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_u32_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.s, p0, z0\.s, z\1.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_z_tied2, svuint32_t,
+		z0 = svmul_u32_z (p0, svdup_u32 (1), z0),
+		z0 = svmul_z (p0, svdup_u32 (1), z0))
+
 /*
 ** mul_3_u32_z_tied1:
 **	mov	(z[0-9]+\.s), #3
@@ -458,6 +476,23 @@ TEST_UNIFORM_Z (mul_1_u32_x_tied1, svuint32_t,
 		z0 = svmul_n_u32_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_u32_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_x_tied2, svuint32_t,
+		z0 = svmul_u32_x (p0, svdup_u32 (1), z0),
+		z0 = svmul_x (p0, svdup_u32 (1), z0))
+
+/*
+** mul_1op1_u32_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u32_x_untied, svuint32_t,
+		z0 = svmul_u32_x (p0, svdup_u32 (1), z1),
+		z0 = svmul_x (p0, svdup_u32 (1), z1))
+
 /*
 ** mul_3_u32_x_tied1:
 **	mul	z0\.s, z0\.s, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
index 4f501df4fd5..eee1f8a0c99 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
@@ -105,13 +105,22 @@ TEST_UNIFORM_Z (mul_maxpownop2_u64_m_tied1, svuint64_t,
 
 /*
 ** mul_1_u64_m_tied1:
-**	sel	z0\.d, p0, z0\.d, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u64_m_tied1, svuint64_t,
 		z0 = svmul_n_u64_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_u64_m_tied2:
+**	mov	(z[0-9]+\.d), #1
+**	sel	z0\.d, p0, z0\.d, \1
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_m_tied2, svuint64_t,
+		z0 = svmul_u64_m (p0, svdup_u64 (1), z0),
+		z0 = svmul_m (p0, svdup_u64 (1), z0))
+
 /*
 ** mul_2_u64_m_tied1:
 **	lsl	z0\.d, p0/m, z0\.d, #1
@@ -295,15 +304,24 @@ TEST_UNIFORM_Z (mul_maxpownop2_u64_z_tied1, svuint64_t,
 
 /*
 ** mul_1_u64_z_tied1:
-**	mov	z31.d, #1
-**	movprfx	z0.d, p0/z, z0.d
-**	mul	z0.d, p0/m, z0.d, z31.d
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u64_z_tied1, svuint64_t,
 		z0 = svmul_n_u64_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_u64_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.d, p0, z0\.d, z\1.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_z_tied2, svuint64_t,
+		z0 = svmul_u64_z (p0, svdup_u64 (1), z0),
+		z0 = svmul_z (p0, svdup_u64 (1), z0))
+
 /*
 ** mul_2_u64_z_tied1:
 **	movprfx	z0.d, p0/z, z0.d
@@ -477,6 +495,23 @@ TEST_UNIFORM_Z (mul_1_u64_x_tied1, svuint64_t,
 		z0 = svmul_n_u64_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_u64_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_x_tied2, svuint64_t,
+		z0 = svmul_u64_x (p0, svdup_u64 (1), z0),
+		z0 = svmul_x (p0, svdup_u64 (1), z0))
+
+/*
+** mul_1op1_u64_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u64_x_untied, svuint64_t,
+		z0 = svmul_u64_x (p0, svdup_u64 (1), z1),
+		z0 = svmul_x (p0, svdup_u64 (1), z1))
+
 /*
 ** mul_2_u64_x_tied1:
 **	add	z0\.d, z0\.d, z0\.d
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
index e56fa6069b0..06ee1b3e7c8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
@@ -105,13 +105,22 @@ TEST_UNIFORM_Z (mul_maxpownop2_u8_m_tied1, svuint8_t,
 
 /*
 ** mul_1_u8_m_tied1:
-**	sel	z0\.b, p0, z0\.b, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u8_m_tied1, svuint8_t,
 		z0 = svmul_n_u8_m (p0, z0, 1),
 		z0 = svmul_m (p0, z0, 1))
 
+/*
+** mul_1op1_u8_m_tied2:
+**	mov	(z[0-9]+)\.b, #1
+**	sel	z0\.b, p0, z0\.b, \1\.b
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_m_tied2, svuint8_t,
+		z0 = svmul_u8_m (p0, svdup_u8 (1), z0),
+		z0 = svmul_m (p0, svdup_u8 (1), z0))
+
 /*
 ** mul_3_u8_m_tied1:
 **	mov	(z[0-9]+\.b), #3
@@ -286,15 +295,24 @@ TEST_UNIFORM_Z (mul_maxpownop2_u8_z_tied1, svuint8_t,
 
 /*
 ** mul_1_u8_z_tied1:
-**	mov	z31.b, #1
-**	movprfx	z0.b, p0/z, z0.b
-**	mul	z0.b, p0/m, z0.b, z31.b
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.b, p0, z0\.b, z\1.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_1_u8_z_tied1, svuint8_t,
 		z0 = svmul_n_u8_z (p0, z0, 1),
 		z0 = svmul_z (p0, z0, 1))
 
+/*
+** mul_1op1_u8_z_tied2:
+**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
+**	sel	z0\.b, p0, z0\.b, z\1\.b
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_z_tied2, svuint8_t,
+		z0 = svmul_u8_z (p0, svdup_u8 (1), z0),
+		z0 = svmul_z (p0, svdup_u8 (1), z0))
+
 /*
 ** mul_3_u8_z_tied1:
 **	mov	(z[0-9]+\.b), #3
@@ -458,6 +476,23 @@ TEST_UNIFORM_Z (mul_1_u8_x_tied1, svuint8_t,
 		z0 = svmul_n_u8_x (p0, z0, 1),
 		z0 = svmul_x (p0, z0, 1))
 
+/*
+** mul_1op1_u8_x_tied2:
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_x_tied2, svuint8_t,
+		z0 = svmul_u8_x (p0, svdup_u8 (1), z0),
+		z0 = svmul_x (p0, svdup_u8 (1), z0))
+
+/*
+** mul_1op1_u8_x_untied:
+**	mov	z0\.d, z1\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_1op1_u8_x_untied, svuint8_t,
+		z0 = svmul_u8_x (p0, svdup_u8 (1), z1),
+		z0 = svmul_x (p0, svdup_u8 (1), z1))
+
 /*
 ** mul_3_u8_x_tied1:
 **	mul	z0\.b, z0\.b, #3
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fold_div_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/fold_div_zero.c
index f1235fc1755..3144df145ba 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/fold_div_zero.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/fold_div_zero.c
@@ -85,8 +85,7 @@ svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
 
 /*
 ** s64_m_pg_op2:
-**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-**	sdiv	(z[0-9]\.d), p[0-7]/m, \2, z\1\.d
+**	mov	z0\.d, p0/m, #0
 **	ret
 */
 svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
@@ -146,8 +145,7 @@ svint64_t s64_n_z_pg_op2 (svbool_t pg, svint64_t op1)
 
 /*
 ** s64_n_m_pg_op2:
-**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-**	sdiv	(z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+**	mov	z0\.d, p0/m, #0
 **	ret
 */
 svint64_t s64_n_m_pg_op2 (svbool_t pg, svint64_t op1)
@@ -267,8 +265,7 @@ svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
 
 /*
 ** u64_m_pg_op2:
-**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+**	mov	z0\.d, p0/m, #0
 **	ret
 */
 svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
@@ -328,8 +325,7 @@ svuint64_t u64_n_z_pg_op2 (svbool_t pg, svuint64_t op1)
 
 /*
 ** u64_n_m_pg_op2:
-**	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
-**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, z\1\.d
+**	mov	z0\.d, p0/m, #0
 **	ret
 */
 svuint64_t u64_n_m_pg_op2 (svbool_t pg, svuint64_t op1)
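The fold_div_zero.c updates show the practical effect for _m division by an
all-zeros divisor: fold_active_lanes_to selects zero in the active lanes and
keeps op1 elsewhere, so a single predicated move replaces materializing a
zero vector and issuing an SDIV/UDIV. A sketch of the affected test shape
(signature as in the test file; SVE integer division by zero yields 0):

#include <arm_sve.h>

svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
{
  /* Active lanes: op1 / 0 folds to 0; inactive lanes keep op1.
     Expected code is now a single "mov z0.d, p0/m, #0".  */
  return svdiv_m (pg, op1, svdup_s64 (0));
}
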
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
index 6af00439e39..c369d5be167 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
@@ -10,6 +10,8 @@ typedef svfloat32_t svfloat32_ __attribute__((arm_sve_vector_bits(128)));
 typedef svfloat64_t svfloat64_ __attribute__((arm_sve_vector_bits(128)));
 typedef svint32_t svint32_ __attribute__((arm_sve_vector_bits(128)));
 typedef svint64_t svint64_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint8_t svuint8_ __attribute__((arm_sve_vector_bits(128)));
+typedef svuint16_t svuint16_ __attribute__((arm_sve_vector_bits(128)));
 typedef svuint32_t svuint32_ __attribute__((arm_sve_vector_bits(128)));
 typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
 
@@ -84,6 +86,10 @@ typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
   TEST_VALUES_U_1 (64, 4, 7) \
   TEST_VALUES_U_1 (32, 7, 3) \
   TEST_VALUES_U_1 (64, 7, 3) \
+  TEST_VALUES_U_1 (8, 1, 11) \
+  TEST_VALUES_U_1 (16, 1, UINT16_MAX) \
+  TEST_VALUES_U_1 (32, 1, 0) \
+  TEST_VALUES_U_1 (64, 1, (1ULL << 63)) \
   TEST_VALUES_U_1 (32, 11, 1) \
   TEST_VALUES_U_1 (64, 11, 1)