SVE intrinsics: Fold svmul by -1 to svneg for unsigned types
As follow-up to
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665472.html,
this patch implements folding of svmul by -1 to svneg for unsigned SVE
vector types. The key idea is to reuse the existing code that does this
fold for signed types and feed it as callback to a helper function that
adds the necessary type conversions.

For example, for the test case

  svuint64_t foo (svuint64_t x, svbool_t pg)
  {
    return svmul_n_u64_x (pg, x, -1);
  }

the following gimple sequence is emitted (-O2 -mcpu=grace):

  svuint64_t foo (svuint64_t x, svbool_t pg)
  {
    svint64_t D.12921;
    svint64_t D.12920;
    svuint64_t D.12919;

    D.12920 = VIEW_CONVERT_EXPR<svint64_t>(x);
    D.12921 = svneg_s64_x (pg, D.12920);
    D.12919 = VIEW_CONVERT_EXPR<svuint64_t>(D.12921);
    goto <D.12922>;
    <D.12922>:
    return D.12919;
  }

In general, the new helper gimple_folder::convert_and_fold
- takes a target type and a function pointer,
- converts the lhs and all non-boolean vector types to the target type,
- passes the converted lhs and arguments to the callback,
- receives the new gimple statement from the callback function,
- adds the necessary view converts to the gimple sequence,
- and returns the new call.

Because all arguments are converted to the same target types, the helper
function is only suitable for folding calls whose arguments are all of the
same type. If necessary, this could be extended to convert the arguments
to different types differentially.

The patch was bootstrapped and tested on aarch64-linux-gnu, no regression.
OK for mainline?

Signed-off-by: Jennifer Schmitz <jschmitz@nvidia.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-sve-builtins-base.cc (svmul_impl::fold):
	Wrap code for folding to svneg in lambda function and pass to
	gimple_folder::convert_and_fold to enable the transform
	for unsigned types.
	* config/aarch64/aarch64-sve-builtins.cc
	(gimple_folder::convert_and_fold): New function that converts
	operands to target type before calling callback function, adding
	the necessary conversion statements.
	(gimple_folder::redirect_call): Set fntype of redirected call.
	(get_vector_type): Move from here to aarch64-sve-builtins.h.
	* config/aarch64/aarch64-sve-builtins.h
	(gimple_folder::convert_and_fold): Declare function.
	(get_vector_type): Move here as inline function.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/acle/asm/mul_u8.c: Adjust expected outcome.
	* gcc.target/aarch64/sve/acle/asm/mul_u16.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/mul_u32.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/mul_u64.c: New test and adjust
	expected outcome.
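As an illustration (not part of the commit), here is a minimal sketch of how another fold routine could use the helper described above. The function example_fold and its callback are hypothetical; only convert_and_fold, get_vector_type and find_type_suffix come from this patch, and the sketch simply mirrors the pattern that svmul_impl::fold uses in the diff below.

  /* Hypothetical sketch only: fold a call on unsigned vectors by doing the
     arithmetic in the corresponding signed type.  convert_and_fold converts
     the lhs and vector operands to TY, passes them to the callback, and
     inserts the VIEW_CONVERT_EXPRs around the statement the callback
     returns.  */
  gimple *
  example_fold (gimple_folder &f)
  {
    auto callback = [](gimple_folder &, tree lhs_conv,
                       vec<tree> &args_conv) -> gimple *
      {
        /* args_conv[0] is the governing predicate, args_conv[1] the
           converted vector operand; build lhs_conv = -args_conv[1].  */
        return gimple_build_assign (lhs_conv, NEGATE_EXPR, args_conv[1]);
      };
    tree ty = get_vector_type (find_type_suffix (TYPE_signed,
                                                 f.type_suffix (0).element_bits));
    return f.convert_and_fold (ty, callback);
  }

In the actual patch the callback instead builds an svneg call via redirect_call, as shown in the first hunk below.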
parent 144ddb0cdf
commit f9c99d403c
7 changed files with 116 additions and 42 deletions
gcc/config/aarch64/aarch64-sve-builtins-base.cc

@@ -2305,33 +2305,47 @@ public:
 	  return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
     /* If one of the operands is all integer -1, fold to svneg.  */
-    tree pg = gimple_call_arg (f.call, 0);
-    tree negated_op = NULL;
-    if (integer_minus_onep (op2))
-      negated_op = op1;
-    else if (integer_minus_onep (op1))
-      negated_op = op2;
-    if (!f.type_suffix (0).unsigned_p && negated_op)
+    if (integer_minus_onep (op1) || integer_minus_onep (op2))
       {
-	function_instance instance ("svneg", functions::svneg, shapes::unary,
-				    MODE_none, f.type_suffix_ids, GROUP_none,
-				    f.pred, FPM_unused);
-	gcall *call = f.redirect_call (instance);
-	unsigned offset_index = 0;
-	if (f.pred == PRED_m)
-	  {
-	    offset_index = 1;
-	    gimple_call_set_arg (call, 0, op1);
-	  }
-	else
-	  gimple_set_num_ops (call, 5);
-	gimple_call_set_arg (call, offset_index, pg);
-	gimple_call_set_arg (call, offset_index + 1, negated_op);
-	return call;
+	auto mul_by_m1 = [](gimple_folder &f, tree lhs_conv,
+			    vec<tree> &args_conv) -> gimple *
+	  {
+	    gcc_assert (lhs_conv && args_conv.length () == 3);
+	    tree pg = args_conv[0];
+	    tree op1 = args_conv[1];
+	    tree op2 = args_conv[2];
+	    tree negated_op = op1;
+	    if (integer_minus_onep (op1))
+	      negated_op = op2;
+	    type_suffix_pair signed_tsp =
+	      {find_type_suffix (TYPE_signed, f.type_suffix (0).element_bits),
+	       f.type_suffix_ids[1]};
+	    function_instance instance ("svneg", functions::svneg,
+					shapes::unary, MODE_none, signed_tsp,
+					GROUP_none, f.pred, FPM_unused);
+	    gcall *call = f.redirect_call (instance);
+	    gimple_call_set_lhs (call, lhs_conv);
+	    unsigned offset = 0;
+	    if (f.pred == PRED_m)
+	      {
+		offset = 1;
+		gimple_call_set_arg (call, 0, op1);
+	      }
+	    else
+	      gimple_set_num_ops (call, 5);
+	    gimple_call_set_arg (call, offset, pg);
+	    gimple_call_set_arg (call, offset + 1, negated_op);
+	    return call;
+	  };
+	tree ty =
+	  get_vector_type (find_type_suffix (TYPE_signed,
+					     f.type_suffix (0).element_bits));
+	return f.convert_and_fold (ty, mul_by_m1);
       }
 
     /* If one of the operands is a uniform power of 2, fold to a left shift
       by immediate. */
+    tree pg = gimple_call_arg (f.call, 0);
     tree op1_cst = uniform_integer_cst_p (op1);
     tree op2_cst = uniform_integer_cst_p (op2);
     tree shift_op1, shift_op2 = NULL;
gcc/config/aarch64/aarch64-sve-builtins.cc

@@ -1130,14 +1130,6 @@ num_vectors_to_group (unsigned int nvectors)
   gcc_unreachable ();
 }
 
-/* Return the vector type associated with TYPE. */
-static tree
-get_vector_type (sve_type type)
-{
-  auto vector_type = type_suffixes[type.type].vector_type;
-  return acle_vector_types[type.num_vectors - 1][vector_type];
-}
-
 /* If FNDECL is an SVE builtin, return its function instance, otherwise
    return null. */
 const function_instance *

@@ -3601,6 +3593,7 @@ gimple_folder::redirect_call (const function_instance &instance)
     return NULL;
 
   gimple_call_set_fndecl (call, rfn->decl);
+  gimple_call_set_fntype (call, TREE_TYPE (rfn->decl));
   return call;
 }
 

@@ -3675,6 +3668,46 @@ gimple_folder::fold_pfalse ()
   return nullptr;
 }
 
+/* Convert the lhs and all non-boolean vector-type operands to TYPE.
+   Pass the converted variables to the callback FP, and finally convert the
+   result back to the original type. Add the necessary conversion statements.
+   Return the new call. */
+gimple *
+gimple_folder::convert_and_fold (tree type,
+				 gimple *(*fp) (gimple_folder &,
+						tree, vec<tree> &))
+{
+  gcc_assert (VECTOR_TYPE_P (type)
+	      && TYPE_MODE (type) != VNx16BImode);
+  tree old_ty = TREE_TYPE (lhs);
+  gimple_seq stmts = NULL;
+  bool convert_lhs_p = !useless_type_conversion_p (type, old_ty);
+  tree lhs_conv = convert_lhs_p ? create_tmp_var (type) : lhs;
+  unsigned int num_args = gimple_call_num_args (call);
+  auto_vec<tree, 16> args_conv;
+  args_conv.safe_grow (num_args);
+  for (unsigned int i = 0; i < num_args; ++i)
+    {
+      tree op = gimple_call_arg (call, i);
+      tree op_ty = TREE_TYPE (op);
+      args_conv[i] =
+	(VECTOR_TYPE_P (op_ty)
+	 && TYPE_MODE (op_ty) != VNx16BImode
+	 && !useless_type_conversion_p (op_ty, type))
+	? gimple_build (&stmts, VIEW_CONVERT_EXPR, type, op) : op;
+    }
+
+  gimple *new_stmt = fp (*this, lhs_conv, args_conv);
+  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+  if (convert_lhs_p)
+    {
+      tree t = build1 (VIEW_CONVERT_EXPR, old_ty, lhs_conv);
+      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, t);
+      gsi_insert_after (gsi, g, GSI_SAME_STMT);
+    }
+  return new_stmt;
+}
+
 /* Fold the call to constant VAL. */
 gimple *
 gimple_folder::fold_to_cstu (poly_uint64 val)
gcc/config/aarch64/aarch64-sve-builtins.h

@@ -649,6 +649,8 @@ public:
   gcall *redirect_call (const function_instance &);
   gimple *redirect_pred_x ();
   gimple *fold_pfalse ();
+  gimple *convert_and_fold (tree, gimple *(*) (gimple_folder &,
+					       tree, vec<tree> &));
 
   gimple *fold_to_cstu (poly_uint64);
   gimple *fold_to_pfalse ();

@@ -894,6 +896,14 @@ tuple_type_field (tree type)
   gcc_unreachable ();
 }
 
+/* Return the vector type associated with TYPE. */
+inline tree
+get_vector_type (sve_type type)
+{
+  auto vector_type = type_suffixes[type.type].vector_type;
+  return acle_vector_types[type.num_vectors - 1][vector_type];
+}
+
 inline function_instance::
 function_instance (const char *base_name_in, const function_base *base_in,
 		   const function_shape *shape_in,
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c

@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u16_m_untied, svuint16_t,
 
 /*
 ** mul_m1_u16_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.h, p0/m, z0\.h, \1\.h
+** neg z0\.h, p0/m, z0\.h
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u16_m, svuint16_t,

@@ -569,7 +568,7 @@ TEST_UNIFORM_Z (mul_255_u16_x, svuint16_t,
 
 /*
 ** mul_m1_u16_x:
-** mul z0\.h, z0\.h, #-1
+** neg z0\.h, p0/m, z0\.h
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u16_x, svuint16_t,
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c

@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u32_m_untied, svuint32_t,
 
 /*
 ** mul_m1_u32_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.s, p0/m, z0\.s, \1\.s
+** neg z0\.s, p0/m, z0\.s
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u32_m, svuint32_t,

@@ -569,7 +568,7 @@ TEST_UNIFORM_Z (mul_255_u32_x, svuint32_t,
 
 /*
 ** mul_m1_u32_x:
-** mul z0\.s, z0\.s, #-1
+** neg z0\.s, p0/m, z0\.s
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u32_x, svuint32_t,
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c

@@ -183,14 +183,25 @@ TEST_UNIFORM_Z (mul_3_u64_m_untied, svuint64_t,
 
 /*
 ** mul_m1_u64_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.d, p0/m, z0\.d, \1\.d
+** neg z0\.d, p0/m, z0\.d
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u64_m, svuint64_t,
 		z0 = svmul_n_u64_m (p0, z0, -1),
 		z0 = svmul_m (p0, z0, -1))
 
+/*
+** mul_m1r_u64_m:
+** mov (z[0-9]+)\.b, #-1
+** mov (z[0-9]+\.d), z0\.d
+** movprfx z0, \1
+** neg z0\.d, p0/m, \2
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1r_u64_m, svuint64_t,
+		z0 = svmul_u64_m (p0, svdup_u64 (-1), z0),
+		z0 = svmul_m (p0, svdup_u64 (-1), z0))
+
 /*
 ** mul_u64_z_tied1:
 ** movprfx z0\.d, p0/z, z0\.d

@@ -597,13 +608,22 @@ TEST_UNIFORM_Z (mul_255_u64_x, svuint64_t,
 
 /*
 ** mul_m1_u64_x:
-** mul z0\.d, z0\.d, #-1
+** neg z0\.d, p0/m, z0\.d
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u64_x, svuint64_t,
 		z0 = svmul_n_u64_x (p0, z0, -1),
 		z0 = svmul_x (p0, z0, -1))
 
+/*
+** mul_m1r_u64_x:
+** neg z0\.d, p0/m, z0\.d
+** ret
+*/
+TEST_UNIFORM_Z (mul_m1r_u64_x, svuint64_t,
+		z0 = svmul_u64_x (p0, svdup_u64 (-1), z0),
+		z0 = svmul_x (p0, svdup_u64 (-1), z0))
+
 /*
 ** mul_m127_u64_x:
 ** mul z0\.d, z0\.d, #-127
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c

@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u8_m_untied, svuint8_t,
 
 /*
 ** mul_m1_u8_m:
-** mov (z[0-9]+)\.b, #-1
-** mul z0\.b, p0/m, z0\.b, \1\.b
+** neg z0\.b, p0/m, z0\.b
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t,

@@ -559,7 +558,7 @@ TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t,
 
 /*
 ** mul_255_u8_x:
-** mul z0\.b, z0\.b, #-1
+** neg z0\.b, p0/m, z0\.b
 ** ret
 */
 TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t,

@@ -568,7 +567,7 @@ TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t,
 
 /*
 ** mul_m1_u8_x:
-** mul z0\.b, z0\.b, #-1
+** neg z0\.b, p0/m, z0\.b
 ** ret
 */
 TEST_UNIFORM_Z (mul_m1_u8_x, svuint8_t,