VECT: Add mask_len_fold_left_plus for in-order floating-point reduction
Hi, Richard and Richi. This patch adds the mask_len_fold_left_plus pattern to support in-order floating-point reduction for targets that support len loop control. Consider the following case: double foo2 (double *__restrict a, double init, int *__restrict cond, int n) { for (int i = 0; i < n; i++) if (cond[i]) init += a[i]; return init; } ARM SVE: ... vec_mask_and_60 = loop_mask_54 & mask__23.33_57; vect__ifc__35.37_64 = .VCOND_MASK (vec_mask_and_60, vect__8.36_61, { 0.0, ... }); _36 = .MASK_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, loop_mask_54); ... For RVV, we want to see: ... _36 = .MASK_LEN_FOLD_LEFT_PLUS (init_20, vect__ifc__35.37_64, control_mask, loop_len, bias); ... gcc/ChangeLog: * doc/md.texi: Add mask_len_fold_left_plus. * internal-fn.cc (mask_len_fold_left_direct): Ditto. (expand_mask_len_fold_left_optab_fn): Ditto. (direct_mask_len_fold_left_optab_supported_p): Ditto. * internal-fn.def (MASK_LEN_FOLD_LEFT_PLUS): Ditto. * optabs.def (OPTAB_D): Ditto.
This commit is contained in:
parent
e029635cb7
commit
ba49332bab
4 changed files with 22 additions and 0 deletions
|
@ -5615,6 +5615,19 @@ no reassociation.
|
|||
Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
|
||||
(operand 3) that specifies which elements of the source vector should be added.
|
||||
|
||||
@cindex @code{mask_len_fold_left_plus_@var{m}} instruction pattern
|
||||
@item @code{mask_len_fold_left_plus_@var{m}}
|
||||
Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand
|
||||
(operand 3), len operand (operand 4) and bias operand (operand 5), and
|
||||
performs the following operations strictly in-order (no reassociation):
|
||||
|
||||
@smallexample
|
||||
operand0 = operand1;
|
||||
for (i = 0; i < operand4 + operand5; i++)
|
||||
if (operand3[i])
|
||||
operand0 += operand2[i];
|
||||
@end smallexample
|
||||
|
||||
@cindex @code{sdot_prod@var{m}} instruction pattern
|
||||
@item @samp{sdot_prod@var{m}}
|
||||
|
||||
|
|
|
@ -190,6 +190,7 @@ init_internal_fns ()
|
|||
#define fold_extract_direct { 2, 2, false }
|
||||
#define fold_left_direct { 1, 1, false }
|
||||
#define mask_fold_left_direct { 1, 1, false }
|
||||
#define mask_len_fold_left_direct { 1, 1, false }
|
||||
#define check_ptrs_direct { 0, 0, false }
|
||||
|
||||
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
|
||||
|
@ -3890,6 +3891,9 @@ expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
|
|||
#define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 3)
|
||||
|
||||
#define expand_mask_len_fold_left_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 5)
|
||||
|
||||
#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
|
||||
expand_direct_optab_fn (FN, STMT, OPTAB, 4)
|
||||
|
||||
|
@ -3997,6 +4001,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
|
|||
#define direct_fold_extract_optab_supported_p direct_optab_supported_p
|
||||
#define direct_fold_left_optab_supported_p direct_optab_supported_p
|
||||
#define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
|
||||
#define direct_mask_len_fold_left_optab_supported_p direct_optab_supported_p
|
||||
#define direct_check_ptrs_optab_supported_p direct_optab_supported_p
|
||||
#define direct_vec_set_optab_supported_p direct_optab_supported_p
|
||||
#define direct_vec_extract_optab_supported_p direct_optab_supported_p
|
||||
|
|
|
@ -319,6 +319,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
|
|||
DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
|
||||
mask_fold_left_plus, mask_fold_left)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (MASK_LEN_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW,
|
||||
mask_len_fold_left_plus, mask_len_fold_left)
|
||||
|
||||
/* Unary math functions. */
|
||||
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
|
||||
DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary)
|
||||
|
|
|
@ -385,6 +385,7 @@ OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a")
|
|||
OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a")
|
||||
OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a")
|
||||
OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a")
|
||||
OPTAB_D (mask_len_fold_left_plus_optab, "mask_len_fold_left_plus_$a")
|
||||
|
||||
OPTAB_D (extract_last_optab, "extract_last_$a")
|
||||
OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a")
|
||||
|
|
Loading…
Add table
Reference in a new issue