RISC-V: Enable COND_LEN_FMA auto-vectorization
Add comments as Robin suggested in scatter_store_run-7.c. Enable COND_LEN_FMA auto-vectorization for floating-point FMA **without** -ffast-math. The middle-end support has been approved, and I will merge it after I finish bootstrap && regression testing on X86. https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624395.html Now, it's time to send this patch. Consider the following case: __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst, \ TYPE *__restrict a, \ TYPE *__restrict b, int n) \ { \ for (int i = 0; i < n; i++) \ dst[i] += a[i] * b[i]; \ } TEST_ALL () Before this patch: ternop_double: ble a3,zero,.L5 mv a6,a0 .L3: vsetvli a5,a3,e64,m1,tu,ma slli a4,a5,3 vle64.v v1,0(a0) vle64.v v2,0(a1) vle64.v v3,0(a2) sub a3,a3,a5 vfmul.vv v2,v2,v3 vfadd.vv v1,v1,v2 vse64.v v1,0(a6) add a0,a0,a4 add a1,a1,a4 add a2,a2,a4 add a6,a6,a4 bne a3,zero,.L3 .L5: ret After this patch: ternop_double: ble a3,zero,.L5 mv a6,a0 .L3: vsetvli a5,a3,e64,m1,tu,ma slli a4,a5,3 vle64.v v1,0(a0) vle64.v v2,0(a1) vle64.v v3,0(a2) sub a3,a3,a5 vfmacc.vv v1,v3,v2 vse64.v v1,0(a6) add a0,a0,a4 add a1,a1,a4 add a2,a2,a4 add a6,a6,a4 bne a3,zero,.L3 .L5: ret Note: This patch only supports COND_LEN_FMA, **not** COND_LEN_FNMA etc., since they are not supported in the middle-end yet. They will be supported in follow-up patches soon. gcc/ChangeLog: * config/riscv/autovec.md (cond_len_fma<mode>): New pattern. * config/riscv/riscv-protos.h (enum insn_type): New enum. (expand_cond_len_ternop): New function. * config/riscv/riscv-v.cc (emit_nonvlmax_fp_ternary_tu_insn): Ditto. (expand_cond_len_ternop): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c: Adapt testcase for link fail. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: New test. 
* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c: New test. * gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c: New test.
This commit is contained in:
parent
53d12ecd62
commit
0d2673e995
10 changed files with 118 additions and 1 deletions
|
@ -1531,3 +1531,26 @@
|
|||
riscv_vector::expand_cond_len_binop (<CODE>, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [FP] Conditional ternary operations
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Includes:
|
||||
;; - vfmacc/...
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Expander for the length- and mask-controlled floating-point fused
;; multiply-add.  Operand 0 is the destination, operand 1 the mask and
;; operand 6 the length (these are the slots read by
;; riscv_vector::expand_cond_len_ternop); operand 7 must be constant zero.
;; NOTE(review): operands 2-4 are presumably the FMA inputs and operand 5 the
;; fallback value, as for the generic cond_len_fma optab -- confirm against
;; the GCC internals manual.
(define_expand "cond_len_fma<mode>"
  [(match_operand:VF 0 "register_operand")
   (match_operand:<VM> 1 "vector_mask_operand")
   (match_operand:VF 2 "register_operand")
   (match_operand:VF 3 "register_operand")
   (match_operand:VF 4 "register_operand")
   (match_operand:VF 5 "register_operand")
   (match_operand 6 "autovec_length_operand")
   (match_operand 7 "const_0_operand")]
  "TARGET_VECTOR"
{
  /* Pass the predicated multiply-add (PLUS) pattern for this mode directly
     to the shared COND_LEN_* ternary expander.  */
  riscv_vector::expand_cond_len_ternop (code_for_pred_mul (PLUS, <MODE>mode),
                                        operands);
  DONE;
})
|
||||
|
|
|
@ -191,6 +191,7 @@ enum insn_type
|
|||
RVV_UNOP_MU = RVV_UNOP + 2, /* Likewise. */
|
||||
RVV_UNOP_M = RVV_UNOP + 2, /* Likewise. */
|
||||
RVV_TERNOP = 5,
|
||||
RVV_TERNOP_TU = RVV_TERNOP + 1,
|
||||
RVV_WIDEN_TERNOP = 4,
|
||||
RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md. */
|
||||
RVV_SLIDE_OP = 4, /* Dest, VUNDEF, source and offset. */
|
||||
|
@ -306,6 +307,7 @@ void expand_vec_perm (rtx, rtx, rtx, rtx);
|
|||
void expand_select_vl (rtx *);
|
||||
void expand_load_store (rtx *, bool);
|
||||
void expand_gather_scatter (rtx *, bool);
|
||||
void expand_cond_len_ternop (unsigned, rtx *);
|
||||
|
||||
/* Rounding mode bitfield for fixed point VXRM. */
|
||||
enum fixed_point_rounding_mode
|
||||
|
|
|
@ -748,6 +748,28 @@ emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
|
|||
e.emit_insn ((enum insn_code) icode, ops);
|
||||
}
|
||||
|
||||
/* This function emits a {NONVLMAX, TAIL_UNDISTURBED, MASK_ANY} vsetvli followed
|
||||
* by the ternary operation which always has a real merge operand. */
|
||||
static void
|
||||
emit_nonvlmax_fp_ternary_tu_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
|
||||
{
|
||||
machine_mode dest_mode = GET_MODE (ops[0]);
|
||||
machine_mode mask_mode = get_mask_mode (dest_mode).require ();
|
||||
insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
|
||||
/*HAS_DEST_P*/ true,
|
||||
/*FULLY_UNMASKED_P*/ false,
|
||||
/*USE_REAL_MERGE_P*/ true,
|
||||
/*HAS_AVL_P*/ true,
|
||||
/*VLMAX_P*/ false,
|
||||
/*DEST_MODE*/ dest_mode,
|
||||
/*MASK_MODE*/ mask_mode);
|
||||
e.set_policy (TAIL_UNDISTURBED);
|
||||
e.set_policy (MASK_ANY);
|
||||
e.set_rounding_mode (FRM_DYN);
|
||||
e.set_vl (vl);
|
||||
e.emit_insn ((enum insn_code) icode, ops);
|
||||
}
|
||||
|
||||
/* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
|
||||
* actual operation. */
|
||||
void
|
||||
|
@ -3267,4 +3289,31 @@ expand_gather_scatter (rtx *ops, bool is_load)
|
|||
}
|
||||
}
|
||||
|
||||
/* Expand COND_LEN_*. */
|
||||
void
|
||||
expand_cond_len_ternop (unsigned icode, rtx *ops)
|
||||
{
|
||||
rtx dest = ops[0];
|
||||
rtx mask = ops[1];
|
||||
rtx len = ops[6];
|
||||
machine_mode mode = GET_MODE (dest);
|
||||
machine_mode mask_mode = GET_MODE (mask);
|
||||
|
||||
poly_uint64 value;
|
||||
bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));
|
||||
|
||||
if (is_dummy_mask)
|
||||
{
|
||||
/* Use TU, MASK ANY policy. */
|
||||
if (FLOAT_MODE_P (mode))
|
||||
emit_nonvlmax_fp_ternary_tu_insn (icode, RVV_TERNOP_TU, ops, len);
|
||||
else
|
||||
/* FIXME: Enable this case when we support it in the middle-end. */
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
/* FIXME: Enable this case when we support it in the middle-end. */
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
} // namespace riscv_vector
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
|
||||
/* For some reason we exceed
|
||||
the default code model's +-2 GiB limits. We should investigate why and
|
||||
add a proper description here. For now just make sure the test case
|
||||
compiles properly. */
|
||||
/* { dg-additional-options "-mcmodel=medany" } */
|
||||
|
||||
#include "scatter_store-7.c"
|
||||
#include <assert.h>
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
|
||||
|
||||
#include "ternop-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tvmv} } } */
|
||||
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 3 "optimized" } } */
|
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
|
||||
|
||||
#include "ternop-2.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 9 } } */
|
||||
/* TODO: we don't have an "undefined" IR value for COND_LEN_* operations,
|
||||
which will produce redundant move instructions here.
|
||||
Will add assembler-not check of 'vmv' instructions in the future. */
|
||||
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 9 "optimized" } } */
|
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
|
||||
|
||||
#include "ternop-3.c"
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 6 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvmv} 11 } } */
|
||||
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 6 "optimized" } } */
|
|
@ -0,0 +1,4 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include "ternop_run-1.c"
|
|
@ -0,0 +1,4 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include "ternop_run-2.c"
|
|
@ -0,0 +1,4 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include "ternop_run-3.c"
|
Loading…
Add table
Reference in a new issue