RISC-V: Enable COND_LEN_FMA auto-vectorization

Add comments in scatter_store_run-7.c as suggested by Robin.

Enable COND_LEN_FMA auto-vectorization for floating-point FMA **without** -ffast-math.

The middle-end support has been approved, and I will merge it after I finish bootstrap and regression testing on x86:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624395.html

Now, it's time to send this patch.

Consider the following case:

  __attribute__ ((noipa)) void ternop_##TYPE (TYPE *__restrict dst,            \
					      TYPE *__restrict a,              \
					      TYPE *__restrict b, int n)       \
  {                                                                            \
    for (int i = 0; i < n; i++)                                                \
      dst[i] += a[i] * b[i];                                                   \
  }

TEST_ALL ()

Before this patch:

ternop_double:
        ble     a3,zero,.L5
        mv      a6,a0
.L3:
        vsetvli a5,a3,e64,m1,tu,ma
        slli    a4,a5,3
        vle64.v v1,0(a0)
        vle64.v v2,0(a1)
        vle64.v v3,0(a2)
        sub     a3,a3,a5
        vfmul.vv        v2,v2,v3
        vfadd.vv        v1,v1,v2
        vse64.v v1,0(a6)
        add     a0,a0,a4
        add     a1,a1,a4
        add     a2,a2,a4
        add     a6,a6,a4
        bne     a3,zero,.L3
.L5:
        ret

After this patch:

ternop_double:
	ble	a3,zero,.L5
	mv	a6,a0
.L3:
	vsetvli	a5,a3,e64,m1,tu,ma
	slli	a4,a5,3
	vle64.v	v1,0(a0)
	vle64.v	v2,0(a1)
	vle64.v	v3,0(a2)
	sub	a3,a3,a5
	vfmacc.vv	v1,v3,v2
	vse64.v	v1,0(a6)
	add	a0,a0,a4
	add	a1,a1,a4
	add	a2,a2,a4
	add	a6,a6,a4
	bne	a3,zero,.L3
.L5:
	ret

Notice: This patch only supports COND_LEN_FMA, **NOT** COND_LEN_FNMA, etc., since I have not
        added support for them in the middle-end yet.

        They will be supported in follow-up patches soon.

gcc/ChangeLog:

	* config/riscv/autovec.md (cond_len_fma<mode>): New pattern.
	* config/riscv/riscv-protos.h (enum insn_type): New enum.
	(expand_cond_len_ternop): New function.
	* config/riscv/riscv-v.cc (emit_nonvlmax_fp_ternary_tu_insn): Ditto.
	(expand_cond_len_ternop): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/gather-scatter/scatter_store_run-7.c:
	Adapt testcase for link fail.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: New test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c: New test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: New test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm_run-3.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-07-14 06:17:09 +08:00 committed by Pan Li
parent 53d12ecd62
commit 0d2673e995
10 changed files with 118 additions and 1 deletions

View file

@ -1531,3 +1531,26 @@
riscv_vector::expand_cond_len_binop (<CODE>, operands);
DONE;
})
;; -------------------------------------------------------------------------
;; ---- [FP] Conditional ternary operations
;; -------------------------------------------------------------------------
;; Includes:
;; - vfmacc/...
;; -------------------------------------------------------------------------
;; Expander for cond_len_fma<mode>, instantiated over the VF mode iterator
;; and enabled when TARGET_VECTOR holds.  All of the work is delegated to
;; riscv_vector::expand_cond_len_ternop, which reads operand 0 as the
;; destination, operand 1 as the mask and operand 6 as the length.
;; NOTE(review): operands 2-4 are presumably the three FMA inputs, operand 5
;; the fallback ("else") value and operand 7 the length bias (restricted to
;; zero by const_0_operand) -- confirm against the GCC internals manual.
(define_expand "cond_len_fma<mode>"
[(match_operand:VF 0 "register_operand")
(match_operand:<VM> 1 "vector_mask_operand")
(match_operand:VF 2 "register_operand")
(match_operand:VF 3 "register_operand")
(match_operand:VF 4 "register_operand")
(match_operand:VF 5 "register_operand")
(match_operand 6 "autovec_length_operand")
(match_operand 7 "const_0_operand")]
"TARGET_VECTOR"
{
/* Look up the insn code of the pred_mul pattern with PLUS for this mode
   (presumably the predicated multiply-add) and hand it, together with the
   full operand array, to the shared COND_LEN_* ternary expander.  */
insn_code icode = code_for_pred_mul (PLUS, <MODE>mode);
riscv_vector::expand_cond_len_ternop (icode, operands);
DONE;
})

View file

@ -191,6 +191,7 @@ enum insn_type
RVV_UNOP_MU = RVV_UNOP + 2, /* Likewise. */
RVV_UNOP_M = RVV_UNOP + 2, /* Likewise. */
RVV_TERNOP = 5,
RVV_TERNOP_TU = RVV_TERNOP + 1,
RVV_WIDEN_TERNOP = 4,
RVV_SCALAR_MOV_OP = 4, /* +1 for VUNDEF according to vector.md. */
RVV_SLIDE_OP = 4, /* Dest, VUNDEF, source and offset. */
@ -306,6 +307,7 @@ void expand_vec_perm (rtx, rtx, rtx, rtx);
void expand_select_vl (rtx *);
void expand_load_store (rtx *, bool);
void expand_gather_scatter (rtx *, bool);
void expand_cond_len_ternop (unsigned, rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode

View file

@ -748,6 +748,28 @@ emit_vlmax_fp_ternary_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
e.emit_insn ((enum insn_code) icode, ops);
}
/* This function emits a {NONVLMAX, TAIL_UNDISTURBED, MASK_ANY} vsetvli followed
* by the ternary operation which always has a real merge operand. */
static void
emit_nonvlmax_fp_ternary_tu_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
{
machine_mode dest_mode = GET_MODE (ops[0]);
machine_mode mask_mode = get_mask_mode (dest_mode).require ();
insn_expander<RVV_INSN_OPERANDS_MAX> e (/*OP_NUM*/ op_num,
/*HAS_DEST_P*/ true,
/*FULLY_UNMASKED_P*/ false,
/*USE_REAL_MERGE_P*/ true,
/*HAS_AVL_P*/ true,
/*VLMAX_P*/ false,
/*DEST_MODE*/ dest_mode,
/*MASK_MODE*/ mask_mode);
e.set_policy (TAIL_UNDISTURBED);
e.set_policy (MASK_ANY);
e.set_rounding_mode (FRM_DYN);
e.set_vl (vl);
e.emit_insn ((enum insn_code) icode, ops);
}
/* This function emits a {NONVLMAX, TAIL_ANY, MASK_ANY} vsetvli followed by the
* actual operation. */
void
@ -3267,4 +3289,31 @@ expand_gather_scatter (rtx *ops, bool is_load)
}
}
/* Expand a COND_LEN_* ternary operation using the instruction ICODE.

   OPS is the operand array of the calling expander: OPS[0] is the
   destination, OPS[1] the mask and OPS[6] the length.  The remaining
   entries are passed through untouched to the emitter.

   Only one combination is implemented so far: a floating-point destination
   mode together with an all-ones (dummy) mask, which is emitted with the
   TU, MASK_ANY policy.  Every other combination is not yet generated by
   the middle-end and therefore ends in gcc_unreachable.  */
void
expand_cond_len_ternop (unsigned icode, rtx *ops)
{
  rtx dest = ops[0];
  rtx mask = ops[1];
  rtx len = ops[6];
  machine_mode mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (mask);

  /* A mask equal to the all-ones constant of its mode selects every
     element, so only the length limits the operation.  */
  bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));

  if (is_dummy_mask)
    {
      /* Use TU, MASK ANY policy.  */
      if (FLOAT_MODE_P (mode))
	emit_nonvlmax_fp_ternary_tu_insn (icode, RVV_TERNOP_TU, ops, len);
      else
	/* FIXME: Enable this case when we support it in the middle-end.  */
	gcc_unreachable ();
    }
  else
    /* FIXME: Enable this case when we support it in the middle-end.  */
    gcc_unreachable ();
}
} // namespace riscv_vector

View file

@ -1,5 +1,9 @@
/* { dg-do run { target { riscv_vector } } } */
/* For some reason we exceed
the default code model's +-2 GiB limits. We should investigate why and
add a proper description here. For now just make sure the test case
compiles properly. */
/* { dg-additional-options "-mcmodel=medany" } */
#include "scatter_store-7.c"
#include <assert.h>

View file

@ -0,0 +1,7 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
#include "ternop-1.c"
/* { dg-final { scan-assembler-not {\tvmv} } } */
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 3 "optimized" } } */

View file

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
#include "ternop-2.c"
/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 9 } } */
/* TODO: we don't have an undefined-value IR for COND_LEN_* operations,
which will produce redundant move instructions here.
Will add assembler-not check of 'vmv' instructions in the future. */
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 9 "optimized" } } */

View file

@ -0,0 +1,9 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
#include "ternop-3.c"
/* { dg-final { scan-assembler-times {\tvmacc\.vv} 8 } } */
/* { dg-final { scan-assembler-times {\tvfmacc\.vv} 6 } } */
/* { dg-final { scan-assembler-times {\tvmv} 11 } } */
/* { dg-final { scan-tree-dump-times "COND_LEN_FMA" 6 "optimized" } } */

View file

@ -0,0 +1,4 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
#include "ternop_run-1.c"

View file

@ -0,0 +1,4 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
#include "ternop_run-2.c"

View file

@ -0,0 +1,4 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
#include "ternop_run-3.c"