RISC-V: Enable select_vl for RVV auto-vectorization

Consider the following example:
void vec_add(int32_t *restrict c, int32_t *restrict a, int32_t *restrict b,
             int N) {
  for (long i = 0; i < N; i++) {
    c[i] = a[i] + b[i];
  }
}

After this patch:
vec_add:
        ble     a3,zero,.L5
.L3:
        vsetvli a5,a3,e32,m1,ta,ma
        vle32.v v2,0(a1)
        vle32.v v1,0(a2)
        vsetvli a6,zero,e32,m1,ta,ma ===> redundant vsetvl.
        slli    a4,a5,2
        vadd.vv v1,v1,v2
        sub     a3,a3,a5
        vsetvli zero,a5,e32,m1,ta,ma ===> redundant vsetvl.
        vse32.v v1,0(a0)
        add     a1,a1,a4
        add     a2,a2,a4
        add     a0,a0,a4
        bne     a3,zero,.L3
.L5:
        ret

We can get close-to-optimal codegen but with some redundant vsetvls.
This is not a big issue and can easily be addressed in the RISC-V backend.

I am going to add a standalone pass, "AVL propagation" (avlprop), to address
this issue.

gcc/ChangeLog:

	* config/riscv/autovec.md (select_vl<mode>): New pattern.
	* config/riscv/riscv-protos.h (expand_select_vl): New function.
	* config/riscv/riscv-v.cc (expand_select_vl): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Adapt test.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-06-10 08:37:37 +08:00 committed by Pan Li
parent a13c444094
commit 55dcf277b5
6 changed files with 55 additions and 2 deletions

View file

@ -626,3 +626,17 @@
}
[(set_attr "type" "vimuladd")
(set_attr "mode" "<MODE>")])
;; =========================================================================
;; == SELECT_VL
;; =========================================================================
;; Expander for the select_vl<mode> pattern, enabled when TARGET_VECTOR holds.
;; Operand 0 must satisfy register_operand, operand 1 must satisfy
;; vector_length_operand, and operand 2 has no predicate.
;; All of the expansion work is delegated to riscv_vector::expand_select_vl,
;; which receives the full operand array.
(define_expand "select_vl<mode>"
[(match_operand:P 0 "register_operand")
(match_operand:P 1 "vector_length_operand")
(match_operand:P 2 "")]
"TARGET_VECTOR"
{
riscv_vector::expand_select_vl (operands);
DONE;
})

View file

@ -246,6 +246,7 @@ void expand_vec_series (rtx, rtx, rtx);
void expand_vec_init (rtx, rtx);
void expand_vcond (rtx *);
void expand_vec_perm (rtx, rtx, rtx, rtx);
/* Expander behind the select_vl<mode> pattern; the argument is that
   pattern's operand array.  */
void expand_select_vl (rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum vxrm_field_enum
{

View file

@ -2447,4 +2447,16 @@ expand_vec_perm_const (machine_mode vmode, machine_mode op_mode, rtx target,
return ret;
}
/* Emit a vsetvl that has no side effects in order to obtain the vector
   length.  OPS[0] and OPS[1] are handed to the vsetvl as its operands;
   OPS[2] supplies the number of units used to pick the vector mode.  */
void
expand_select_vl (rtx *ops)
{
  /* QImode is an arbitrary choice of inner scalar mode: vsetvl only
     demands the ratio, and the VSETVL pass is left to optimize it.  */
  scalar_int_mode elt_mode = QImode;
  poly_int64 unit_count = rtx_to_poly_int64 (ops[2]);
  machine_mode vec_mode = get_vector_mode (elt_mode, unit_count).require ();

  emit_insn (gen_no_side_effects_vsetvl_rtx (vec_mode, ops[0], ops[1]));
}
} // namespace riscv_vector

View file

@ -0,0 +1,26 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */
#include <stdint-gcc.h>
#define TEST_TYPE(TYPE) \
__attribute__ ((noipa)) void select_vl_##TYPE (TYPE *__restrict dst, \
TYPE *__restrict a, int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = a[i]; \
}
#define TEST_ALL() \
TEST_TYPE (int8_t) \
TEST_TYPE (uint8_t) \
TEST_TYPE (int16_t) \
TEST_TYPE (uint16_t) \
TEST_TYPE (int32_t) \
TEST_TYPE (uint32_t) \
TEST_TYPE (int64_t) \
TEST_TYPE (uint64_t) \
TEST_TYPE (float) \
TEST_TYPE (double)
TEST_ALL ()

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */
#include <stdint-gcc.h>