RISC-V: Add vwadd<u>/vwsub<u>/vwmul<u>/vwmulsu.vv lowering optimization for RVV auto-vectorization

Based on the V1 patch, with the following comment added:
;; Defining vsext.vf2/vzext.vf2 with define_insn_and_split helps the combine
;; pass to combine instructions as below:
;;   vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv

gcc/ChangeLog:

	* config/riscv/autovec.md (<optab><v_double_trunc><mode>2): Change
	expand into define_insn_and_split.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/rvv.exp: Run the autovec/widen tests with LMUL < 8.
	* gcc.target/riscv/rvv/autovec/widen/widen-1.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen-2.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen-3.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen-4.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/widen/widen_run-4.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-05-31 21:20:49 +08:00 committed by Pan Li
parent 644d168385
commit e1240bda3e
10 changed files with 262 additions and 4 deletions

View file

@ -382,16 +382,24 @@
;; - vsext.vf[2|4|8]
;; -------------------------------------------------------------------------
(define_expand "<optab><v_double_trunc><mode>2"
[(set (match_operand:VWEXTI 0 "register_operand")
;; Defining vsext.vf2/vzext.vf2 with define_insn_and_split helps the combine
;; pass to merge instructions as below:
;; vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv
;; The "#" output template means the insn is never printed as-is; the split
;; below (enabled while pseudos can still be created) emits the pred_vf2
;; pattern through riscv_vector::emit_vlmax_insn.
(define_insn_and_split "<optab><v_double_trunc><mode>2"
[(set (match_operand:VWEXTI 0 "register_operand" "=&vr")
(any_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")))]
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand" "vr")))]
"TARGET_VECTOR"
"#"
"&& can_create_pseudo_p ()"
[(const_int 0)]
{
insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
DONE;
})
}
[(set_attr "type" "vext")
(set_attr "mode" "<MODE>")])
(define_expand "<optab><v_quad_trunc><mode>2"
[(set (match_operand:VQEXTI 0 "register_operand")

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
#include <stdint-gcc.h>
#define TEST_TYPE(TYPE1, TYPE2) \
__attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
TYPE2 *__restrict a, \
TYPE2 *__restrict b, \
int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = (TYPE1) a[i] + (TYPE1) b[i]; \
}
#define TEST_ALL() \
TEST_TYPE (int16_t, int8_t) \
TEST_TYPE (uint16_t, uint8_t) \
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
TEST_TYPE (uint64_t, uint32_t)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwadd\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 3 } } */

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
#include <stdint-gcc.h>
#define TEST_TYPE(TYPE1, TYPE2) \
__attribute__ ((noipa)) void vwsub_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
TYPE2 *__restrict a, \
TYPE2 *__restrict b, \
int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = (TYPE1) a[i] - (TYPE1) b[i]; \
}
#define TEST_ALL() \
TEST_TYPE (int16_t, int8_t) \
TEST_TYPE (uint16_t, uint8_t) \
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
TEST_TYPE (uint64_t, uint32_t)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwsub\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvwsubu\.vv} 3 } } */

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
#include <stdint-gcc.h>
#define TEST_TYPE(TYPE1, TYPE2) \
__attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
TYPE2 *__restrict a, \
TYPE2 *__restrict b, \
int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \
}
#define TEST_ALL() \
TEST_TYPE (int16_t, int8_t) \
TEST_TYPE (uint16_t, uint8_t) \
TEST_TYPE (int32_t, int16_t) \
TEST_TYPE (uint32_t, uint16_t) \
TEST_TYPE (int64_t, int32_t) \
TEST_TYPE (uint64_t, uint32_t)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */

View file

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
#include <stdint-gcc.h>
#define TEST_TYPE(TYPE1, TYPE2, TYPE3) \
__attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
TYPE2 *__restrict a, \
TYPE3 *__restrict b, \
int n) \
{ \
for (int i = 0; i < n; i++) \
dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \
}
#define TEST_ALL() \
TEST_TYPE (int16_t, int8_t, uint8_t) \
TEST_TYPE (int32_t, int16_t, uint16_t) \
TEST_TYPE (int64_t, int32_t, uint32_t)
TEST_ALL ()
/* { dg-final { scan-assembler-times {\tvwmulsu\.vv} 3 } } */

View file

@ -0,0 +1,34 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */

#include <assert.h>
#include "widen-1.c"

/* Number of elements in every test array.  */
#define SZ 512

/* Run the widening-add kernel from widen-1.c for one type pair and check
   every element against the scalar expression.

   TYPE1 is the wide result type, TYPE2 the narrow input type and LIMIT the
   base value of both input sequences.

   `LIMIT + i & 1964' parses as `(LIMIT + i) & 1964' because `+' binds
   tighter than `&'; the parentheses below spell that out without changing
   the generated values.

   The expected value is cast back to TYPE1 so the comparison happens in
   the result type rather than in the promoted `int' type.

   The kernel name is pasted exactly as widen-1.c pastes it: `TYPE1_' is a
   literal token there, not the TYPE1 parameter.  */
#define RUN(TYPE1, TYPE2, LIMIT) \
  TYPE2 a##TYPE2[SZ]; \
  TYPE2 b##TYPE2[SZ]; \
  TYPE1 dst##TYPE1[SZ]; \
  for (int i = 0; i < SZ; i++) \
    { \
      a##TYPE2[i] = (LIMIT) + i % 8723; \
      b##TYPE2[i] = ((LIMIT) + i) & 1964; \
    } \
  vwadd_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
  for (int i = 0; i < SZ; i++) \
    assert (dst##TYPE1[i] \
	    == (TYPE1) ((TYPE1) a##TYPE2[i] + (TYPE1) b##TYPE2[i]));

/* One RUN per kernel instantiated by TEST_ALL in widen-1.c.  */
#define RUN_ALL() \
  RUN (int16_t, int8_t, -128) \
  RUN (uint16_t, uint8_t, 255) \
  RUN (int32_t, int16_t, -32768) \
  RUN (uint32_t, uint16_t, 65535) \
  RUN (int64_t, int32_t, -2147483648) \
  RUN (uint64_t, uint32_t, 4294967295)

int
main ()
{
  RUN_ALL ()
}

View file

@ -0,0 +1,34 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */

#include <assert.h>
#include "widen-2.c"

/* Number of elements in every test array.  */
#define SZ 512

/* Run the widening-subtract kernel from widen-2.c for one type pair and
   check every element against the scalar expression.

   TYPE1 is the wide result type, TYPE2 the narrow input type and LIMIT the
   base value of both input sequences.

   `LIMIT + i & 1964' parses as `(LIMIT + i) & 1964' because `+' binds
   tighter than `&'; the parentheses below spell that out without changing
   the generated values.

   The expected value is cast back to TYPE1.  Without the cast, a 16-bit
   difference is computed in promoted `int', so a negative result would
   never compare equal to the wrapped uint16_t value stored in dst.

   The kernel name is pasted exactly as widen-2.c pastes it: `TYPE1_' is a
   literal token there, not the TYPE1 parameter.  */
#define RUN(TYPE1, TYPE2, LIMIT) \
  TYPE2 a##TYPE2[SZ]; \
  TYPE2 b##TYPE2[SZ]; \
  TYPE1 dst##TYPE1[SZ]; \
  for (int i = 0; i < SZ; i++) \
    { \
      a##TYPE2[i] = (LIMIT) + i % 8723; \
      b##TYPE2[i] = ((LIMIT) + i) & 1964; \
    } \
  vwsub_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
  for (int i = 0; i < SZ; i++) \
    assert (dst##TYPE1[i] \
	    == (TYPE1) ((TYPE1) a##TYPE2[i] - (TYPE1) b##TYPE2[i]));

/* One RUN per kernel instantiated by TEST_ALL in widen-2.c.  */
#define RUN_ALL() \
  RUN (int16_t, int8_t, -128) \
  RUN (uint16_t, uint8_t, 255) \
  RUN (int32_t, int16_t, -32768) \
  RUN (uint32_t, uint16_t, 65535) \
  RUN (int64_t, int32_t, -2147483648) \
  RUN (uint64_t, uint32_t, 4294967295)

int
main ()
{
  RUN_ALL ()
}

View file

@ -0,0 +1,34 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */

#include <assert.h>
#include "widen-3.c"

/* Number of elements in every test array.  */
#define SZ 512

/* Run the widening-multiply kernel from widen-3.c for one type pair and
   check every element against the scalar expression.

   TYPE1 is the wide result type, TYPE2 the narrow input type and LIMIT the
   base value of both input sequences.

   `LIMIT + i & 1964' parses as `(LIMIT + i) & 1964' because `+' binds
   tighter than `&'; the parentheses below spell that out without changing
   the generated values.

   The expected value is cast back to TYPE1 so the comparison happens in
   the result type rather than in the promoted `int' type.

   The kernel name is pasted exactly as widen-3.c pastes it: `TYPE1_' is a
   literal token there, not the TYPE1 parameter.  */
#define RUN(TYPE1, TYPE2, LIMIT) \
  TYPE2 a##TYPE2[SZ]; \
  TYPE2 b##TYPE2[SZ]; \
  TYPE1 dst##TYPE1[SZ]; \
  for (int i = 0; i < SZ; i++) \
    { \
      a##TYPE2[i] = (LIMIT) + i % 8723; \
      b##TYPE2[i] = ((LIMIT) + i) & 1964; \
    } \
  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
  for (int i = 0; i < SZ; i++) \
    assert (dst##TYPE1[i] \
	    == (TYPE1) ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));

/* One RUN per kernel instantiated by TEST_ALL in widen-3.c.  */
#define RUN_ALL() \
  RUN (int16_t, int8_t, -128) \
  RUN (uint16_t, uint8_t, 255) \
  RUN (int32_t, int16_t, -32768) \
  RUN (uint32_t, uint16_t, 65535) \
  RUN (int64_t, int32_t, -2147483648) \
  RUN (uint64_t, uint32_t, 4294967295)

int
main ()
{
  RUN_ALL ()
}

View file

@ -0,0 +1,31 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */

#include <assert.h>
#include "widen-4.c"

/* Number of elements in every test array.  */
#define SZ 512

/* Run the mixed-signedness widening-multiply kernel from widen-4.c for one
   type triple and check every element against the scalar expression.

   TYPE1 is the wide result type, TYPE2 the narrow type of `a', TYPE3 the
   narrow type of `b' and LIMIT the base value of both input sequences.

   `LIMIT + i & 1964' parses as `(LIMIT + i) & 1964' because `+' binds
   tighter than `&'; the parentheses below spell that out without changing
   the generated values.

   The expected value is cast back to TYPE1 so the comparison happens in
   the result type rather than in the promoted `int' type.

   The kernel name is pasted exactly as widen-4.c pastes it: `TYPE1_' is a
   literal token there, not the TYPE1 parameter.  */
#define RUN(TYPE1, TYPE2, TYPE3, LIMIT) \
  TYPE2 a##TYPE2[SZ]; \
  TYPE3 b##TYPE3[SZ]; \
  TYPE1 dst##TYPE1[SZ]; \
  for (int i = 0; i < SZ; i++) \
    { \
      a##TYPE2[i] = (LIMIT) + i % 8723; \
      b##TYPE3[i] = ((LIMIT) + i) & 1964; \
    } \
  vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE3, SZ); \
  for (int i = 0; i < SZ; i++) \
    assert (dst##TYPE1[i] \
	    == (TYPE1) ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE3[i]));

/* One RUN per kernel instantiated by TEST_ALL in widen-4.c.  */
#define RUN_ALL() \
  RUN (int16_t, int8_t, uint8_t, -128) \
  RUN (int32_t, int16_t, uint16_t, -32768) \
  RUN (int64_t, int32_t, uint32_t, -2147483648)

int
main ()
{
  RUN_ALL ()
}

View file

@ -73,6 +73,19 @@ foreach op $AUTOVEC_TEST_OPTS {
"" "$op"
}
# Widening operations are only tested with LMUL < 8, so m8 is left out of
# the option matrix below (presumably because the widened result needs a
# larger register group than the sources -- confirm against the RVV spec).
# The list holds every -O3 combination first, then every -O2 combination.
set AUTOVEC_TEST_OPTS {}
foreach opt_level {-O3 -O2} {
    foreach lmul {m1 m2 m4} {
	lappend AUTOVEC_TEST_OPTS \
	    "-ftree-vectorize $opt_level --param riscv-autovec-lmul=$lmul"
    }
}
# Run every C/assembly source under autovec/widen once per option set.
foreach op $AUTOVEC_TEST_OPTS {
    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/widen/*.\[cS\]]] \
	"" "$op"
}
# VLS-VLMAX tests
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
"-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS