RISC-V: Add vwadd<u>/vwsub<u>/vwmul<u>/vwmulsu.vv lowering optimization for RVV auto-vectorization
Based on the V1 patch, adding a comment: ;; Use define_insn_and_split to define vsext.vf2/vzext.vf2 will help combine PASS ;; to combine instructions as below: ;; vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv gcc/ChangeLog: * config/riscv/autovec.md (<optab><v_double_trunc><mode>2): Change expand into define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/rvv.exp: Run the autovec/widen tests. * gcc.target/riscv/rvv/autovec/widen/widen-1.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen-2.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen-3.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen-4.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run-1.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run-2.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run-3.c: New test. * gcc.target/riscv/rvv/autovec/widen/widen_run-4.c: New test.
This commit is contained in:
parent
644d168385
commit
e1240bda3e
10 changed files with 262 additions and 4 deletions
|
@ -382,16 +382,24 @@
|
|||
;; - vsext.vf[2|4|8]
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
(define_expand "<optab><v_double_trunc><mode>2"
|
||||
[(set (match_operand:VWEXTI 0 "register_operand")
|
||||
;; Defining vsext.vf2/vzext.vf2 with define_insn_and_split helps the combine
|
||||
;; pass merge instructions as below:
|
||||
;; vsext.vf2 + vsext.vf2 + vadd.vv ==> vwadd.vv
|
||||
(define_insn_and_split "<optab><v_double_trunc><mode>2"
|
||||
[(set (match_operand:VWEXTI 0 "register_operand" "=&vr")
|
||||
(any_extend:VWEXTI
|
||||
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")))]
|
||||
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand" "vr")))]
|
||||
"TARGET_VECTOR"
|
||||
"#"
|
||||
"&& can_create_pseudo_p ()"
|
||||
[(const_int 0)]
|
||||
{
|
||||
insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
|
||||
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, operands);
|
||||
DONE;
|
||||
})
|
||||
}
|
||||
[(set_attr "type" "vext")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_expand "<optab><v_quad_trunc><mode>2"
|
||||
[(set (match_operand:VQEXTI 0 "register_operand")
|
||||
|
|
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c
Normal file
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-1.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <stdint-gcc.h>
|
||||
|
||||
#define TEST_TYPE(TYPE1, TYPE2) \
|
||||
__attribute__ ((noipa)) void vwadd_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
|
||||
TYPE2 *__restrict a, \
|
||||
TYPE2 *__restrict b, \
|
||||
int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; i++) \
|
||||
dst[i] = (TYPE1) a[i] + (TYPE1) b[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL() \
|
||||
TEST_TYPE (int16_t, int8_t) \
|
||||
TEST_TYPE (uint16_t, uint8_t) \
|
||||
TEST_TYPE (int32_t, int16_t) \
|
||||
TEST_TYPE (uint32_t, uint16_t) \
|
||||
TEST_TYPE (int64_t, int32_t) \
|
||||
TEST_TYPE (uint64_t, uint32_t)
|
||||
|
||||
TEST_ALL ()
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvwadd\.vv} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 3 } } */
|
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c
Normal file
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-2.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <stdint-gcc.h>
|
||||
|
||||
#define TEST_TYPE(TYPE1, TYPE2) \
|
||||
__attribute__ ((noipa)) void vwsub_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
|
||||
TYPE2 *__restrict a, \
|
||||
TYPE2 *__restrict b, \
|
||||
int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; i++) \
|
||||
dst[i] = (TYPE1) a[i] - (TYPE1) b[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL() \
|
||||
TEST_TYPE (int16_t, int8_t) \
|
||||
TEST_TYPE (uint16_t, uint8_t) \
|
||||
TEST_TYPE (int32_t, int16_t) \
|
||||
TEST_TYPE (uint32_t, uint16_t) \
|
||||
TEST_TYPE (int64_t, int32_t) \
|
||||
TEST_TYPE (uint64_t, uint32_t)
|
||||
|
||||
TEST_ALL ()
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvwsub\.vv} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvwsubu\.vv} 3 } } */
|
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
Normal file
27
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-3.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <stdint-gcc.h>
|
||||
|
||||
#define TEST_TYPE(TYPE1, TYPE2) \
|
||||
__attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
|
||||
TYPE2 *__restrict a, \
|
||||
TYPE2 *__restrict b, \
|
||||
int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; i++) \
|
||||
dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL() \
|
||||
TEST_TYPE (int16_t, int8_t) \
|
||||
TEST_TYPE (uint16_t, uint8_t) \
|
||||
TEST_TYPE (int32_t, int16_t) \
|
||||
TEST_TYPE (uint32_t, uint16_t) \
|
||||
TEST_TYPE (int64_t, int32_t) \
|
||||
TEST_TYPE (uint64_t, uint32_t)
|
||||
|
||||
TEST_ALL ()
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvwmul\.vv} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tvwmulu\.vv} 3 } } */
|
23
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c
Normal file
23
gcc/testsuite/gcc.target/riscv/rvv/autovec/widen/widen-4.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <stdint-gcc.h>
|
||||
|
||||
#define TEST_TYPE(TYPE1, TYPE2, TYPE3) \
|
||||
__attribute__ ((noipa)) void vwmul_##TYPE1_##TYPE2 (TYPE1 *__restrict dst, \
|
||||
TYPE2 *__restrict a, \
|
||||
TYPE3 *__restrict b, \
|
||||
int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; i++) \
|
||||
dst[i] = (TYPE1) a[i] * (TYPE1) b[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL() \
|
||||
TEST_TYPE (int16_t, int8_t, uint8_t) \
|
||||
TEST_TYPE (int32_t, int16_t, uint16_t) \
|
||||
TEST_TYPE (int64_t, int32_t, uint32_t)
|
||||
|
||||
TEST_ALL ()
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tvwmulsu\.vv} 3 } } */
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <assert.h>
|
||||
#include "widen-1.c"
|
||||
|
||||
#define SZ 512
|
||||
|
||||
#define RUN(TYPE1, TYPE2, LIMIT) \
|
||||
TYPE2 a##TYPE2[SZ]; \
|
||||
TYPE2 b##TYPE2[SZ]; \
|
||||
TYPE1 dst##TYPE1[SZ]; \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
{ \
|
||||
a##TYPE2[i] = LIMIT + i % 8723; \
|
||||
b##TYPE2[i] = LIMIT + i & 1964; \
|
||||
} \
|
||||
vwadd_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] + (TYPE1) b##TYPE2[i]));
|
||||
|
||||
#define RUN_ALL() \
|
||||
RUN (int16_t, int8_t, -128) \
|
||||
RUN (uint16_t, uint8_t, 255) \
|
||||
RUN (int32_t, int16_t, -32768) \
|
||||
RUN (uint32_t, uint16_t, 65535) \
|
||||
RUN (int64_t, int32_t, -2147483648) \
|
||||
RUN (uint64_t, uint32_t, 4294967295)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
RUN_ALL ()
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <assert.h>
|
||||
#include "widen-2.c"
|
||||
|
||||
#define SZ 512
|
||||
|
||||
#define RUN(TYPE1, TYPE2, LIMIT) \
|
||||
TYPE2 a##TYPE2[SZ]; \
|
||||
TYPE2 b##TYPE2[SZ]; \
|
||||
TYPE1 dst##TYPE1[SZ]; \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
{ \
|
||||
a##TYPE2[i] = LIMIT + i % 8723; \
|
||||
b##TYPE2[i] = LIMIT + i & 1964; \
|
||||
} \
|
||||
vwsub_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] - (TYPE1) b##TYPE2[i]));
|
||||
|
||||
#define RUN_ALL() \
|
||||
RUN (int16_t, int8_t, -128) \
|
||||
RUN (uint16_t, uint8_t, 255) \
|
||||
RUN (int32_t, int16_t, -32768) \
|
||||
RUN (uint32_t, uint16_t, 65535) \
|
||||
RUN (int64_t, int32_t, -2147483648) \
|
||||
RUN (uint64_t, uint32_t, 4294967295)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
RUN_ALL ()
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <assert.h>
|
||||
#include "widen-3.c"
|
||||
|
||||
#define SZ 512
|
||||
|
||||
#define RUN(TYPE1, TYPE2, LIMIT) \
|
||||
TYPE2 a##TYPE2[SZ]; \
|
||||
TYPE2 b##TYPE2[SZ]; \
|
||||
TYPE1 dst##TYPE1[SZ]; \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
{ \
|
||||
a##TYPE2[i] = LIMIT + i % 8723; \
|
||||
b##TYPE2[i] = LIMIT + i & 1964; \
|
||||
} \
|
||||
vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE2, SZ); \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE2[i]));
|
||||
|
||||
#define RUN_ALL() \
|
||||
RUN (int16_t, int8_t, -128) \
|
||||
RUN (uint16_t, uint8_t, 255) \
|
||||
RUN (int32_t, int16_t, -32768) \
|
||||
RUN (uint32_t, uint16_t, 65535) \
|
||||
RUN (int64_t, int32_t, -2147483648) \
|
||||
RUN (uint64_t, uint32_t, 4294967295)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
RUN_ALL ()
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <assert.h>
|
||||
#include "widen-4.c"
|
||||
|
||||
#define SZ 512
|
||||
|
||||
#define RUN(TYPE1, TYPE2, TYPE3, LIMIT) \
|
||||
TYPE2 a##TYPE2[SZ]; \
|
||||
TYPE3 b##TYPE3[SZ]; \
|
||||
TYPE1 dst##TYPE1[SZ]; \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
{ \
|
||||
a##TYPE2[i] = LIMIT + i % 8723; \
|
||||
b##TYPE3[i] = LIMIT + i & 1964; \
|
||||
} \
|
||||
vwmul_##TYPE1_##TYPE2 (dst##TYPE1, a##TYPE2, b##TYPE3, SZ); \
|
||||
for (int i = 0; i < SZ; i++) \
|
||||
assert (dst##TYPE1[i] == ((TYPE1) a##TYPE2[i] * (TYPE1) b##TYPE3[i]));
|
||||
|
||||
#define RUN_ALL() \
|
||||
RUN (int16_t, int8_t, uint8_t, -128) \
|
||||
RUN (int32_t, int16_t, uint16_t, -32768) \
|
||||
RUN (int64_t, int32_t, uint32_t, -2147483648)
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
RUN_ALL ()
|
||||
}
|
|
@ -73,6 +73,19 @@ foreach op $AUTOVEC_TEST_OPTS {
|
|||
"" "$op"
|
||||
}
|
||||
|
||||
# Widening operations are only tested with LMUL < 8.
|
||||
set AUTOVEC_TEST_OPTS [list \
|
||||
{-ftree-vectorize -O3 --param riscv-autovec-lmul=m1} \
|
||||
{-ftree-vectorize -O3 --param riscv-autovec-lmul=m2} \
|
||||
{-ftree-vectorize -O3 --param riscv-autovec-lmul=m4} \
|
||||
{-ftree-vectorize -O2 --param riscv-autovec-lmul=m1} \
|
||||
{-ftree-vectorize -O2 --param riscv-autovec-lmul=m2} \
|
||||
{-ftree-vectorize -O2 --param riscv-autovec-lmul=m4} ]
|
||||
foreach op $AUTOVEC_TEST_OPTS {
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/widen/*.\[cS\]]] \
|
||||
"" "$op"
|
||||
}
|
||||
|
||||
# VLS-VLMAX tests
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/vls-vlmax/*.\[cS\]]] \
|
||||
"-std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax" $CFLAGS
|
||||
|
|
Loading…
Add table
Reference in a new issue