RISC-V: Enable basic VLS auto-vectorization
Consider the following case:

void foo (int8_t *in, int8_t *out, int8_t x)
{
  for (int i = 0; i < 16; i++)
    in[i] = x;
}

Compile options: --param=riscv-autovec-preference=scalable -fno-builtin

Before this patch:

foo:
        li      a5,16
        csrr    a4,vlenb
        vsetvli a3,zero,e8,m1,ta,ma
        vmv.v.x v1,a2
        bleu    a5,a4,.L2
        mv      a5,a4
.L2:
        vsetvli zero,a5,e8,m1,ta,ma
        vse8.v  v1,0(a0)
        ret

After this patch:

foo:
        vsetivli        zero,16,e8,mf8,ta,ma
        vmv.v.x v1,a2
        vse8.v  v1,0(a0)
        ret

gcc/ChangeLog:

        * config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
        * config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec support.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
        * gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
        * gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.
This commit is contained in:
parent
bf36656a14
commit
92a891e869
13 changed files with 1034 additions and 6 deletions
|
@ -139,3 +139,22 @@
|
|||
"vmv%m1r.v\t%0,%1"
|
||||
[(set_attr "type" "vmov")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; -----------------------------------------------------------------
|
||||
;; ---- Duplicate Operations
|
||||
;; -----------------------------------------------------------------
|
||||
|
||||
(define_insn_and_split "@vec_duplicate<mode>"
|
||||
[(set (match_operand:VLS 0 "register_operand")
|
||||
(vec_duplicate:VLS
|
||||
(match_operand:<VEL> 1 "reg_or_int_operand")))]
|
||||
"TARGET_VECTOR && can_create_pseudo_p ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
|
||||
riscv_vector::RVV_UNOP, operands);
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
|
|
@ -2475,7 +2475,6 @@ autovectorize_vector_modes (vector_modes *modes, bool)
|
|||
{
|
||||
if (autovec_use_vlmax_p ())
|
||||
{
|
||||
/* TODO: We will support RVV VLS auto-vectorization mode in the future. */
|
||||
poly_uint64 full_size
|
||||
= BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
|
||||
|
||||
|
@ -2503,7 +2502,25 @@ autovectorize_vector_modes (vector_modes *modes, bool)
|
|||
modes->safe_push (mode);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
unsigned int flag = 0;
|
||||
if (TARGET_VECTOR_VLS)
|
||||
{
|
||||
/* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for scalable
|
||||
auto-vectorization. */
|
||||
flag |= VECT_COMPARE_COSTS;
|
||||
/* Push all VLS modes according to TARGET_MIN_VLEN. */
|
||||
unsigned int i = 0;
|
||||
unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8;
|
||||
unsigned int size = base_size;
|
||||
machine_mode mode;
|
||||
while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
|
||||
{
|
||||
modes->safe_push (mode);
|
||||
i++;
|
||||
size = base_size / (1U << i);
|
||||
}
|
||||
}
|
||||
return flag;
|
||||
}
|
||||
|
||||
/* If the given VECTOR_MODE is an RVV mode, first get the largest number
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
|
||||
#include "template-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
|
||||
|
|
168
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
Normal file
168
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-1.c
Normal file
|
@ -0,0 +1,168 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo9:
|
||||
** li\s+[a-x0-9]+,1024
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo9 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo10:
|
||||
** li\s+[a-x0-9]+,4096
|
||||
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo10 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 2048; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo11:
|
||||
** li\s+[a-x0-9]+,4096
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo11 (int8_t *in, int8_t *out, int8_t x)
|
||||
{
|
||||
for (int i = 0; i < 4096; i++)
|
||||
in[i] = x;
|
||||
}
|
153
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
Normal file
153
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-2.c
Normal file
|
@ -0,0 +1,153 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo9:
|
||||
** li\s+[a-x0-9]+,1024
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo9 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo10:
|
||||
** li\s+[a-x0-9]+,4096
|
||||
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo10 (int16_t *in, int16_t *out, int16_t x)
|
||||
{
|
||||
for (int i = 0; i < 2048; i++)
|
||||
in[i] = x;
|
||||
}
|
153
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
Normal file
153
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-3.c
Normal file
|
@ -0,0 +1,153 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo9:
|
||||
** li\s+[a-x0-9]+,1024
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo9 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo10:
|
||||
** li\s+[a-x0-9]+,4096
|
||||
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo10 (_Float16 *in, _Float16 *out, _Float16 x)
|
||||
{
|
||||
for (int i = 0; i < 2048; i++)
|
||||
in[i] = x;
|
||||
}
|
137
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
Normal file
137
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-4.c
Normal file
|
@ -0,0 +1,137 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo9:
|
||||
** li\s+[a-x0-9]+,1024
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo9 (int32_t *in, int32_t *out, int32_t x)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
in[i] = x;
|
||||
}
|
137
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
Normal file
137
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-5.c
Normal file
|
@ -0,0 +1,137 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo9:
|
||||
** li\s+[a-x0-9]+,1024
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo9 (float *in, float *out, float x)
|
||||
{
|
||||
for (int i = 0; i < 1024; i++)
|
||||
in[i] = x;
|
||||
}
|
122
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
Normal file
122
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-6.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
|
||||
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (int64_t *in, int64_t *out, int64_t x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
122
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
Normal file
122
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/dup-7.c
Normal file
|
@ -0,0 +1,122 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include "def.h"
|
||||
|
||||
/*
|
||||
** foo1:
|
||||
** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo1 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 4; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2:
|
||||
** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo2 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 8; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo3:
|
||||
** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo3 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo4:
|
||||
** li\s+[a-x0-9]+,32
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo4 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo5:
|
||||
** li\s+[a-x0-9]+,64
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo5 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 64; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo6:
|
||||
** li\s+[a-x0-9]+,128
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo6 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 128; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo7:
|
||||
** li\s+[a-x0-9]+,256
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo7 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 256; i++)
|
||||
in[i] = x;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo8:
|
||||
** li\s+[a-x0-9]+,512
|
||||
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
|
||||
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
|
||||
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
|
||||
** ret
|
||||
*/
|
||||
void
|
||||
foo8 (double *in, double *out, double x)
|
||||
{
|
||||
for (int i = 0; i < 512; i++)
|
||||
in[i] = x;
|
||||
}
|
|
@ -3,4 +3,4 @@
|
|||
|
||||
#include "template-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
|
||||
#include "template-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */
|
||||
|
|
|
@ -3,4 +3,4 @@
|
|||
|
||||
#include "template-1.h"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
|
||||
|
|
Loading…
Add table
Reference in a new issue