RISC-V: Enable basic VLS auto-vectorization

Consider the following case:
void
foo (int8_t *in, int8_t *out, int8_t x)
{
  for (int i = 0; i < 16; i++)
    in[i] = x;
}

Compile option: --param=riscv-autovec-preference=scalable -fno-builtin

Before this patch:

foo:
        li      a5,16
        csrr    a4,vlenb
        vsetvli a3,zero,e8,m1,ta,ma
        vmv.v.x v1,a2
        bleu    a5,a4,.L2
        mv      a5,a4
.L2:
        vsetvli zero,a5,e8,m1,ta,ma
        vse8.v  v1,0(a0)
        ret

After this patch:

foo:
	vsetivli	zero,16,e8,mf8,ta,ma
	vmv.v.x	v1,a2
	vse8.v	v1,0(a0)
	ret

gcc/ChangeLog:

	* config/riscv/autovec-vls.md (@vec_duplicate<mode>): New pattern.
	* config/riscv/riscv-v.cc (autovectorize_vector_modes): Add VLS autovec
	support.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/v-1.c: Adapt test.
	* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/dup-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-6.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/dup-7.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-07-31 10:13:57 +08:00 committed by Pan Li
parent bf36656a14
commit 92a891e869
13 changed files with 1034 additions and 6 deletions

View file

@ -139,3 +139,22 @@
"vmv%m1r.v\t%0,%1"
[(set_attr "type" "vmov")
(set_attr "mode" "<MODE>")])
;; -----------------------------------------------------------------
;; ---- Duplicate Operations
;; -----------------------------------------------------------------
;; Broadcast scalar operand 1 (element mode <VEL>, register or integer) into
;; every element of the VLS-mode vector register operand 0.
;; The pattern is only enabled when TARGET_VECTOR holds and
;; can_create_pseudo_p () is true.  Its output template is "#", so it is never
;; printed as-is; the split condition "&& 1" makes it split whenever the insn
;; condition itself holds, into the predicated broadcast emitted below.
(define_insn_and_split "@vec_duplicate<mode>"
[(set (match_operand:VLS 0 "register_operand")
(vec_duplicate:VLS
(match_operand:<VEL> 1 "reg_or_int_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
/* Replace the insn by the pred_broadcast pattern for this mode, emitted
through emit_vlmax_insn as a unary (RVV_UNOP) operation on OPERANDS.  */
riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
riscv_vector::RVV_UNOP, operands);
DONE;
}
)

View file

@ -2475,7 +2475,6 @@ autovectorize_vector_modes (vector_modes *modes, bool)
{
if (autovec_use_vlmax_p ())
{
/* TODO: We will support RVV VLS auto-vectorization mode in the future. */
poly_uint64 full_size
= BYTES_PER_RISCV_VECTOR * ((int) riscv_autovec_lmul);
@ -2503,7 +2502,25 @@ autovectorize_vector_modes (vector_modes *modes, bool)
modes->safe_push (mode);
}
}
return 0;
/* Flags returned to the caller; stays 0 unless VLS modes are pushed below.  */
unsigned int flag = 0;
if (TARGET_VECTOR_VLS)
{
/* Enable VECT_COMPARE_COSTS between VLA modes and VLS modes for scalable
auto-vectorization. */
flag |= VECT_COMPARE_COSTS;
/* Push all VLS modes according to TARGET_MIN_VLEN: start at
TARGET_MIN_VLEN * riscv_autovec_lmul / 8 bytes and keep halving.  */
unsigned int i = 0;
unsigned int base_size = TARGET_MIN_VLEN * riscv_autovec_lmul / 8;
unsigned int size = base_size;
machine_mode mode;
/* Stop when the size reaches zero or get_vector_mode (QImode, size)
yields no mode.  */
while (size > 0 && get_vector_mode (QImode, size).exists (&mode))
{
modes->safe_push (mode);
i++;
/* Next candidate size is base_size / 2^i.  */
size = base_size / (1U << i);
}
}
return flag;
}
/* If the given VECTOR_MODE is an RVV mode, first get the largest number

View file

@ -3,4 +3,4 @@
#include "template-1.h"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */

View file

@ -0,0 +1,168 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e8,\s*mf8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e8,\s*mf8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e8,\s*mf8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}
/*
** foo9:
** li\s+[a-x0-9]+,1024
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[1023] (constant trip count 1024); OUT is unused.  */
void
foo9 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 1024; i++)
in[i] = x;
}
/*
** foo10:
** li\s+[a-x0-9]+,4096
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[2047] (constant trip count 2048); OUT is unused.  */
void
foo10 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 2048; i++)
in[i] = x;
}
/*
** foo11:
** li\s+[a-x0-9]+,4096
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse8\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[4095] (constant trip count 4096); OUT is unused.  */
void
foo11 (int8_t *in, int8_t *out, int8_t x)
{
for (int i = 0; i < 4096; i++)
in[i] = x;
}

View file

@ -0,0 +1,153 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}
/*
** foo9:
** li\s+[a-x0-9]+,1024
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[1023] (constant trip count 1024); OUT is unused.  */
void
foo9 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 1024; i++)
in[i] = x;
}
/*
** foo10:
** li\s+[a-x0-9]+,4096
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[2047] (constant trip count 2048); OUT is unused.  */
void
foo10 (int16_t *in, int16_t *out, int16_t x)
{
for (int i = 0; i < 2048; i++)
in[i] = x;
}

View file

@ -0,0 +1,153 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}
/*
** foo9:
** li\s+[a-x0-9]+,1024
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[1023] (constant trip count 1024); OUT is unused.  */
void
foo9 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 1024; i++)
in[i] = x;
}
/*
** foo10:
** li\s+[a-x0-9]+,4096
** addi\s+[a-x0-9]+,[a-x0-9]+,-2048
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e16,\s*m8,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse16\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[2047] (constant trip count 2048); OUT is unused.  */
void
foo10 (_Float16 *in, _Float16 *out, _Float16 x)
{
for (int i = 0; i < 2048; i++)
in[i] = x;
}

View file

@ -0,0 +1,137 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}
/*
** foo9:
** li\s+[a-x0-9]+,1024
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[1023] (constant trip count 1024); OUT is unused.  */
void
foo9 (int32_t *in, int32_t *out, int32_t x)
{
for (int i = 0; i < 1024; i++)
in[i] = x;
}

View file

@ -0,0 +1,137 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (float *in, float *out, float x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (float *in, float *out, float x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (float *in, float *out, float x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (float *in, float *out, float x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*mf2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (float *in, float *out, float x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (float *in, float *out, float x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (float *in, float *out, float x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (float *in, float *out, float x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}
/*
** foo9:
** li\s+[a-x0-9]+,1024
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse32\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[1023] (constant trip count 1024); OUT is unused.  */
void
foo9 (float *in, float *out, float x)
{
for (int i = 0; i < 1024; i++)
in[i] = x;
}

View file

@ -0,0 +1,122 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
** vmv\.v\.x\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (int64_t *in, int64_t *out, int64_t x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}

View file

@ -0,0 +1,122 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-builtin -fno-schedule-insns -fno-schedule-insns2 --param riscv-autovec-lmul=m8" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/*
** foo1:
** vsetivli\s+zero,\s*4,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[3] (constant trip count 4); OUT is unused.  */
void
foo1 (double *in, double *out, double x)
{
for (int i = 0; i < 4; i++)
in[i] = x;
}
/*
** foo2:
** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[7] (constant trip count 8); OUT is unused.  */
void
foo2 (double *in, double *out, double x)
{
for (int i = 0; i < 8; i++)
in[i] = x;
}
/*
** foo3:
** vsetivli\s+zero,\s*16,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[15] (constant trip count 16); OUT is unused.  */
void
foo3 (double *in, double *out, double x)
{
for (int i = 0; i < 16; i++)
in[i] = x;
}
/*
** foo4:
** li\s+[a-x0-9]+,32
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[31] (constant trip count 32); OUT is unused.  */
void
foo4 (double *in, double *out, double x)
{
for (int i = 0; i < 32; i++)
in[i] = x;
}
/*
** foo5:
** li\s+[a-x0-9]+,64
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m1,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[63] (constant trip count 64); OUT is unused.  */
void
foo5 (double *in, double *out, double x)
{
for (int i = 0; i < 64; i++)
in[i] = x;
}
/*
** foo6:
** li\s+[a-x0-9]+,128
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m2,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[127] (constant trip count 128); OUT is unused.  */
void
foo6 (double *in, double *out, double x)
{
for (int i = 0; i < 128; i++)
in[i] = x;
}
/*
** foo7:
** li\s+[a-x0-9]+,256
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m4,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[255] (constant trip count 256); OUT is unused.  */
void
foo7 (double *in, double *out, double x)
{
for (int i = 0; i < 256; i++)
in[i] = x;
}
/*
** foo8:
** li\s+[a-x0-9]+,512
** vsetvli\s+zero,\s*[a-x0-9]+,\s*e64,\s*m8,\s*t[au],\s*m[au]
** vfmv\.v\.f\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),\s*[a-x0-9]+
** vse64\.v\s+(?:v[0-9]|v[1-2][0-9]|v3[0-1]),0\s*\([a-x0-9]+\)
** ret
*/
/* Broadcast store: write X to in[0] .. in[511] (constant trip count 512); OUT is unused.  */
void
foo8 (double *in, double *out, double x)
{
for (int i = 0; i < 512; i++)
in[i] = x;
}

View file

@ -3,4 +3,4 @@
#include "template-1.h"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */

View file

@ -3,4 +3,4 @@
#include "template-1.h"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 6 "vect" } } */

View file

@ -3,4 +3,4 @@
#include "template-1.h"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 5 "vect" } } */