RISC-V: Bugfix for vls mode aggregated in GPR calling convention

This implements the psABI change proposed in the pull request below.

https://hub.fgit.cf/riscv-non-isa/riscv-elf-psabi-doc/pull/416

When the size of a vls integer mode is no greater than 2 * XLEN, both the
args and the return values are passed in gpr(s) instead of by reference.
For example, the code below:

typedef short v8hi __attribute__ ((vector_size (16)));

v8hi __attribute__((noinline))
add (v8hi a, v8hi b)
{
  v8hi r = a + b;
  return r;
}

Before this patch:
add:
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v1,0(a1) <== arg by reference
  vle16.v  v2,0(a2) <== arg by reference
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(a0) <== return by reference
  ret

After this patch:
add:
  addi     sp,sp,-32
  sd       a0,0(sp)  <== arg by register a0 - a3
  sd       a1,8(sp)
  sd       a2,16(sp)
  sd       a3,24(sp)
  addi     a5,sp,16
  vsetivli zero,8,e16,m1,ta,ma
  vle16.v  v2,0(sp)
  vle16.v  v1,0(a5)
  vadd.vv  v1,v1,v2
  vse16.v  v1,0(sp)
  ld       a0,0(sp)  <== return by a0 - a1.
  ld       a1,8(sp)
  addi     sp,sp,32
  jr       ra

For vls floating-point modes, we apply the same rules as for integer modes:
they are passed in gpr(s) or by reference.

The riscv regression passed for this patch.

gcc/ChangeLog:

	* config/riscv/riscv.cc (riscv_v_vls_mode_aggregate_gpr_count): New function to
	calculate the gpr count required by vls mode.
	(riscv_v_vls_to_gpr_mode): New function to convert vls mode to gpr mode.
	(riscv_pass_vls_aggregate_in_gpr): New function to return the rtx of gpr
	for vls mode.
	(riscv_get_arg_info): Add vls mode handling.
	(riscv_pass_by_reference): Return false if arg info has a nonzero gpr count.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls/def.h: Add new helper macro.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-10.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-6.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-7.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-8.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-9.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/calling-convention-run-6.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
This commit is contained in:
Pan Li 2024-01-30 15:42:06 +08:00
parent 9f38237666
commit 7bfea0aedb
18 changed files with 1559 additions and 0 deletions

View file

@ -1382,6 +1382,41 @@ riscv_v_ext_mode_p (machine_mode mode)
|| riscv_v_ext_vls_mode_p (mode);
}
/* Return how many general purpose registers are needed to hold a vls value
   of size VLS_UNIT_SIZE when one register holds SCALAR_UNIT_SIZE (both in
   the same unit, and both required to be nonzero).  A value smaller than a
   single register still occupies one register; any other size must be an
   exact multiple of the register size.  */

static unsigned
riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
                                      unsigned scalar_unit_size)
{
  gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);

  if (vls_unit_size >= scalar_unit_size)
    {
      /* Only whole-register multiples are expected from here on.  */
      gcc_assert ((vls_unit_size % scalar_unit_size) == 0);

      return vls_unit_size / scalar_unit_size;
    }

  /* Sub-register sized value: one register is enough.  */
  return 1;
}
/* Map VLS_MODE_SIZE, the size of a vls mode, to the integer mode used to
   carry it in general purpose register(s):
   1 -> QImode, 2 -> HImode, 4 -> SImode, 8 -> DImode, 16 -> TImode.
   Any other size is not expected and ends in gcc_unreachable.  */

static machine_mode
riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
{
  if (vls_mode_size == 1)
    return QImode;

  if (vls_mode_size == 2)
    return HImode;

  if (vls_mode_size == 4)
    return SImode;

  if (vls_mode_size == 8)
    return DImode;

  if (vls_mode_size == 16)
    return TImode;

  gcc_unreachable ();
}
/* Call from ADJUST_NUNITS in riscv-modes.def. Return the correct
NUNITS size for corresponding machine_mode. */
@ -4868,6 +4903,41 @@ riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
GEN_INT (offset2))));
}
/* Try to place a value of vls mode MODE in general purpose register(s),
   starting at register GPR_BASE + INFO->gpr_offset.

   The value qualifies only when its size is at most twice the size of
   Xmode and the registers it needs still fit below MAX_ARGS_IN_REGISTERS.
   In that case INFO->num_gprs is set to the register count and the result
   is a one-element PARALLEL of mode MODE holding the register (in the
   matching integer mode) at offset zero.  Otherwise return NULL_RTX.  */

static rtx
riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
                                 unsigned gpr_base)
{
  gcc_assert (riscv_v_ext_vls_mode_p (mode));

  unsigned vls_bytes = GET_MODE_SIZE (mode).to_constant ();
  unsigned xlen_bytes = GET_MODE_SIZE (Xmode);

  /* Anything wider than two registers is not handled here.  */
  if (!IN_RANGE (vls_bytes, 0, xlen_bytes * 2))
    return NULL_RTX;

  unsigned needed
    = riscv_v_vls_mode_aggregate_gpr_count (vls_bytes, xlen_bytes);

  /* Not enough argument registers left for this value.  */
  if (needed + info->gpr_offset > MAX_ARGS_IN_REGISTERS)
    return NULL_RTX;

  unsigned first_regno = gpr_base + info->gpr_offset;
  info->num_gprs = needed;
  machine_mode gpr_mode = riscv_v_vls_to_gpr_mode (vls_bytes);

  /* Register number zero doubles as the "no suitable register" marker.  */
  if (first_regno == 0)
    return NULL_RTX;

  gcc_assert (gpr_mode != VOIDmode);

  rtx gpr = gen_rtx_REG (gpr_mode, first_regno);
  rtx piece = gen_rtx_EXPR_LIST (VOIDmode, gpr, CONST0_RTX (gpr_mode));

  return gen_rtx_PARALLEL (mode, gen_rtvec (1, piece));
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
@ -5067,6 +5137,10 @@ riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
/* For scalable vector argument. */
if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
return riscv_get_vector_arg (info, cum, mode, return_p);
/* For vls mode aggregated in gpr. */
if (riscv_v_ext_vls_mode_p (mode))
return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
}
/* Work out the size of the argument. */
@ -5196,6 +5270,10 @@ riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
if (info.num_fprs)
return false;
/* Don't pass by reference if we can use general register(s) for vls. */
if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
return false;
/* Don't pass by reference if we can use vector register groups. */
if (info.num_vrs > 0 || info.num_mrs > 0)
return false;

View file

@ -0,0 +1,154 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every QI vector
   type from v1qi up to v4096qi, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1qi)
DEF_RET1_ARG0 (v2qi)
DEF_RET1_ARG0 (v4qi)
DEF_RET1_ARG0 (v8qi)
DEF_RET1_ARG0 (v16qi)
DEF_RET1_ARG0 (v32qi)
DEF_RET1_ARG0 (v64qi)
DEF_RET1_ARG0 (v128qi)
DEF_RET1_ARG0 (v256qi)
DEF_RET1_ARG0 (v512qi)
DEF_RET1_ARG0 (v1024qi)
DEF_RET1_ARG0 (v2048qi)
DEF_RET1_ARG0 (v4096qi)
/* N = 1.  */
DEF_RET1_ARG1 (v1qi)
DEF_RET1_ARG1 (v2qi)
DEF_RET1_ARG1 (v4qi)
DEF_RET1_ARG1 (v8qi)
DEF_RET1_ARG1 (v16qi)
DEF_RET1_ARG1 (v32qi)
DEF_RET1_ARG1 (v64qi)
DEF_RET1_ARG1 (v128qi)
DEF_RET1_ARG1 (v256qi)
DEF_RET1_ARG1 (v512qi)
DEF_RET1_ARG1 (v1024qi)
DEF_RET1_ARG1 (v2048qi)
DEF_RET1_ARG1 (v4096qi)
/* N = 2.  */
DEF_RET1_ARG2 (v1qi)
DEF_RET1_ARG2 (v2qi)
DEF_RET1_ARG2 (v4qi)
DEF_RET1_ARG2 (v8qi)
DEF_RET1_ARG2 (v16qi)
DEF_RET1_ARG2 (v32qi)
DEF_RET1_ARG2 (v64qi)
DEF_RET1_ARG2 (v128qi)
DEF_RET1_ARG2 (v256qi)
DEF_RET1_ARG2 (v512qi)
DEF_RET1_ARG2 (v1024qi)
DEF_RET1_ARG2 (v2048qi)
DEF_RET1_ARG2 (v4096qi)
/* N = 3.  */
DEF_RET1_ARG3 (v1qi)
DEF_RET1_ARG3 (v2qi)
DEF_RET1_ARG3 (v4qi)
DEF_RET1_ARG3 (v8qi)
DEF_RET1_ARG3 (v16qi)
DEF_RET1_ARG3 (v32qi)
DEF_RET1_ARG3 (v64qi)
DEF_RET1_ARG3 (v128qi)
DEF_RET1_ARG3 (v256qi)
DEF_RET1_ARG3 (v512qi)
DEF_RET1_ARG3 (v1024qi)
DEF_RET1_ARG3 (v2048qi)
DEF_RET1_ARG3 (v4096qi)
/* N = 4.  */
DEF_RET1_ARG4 (v1qi)
DEF_RET1_ARG4 (v2qi)
DEF_RET1_ARG4 (v4qi)
DEF_RET1_ARG4 (v8qi)
DEF_RET1_ARG4 (v16qi)
DEF_RET1_ARG4 (v32qi)
DEF_RET1_ARG4 (v64qi)
DEF_RET1_ARG4 (v128qi)
DEF_RET1_ARG4 (v256qi)
DEF_RET1_ARG4 (v512qi)
DEF_RET1_ARG4 (v1024qi)
DEF_RET1_ARG4 (v2048qi)
DEF_RET1_ARG4 (v4096qi)
/* N = 5.  */
DEF_RET1_ARG5 (v1qi)
DEF_RET1_ARG5 (v2qi)
DEF_RET1_ARG5 (v4qi)
DEF_RET1_ARG5 (v8qi)
DEF_RET1_ARG5 (v16qi)
DEF_RET1_ARG5 (v32qi)
DEF_RET1_ARG5 (v64qi)
DEF_RET1_ARG5 (v128qi)
DEF_RET1_ARG5 (v256qi)
DEF_RET1_ARG5 (v512qi)
DEF_RET1_ARG5 (v1024qi)
DEF_RET1_ARG5 (v2048qi)
DEF_RET1_ARG5 (v4096qi)
/* N = 6.  */
DEF_RET1_ARG6 (v1qi)
DEF_RET1_ARG6 (v2qi)
DEF_RET1_ARG6 (v4qi)
DEF_RET1_ARG6 (v8qi)
DEF_RET1_ARG6 (v16qi)
DEF_RET1_ARG6 (v32qi)
DEF_RET1_ARG6 (v64qi)
DEF_RET1_ARG6 (v128qi)
DEF_RET1_ARG6 (v256qi)
DEF_RET1_ARG6 (v512qi)
DEF_RET1_ARG6 (v1024qi)
DEF_RET1_ARG6 (v2048qi)
DEF_RET1_ARG6 (v4096qi)
/* N = 7.  */
DEF_RET1_ARG7 (v1qi)
DEF_RET1_ARG7 (v2qi)
DEF_RET1_ARG7 (v4qi)
DEF_RET1_ARG7 (v8qi)
DEF_RET1_ARG7 (v16qi)
DEF_RET1_ARG7 (v32qi)
DEF_RET1_ARG7 (v64qi)
DEF_RET1_ARG7 (v128qi)
DEF_RET1_ARG7 (v256qi)
DEF_RET1_ARG7 (v512qi)
DEF_RET1_ARG7 (v1024qi)
DEF_RET1_ARG7 (v2048qi)
DEF_RET1_ARG7 (v4096qi)
/* N = 8.  */
DEF_RET1_ARG8 (v1qi)
DEF_RET1_ARG8 (v2qi)
DEF_RET1_ARG8 (v4qi)
DEF_RET1_ARG8 (v8qi)
DEF_RET1_ARG8 (v16qi)
DEF_RET1_ARG8 (v32qi)
DEF_RET1_ARG8 (v64qi)
DEF_RET1_ARG8 (v128qi)
DEF_RET1_ARG8 (v256qi)
DEF_RET1_ARG8 (v512qi)
DEF_RET1_ARG8 (v1024qi)
DEF_RET1_ARG8 (v2048qi)
DEF_RET1_ARG8 (v4096qi)
/* N = 9.  */
DEF_RET1_ARG9 (v1qi)
DEF_RET1_ARG9 (v2qi)
DEF_RET1_ARG9 (v4qi)
DEF_RET1_ARG9 (v8qi)
DEF_RET1_ARG9 (v16qi)
DEF_RET1_ARG9 (v32qi)
DEF_RET1_ARG9 (v64qi)
DEF_RET1_ARG9 (v128qi)
DEF_RET1_ARG9 (v256qi)
DEF_RET1_ARG9 (v512qi)
DEF_RET1_ARG9 (v1024qi)
DEF_RET1_ARG9 (v2048qi)
DEF_RET1_ARG9 (v4096qi)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 9 } } */
/* { dg-final { scan-assembler-times {lbu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {lhu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {ld\s+a[0-1],\s*[0-9]+\(sp\)} 35 } } */
/* { dg-final { scan-assembler-times {sb\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sh\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sw\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,51 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64 --param riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/* Expected function bodies for floating-point VLS types when compiled with
   -mabi=lp64 (see dg-options above).  The patterns below expect:
     - v4hf, one argument: a bare ret;
     - v2sf, two arguments: a0 and a1 stored to the stack, result reloaded
       into a0;
     - v4sf, two arguments: a0..a3 stored to the stack, result reloaded
       into a0 and a1;
     - v1df, three arguments: a0..a2 stored to the stack, result reloaded
       into a0.
   NOTE(review): the DEF_RET1_ARG<N> macros come from def.h, which is not
   visible here.  */
/*
** v4hf_RET1_ARG1:
** ret
*/
DEF_RET1_ARG1 (v4hf)
/*
** v2sf_RET1_ARG2:
** addi\s+sp,\s*sp,\s*-16
** sd\s+a0,\s*0\(sp\)
** sd\s+a1,\s*8\(sp\)
** ...
** ld\s+a0,\s*0\(sp\)
** addi\s+sp,\s*sp,\s*16
** jr\s+ra
*/
DEF_RET1_ARG2 (v2sf)
/*
** v4sf_RET1_ARG2:
** addi\s+sp,\s*sp,\s*-32
** sd\s+a0,\s*0\(sp\)
** sd\s+a1,\s*8\(sp\)
** sd\s+a2,\s*16\(sp\)
** sd\s+a3,\s*24\(sp\)
** ...
** ld\s+a0,\s*0\(sp\)
** ld\s+a1,\s*8\(sp\)
** addi\s+sp,\s*sp,\s*32
** jr\s+ra
*/
DEF_RET1_ARG2 (v4sf)
/*
** v1df_RET1_ARG3:
** addi\s+sp,\s*sp,\s*-32
** sd\s+a0,\s*8\(sp\)
** sd\s+a1,\s*16\(sp\)
** sd\s+a2,\s*24\(sp\)
** ...
** ld\s+a0,\s*8\(sp\)
** addi\s+sp,\s*sp,\s*32
** jr\s+ra
*/
DEF_RET1_ARG3 (v1df)

View file

@ -0,0 +1,142 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every HI vector
   type from v1hi up to v2048hi, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1hi)
DEF_RET1_ARG0 (v2hi)
DEF_RET1_ARG0 (v4hi)
DEF_RET1_ARG0 (v8hi)
DEF_RET1_ARG0 (v16hi)
DEF_RET1_ARG0 (v32hi)
DEF_RET1_ARG0 (v64hi)
DEF_RET1_ARG0 (v128hi)
DEF_RET1_ARG0 (v256hi)
DEF_RET1_ARG0 (v512hi)
DEF_RET1_ARG0 (v1024hi)
DEF_RET1_ARG0 (v2048hi)
/* N = 1.  */
DEF_RET1_ARG1 (v1hi)
DEF_RET1_ARG1 (v2hi)
DEF_RET1_ARG1 (v4hi)
DEF_RET1_ARG1 (v8hi)
DEF_RET1_ARG1 (v16hi)
DEF_RET1_ARG1 (v32hi)
DEF_RET1_ARG1 (v64hi)
DEF_RET1_ARG1 (v128hi)
DEF_RET1_ARG1 (v256hi)
DEF_RET1_ARG1 (v512hi)
DEF_RET1_ARG1 (v1024hi)
DEF_RET1_ARG1 (v2048hi)
/* N = 2.  */
DEF_RET1_ARG2 (v1hi)
DEF_RET1_ARG2 (v2hi)
DEF_RET1_ARG2 (v4hi)
DEF_RET1_ARG2 (v8hi)
DEF_RET1_ARG2 (v16hi)
DEF_RET1_ARG2 (v32hi)
DEF_RET1_ARG2 (v64hi)
DEF_RET1_ARG2 (v128hi)
DEF_RET1_ARG2 (v256hi)
DEF_RET1_ARG2 (v512hi)
DEF_RET1_ARG2 (v1024hi)
DEF_RET1_ARG2 (v2048hi)
/* N = 3.  */
DEF_RET1_ARG3 (v1hi)
DEF_RET1_ARG3 (v2hi)
DEF_RET1_ARG3 (v4hi)
DEF_RET1_ARG3 (v8hi)
DEF_RET1_ARG3 (v16hi)
DEF_RET1_ARG3 (v32hi)
DEF_RET1_ARG3 (v64hi)
DEF_RET1_ARG3 (v128hi)
DEF_RET1_ARG3 (v256hi)
DEF_RET1_ARG3 (v512hi)
DEF_RET1_ARG3 (v1024hi)
DEF_RET1_ARG3 (v2048hi)
/* N = 4.  */
DEF_RET1_ARG4 (v1hi)
DEF_RET1_ARG4 (v2hi)
DEF_RET1_ARG4 (v4hi)
DEF_RET1_ARG4 (v8hi)
DEF_RET1_ARG4 (v16hi)
DEF_RET1_ARG4 (v32hi)
DEF_RET1_ARG4 (v64hi)
DEF_RET1_ARG4 (v128hi)
DEF_RET1_ARG4 (v256hi)
DEF_RET1_ARG4 (v512hi)
DEF_RET1_ARG4 (v1024hi)
DEF_RET1_ARG4 (v2048hi)
/* N = 5.  */
DEF_RET1_ARG5 (v1hi)
DEF_RET1_ARG5 (v2hi)
DEF_RET1_ARG5 (v4hi)
DEF_RET1_ARG5 (v8hi)
DEF_RET1_ARG5 (v16hi)
DEF_RET1_ARG5 (v32hi)
DEF_RET1_ARG5 (v64hi)
DEF_RET1_ARG5 (v128hi)
DEF_RET1_ARG5 (v256hi)
DEF_RET1_ARG5 (v512hi)
DEF_RET1_ARG5 (v1024hi)
DEF_RET1_ARG5 (v2048hi)
/* N = 6.  */
DEF_RET1_ARG6 (v1hi)
DEF_RET1_ARG6 (v2hi)
DEF_RET1_ARG6 (v4hi)
DEF_RET1_ARG6 (v8hi)
DEF_RET1_ARG6 (v16hi)
DEF_RET1_ARG6 (v32hi)
DEF_RET1_ARG6 (v64hi)
DEF_RET1_ARG6 (v128hi)
DEF_RET1_ARG6 (v256hi)
DEF_RET1_ARG6 (v512hi)
DEF_RET1_ARG6 (v1024hi)
DEF_RET1_ARG6 (v2048hi)
/* N = 7.  */
DEF_RET1_ARG7 (v1hi)
DEF_RET1_ARG7 (v2hi)
DEF_RET1_ARG7 (v4hi)
DEF_RET1_ARG7 (v8hi)
DEF_RET1_ARG7 (v16hi)
DEF_RET1_ARG7 (v32hi)
DEF_RET1_ARG7 (v64hi)
DEF_RET1_ARG7 (v128hi)
DEF_RET1_ARG7 (v256hi)
DEF_RET1_ARG7 (v512hi)
DEF_RET1_ARG7 (v1024hi)
DEF_RET1_ARG7 (v2048hi)
/* N = 8.  */
DEF_RET1_ARG8 (v1hi)
DEF_RET1_ARG8 (v2hi)
DEF_RET1_ARG8 (v4hi)
DEF_RET1_ARG8 (v8hi)
DEF_RET1_ARG8 (v16hi)
DEF_RET1_ARG8 (v32hi)
DEF_RET1_ARG8 (v64hi)
DEF_RET1_ARG8 (v128hi)
DEF_RET1_ARG8 (v256hi)
DEF_RET1_ARG8 (v512hi)
DEF_RET1_ARG8 (v1024hi)
DEF_RET1_ARG8 (v2048hi)
/* N = 9.  */
DEF_RET1_ARG9 (v1hi)
DEF_RET1_ARG9 (v2hi)
DEF_RET1_ARG9 (v4hi)
DEF_RET1_ARG9 (v8hi)
DEF_RET1_ARG9 (v16hi)
DEF_RET1_ARG9 (v32hi)
DEF_RET1_ARG9 (v64hi)
DEF_RET1_ARG9 (v128hi)
DEF_RET1_ARG9 (v256hi)
DEF_RET1_ARG9 (v512hi)
DEF_RET1_ARG9 (v1024hi)
DEF_RET1_ARG9 (v2048hi)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
/* { dg-final { scan-assembler-times {lhu\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {ld\s+a[0-1],\s*[0-9]+\(sp\)} 33 } } */
/* { dg-final { scan-assembler-times {sh\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sw\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,130 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every SI vector
   type from v1si up to v1024si, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1si)
DEF_RET1_ARG0 (v2si)
DEF_RET1_ARG0 (v4si)
DEF_RET1_ARG0 (v8si)
DEF_RET1_ARG0 (v16si)
DEF_RET1_ARG0 (v32si)
DEF_RET1_ARG0 (v64si)
DEF_RET1_ARG0 (v128si)
DEF_RET1_ARG0 (v256si)
DEF_RET1_ARG0 (v512si)
DEF_RET1_ARG0 (v1024si)
/* N = 1.  */
DEF_RET1_ARG1 (v1si)
DEF_RET1_ARG1 (v2si)
DEF_RET1_ARG1 (v4si)
DEF_RET1_ARG1 (v8si)
DEF_RET1_ARG1 (v16si)
DEF_RET1_ARG1 (v32si)
DEF_RET1_ARG1 (v64si)
DEF_RET1_ARG1 (v128si)
DEF_RET1_ARG1 (v256si)
DEF_RET1_ARG1 (v512si)
DEF_RET1_ARG1 (v1024si)
/* N = 2.  */
DEF_RET1_ARG2 (v1si)
DEF_RET1_ARG2 (v2si)
DEF_RET1_ARG2 (v4si)
DEF_RET1_ARG2 (v8si)
DEF_RET1_ARG2 (v16si)
DEF_RET1_ARG2 (v32si)
DEF_RET1_ARG2 (v64si)
DEF_RET1_ARG2 (v128si)
DEF_RET1_ARG2 (v256si)
DEF_RET1_ARG2 (v512si)
DEF_RET1_ARG2 (v1024si)
/* N = 3.  */
DEF_RET1_ARG3 (v1si)
DEF_RET1_ARG3 (v2si)
DEF_RET1_ARG3 (v4si)
DEF_RET1_ARG3 (v8si)
DEF_RET1_ARG3 (v16si)
DEF_RET1_ARG3 (v32si)
DEF_RET1_ARG3 (v64si)
DEF_RET1_ARG3 (v128si)
DEF_RET1_ARG3 (v256si)
DEF_RET1_ARG3 (v512si)
DEF_RET1_ARG3 (v1024si)
/* N = 4.  */
DEF_RET1_ARG4 (v1si)
DEF_RET1_ARG4 (v2si)
DEF_RET1_ARG4 (v4si)
DEF_RET1_ARG4 (v8si)
DEF_RET1_ARG4 (v16si)
DEF_RET1_ARG4 (v32si)
DEF_RET1_ARG4 (v64si)
DEF_RET1_ARG4 (v128si)
DEF_RET1_ARG4 (v256si)
DEF_RET1_ARG4 (v512si)
DEF_RET1_ARG4 (v1024si)
/* N = 5.  */
DEF_RET1_ARG5 (v1si)
DEF_RET1_ARG5 (v2si)
DEF_RET1_ARG5 (v4si)
DEF_RET1_ARG5 (v8si)
DEF_RET1_ARG5 (v16si)
DEF_RET1_ARG5 (v32si)
DEF_RET1_ARG5 (v64si)
DEF_RET1_ARG5 (v128si)
DEF_RET1_ARG5 (v256si)
DEF_RET1_ARG5 (v512si)
DEF_RET1_ARG5 (v1024si)
/* N = 6.  */
DEF_RET1_ARG6 (v1si)
DEF_RET1_ARG6 (v2si)
DEF_RET1_ARG6 (v4si)
DEF_RET1_ARG6 (v8si)
DEF_RET1_ARG6 (v16si)
DEF_RET1_ARG6 (v32si)
DEF_RET1_ARG6 (v64si)
DEF_RET1_ARG6 (v128si)
DEF_RET1_ARG6 (v256si)
DEF_RET1_ARG6 (v512si)
DEF_RET1_ARG6 (v1024si)
/* N = 7.  */
DEF_RET1_ARG7 (v1si)
DEF_RET1_ARG7 (v2si)
DEF_RET1_ARG7 (v4si)
DEF_RET1_ARG7 (v8si)
DEF_RET1_ARG7 (v16si)
DEF_RET1_ARG7 (v32si)
DEF_RET1_ARG7 (v64si)
DEF_RET1_ARG7 (v128si)
DEF_RET1_ARG7 (v256si)
DEF_RET1_ARG7 (v512si)
DEF_RET1_ARG7 (v1024si)
/* N = 8.  */
DEF_RET1_ARG8 (v1si)
DEF_RET1_ARG8 (v2si)
DEF_RET1_ARG8 (v4si)
DEF_RET1_ARG8 (v8si)
DEF_RET1_ARG8 (v16si)
DEF_RET1_ARG8 (v32si)
DEF_RET1_ARG8 (v64si)
DEF_RET1_ARG8 (v128si)
DEF_RET1_ARG8 (v256si)
DEF_RET1_ARG8 (v512si)
DEF_RET1_ARG8 (v1024si)
/* N = 9.  */
DEF_RET1_ARG9 (v1si)
DEF_RET1_ARG9 (v2si)
DEF_RET1_ARG9 (v4si)
DEF_RET1_ARG9 (v8si)
DEF_RET1_ARG9 (v16si)
DEF_RET1_ARG9 (v32si)
DEF_RET1_ARG9 (v64si)
DEF_RET1_ARG9 (v128si)
DEF_RET1_ARG9 (v256si)
DEF_RET1_ARG9 (v512si)
DEF_RET1_ARG9 (v1024si)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
/* { dg-final { scan-assembler-times {lw\s+a0,\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {ld\s+a[0-1],\s*[0-9]+\(sp\)} 31 } } */
/* { dg-final { scan-assembler-times {sw\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,118 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every DI vector
   type from v1di up to v512di, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1di)
DEF_RET1_ARG0 (v2di)
DEF_RET1_ARG0 (v4di)
DEF_RET1_ARG0 (v8di)
DEF_RET1_ARG0 (v16di)
DEF_RET1_ARG0 (v32di)
DEF_RET1_ARG0 (v64di)
DEF_RET1_ARG0 (v128di)
DEF_RET1_ARG0 (v256di)
DEF_RET1_ARG0 (v512di)
/* N = 1.  */
DEF_RET1_ARG1 (v1di)
DEF_RET1_ARG1 (v2di)
DEF_RET1_ARG1 (v4di)
DEF_RET1_ARG1 (v8di)
DEF_RET1_ARG1 (v16di)
DEF_RET1_ARG1 (v32di)
DEF_RET1_ARG1 (v64di)
DEF_RET1_ARG1 (v128di)
DEF_RET1_ARG1 (v256di)
DEF_RET1_ARG1 (v512di)
/* N = 2.  */
DEF_RET1_ARG2 (v1di)
DEF_RET1_ARG2 (v2di)
DEF_RET1_ARG2 (v4di)
DEF_RET1_ARG2 (v8di)
DEF_RET1_ARG2 (v16di)
DEF_RET1_ARG2 (v32di)
DEF_RET1_ARG2 (v64di)
DEF_RET1_ARG2 (v128di)
DEF_RET1_ARG2 (v256di)
DEF_RET1_ARG2 (v512di)
/* N = 3.  */
DEF_RET1_ARG3 (v1di)
DEF_RET1_ARG3 (v2di)
DEF_RET1_ARG3 (v4di)
DEF_RET1_ARG3 (v8di)
DEF_RET1_ARG3 (v16di)
DEF_RET1_ARG3 (v32di)
DEF_RET1_ARG3 (v64di)
DEF_RET1_ARG3 (v128di)
DEF_RET1_ARG3 (v256di)
DEF_RET1_ARG3 (v512di)
/* N = 4.  */
DEF_RET1_ARG4 (v1di)
DEF_RET1_ARG4 (v2di)
DEF_RET1_ARG4 (v4di)
DEF_RET1_ARG4 (v8di)
DEF_RET1_ARG4 (v16di)
DEF_RET1_ARG4 (v32di)
DEF_RET1_ARG4 (v64di)
DEF_RET1_ARG4 (v128di)
DEF_RET1_ARG4 (v256di)
DEF_RET1_ARG4 (v512di)
/* N = 5.  */
DEF_RET1_ARG5 (v1di)
DEF_RET1_ARG5 (v2di)
DEF_RET1_ARG5 (v4di)
DEF_RET1_ARG5 (v8di)
DEF_RET1_ARG5 (v16di)
DEF_RET1_ARG5 (v32di)
DEF_RET1_ARG5 (v64di)
DEF_RET1_ARG5 (v128di)
DEF_RET1_ARG5 (v256di)
DEF_RET1_ARG5 (v512di)
/* N = 6.  */
DEF_RET1_ARG6 (v1di)
DEF_RET1_ARG6 (v2di)
DEF_RET1_ARG6 (v4di)
DEF_RET1_ARG6 (v8di)
DEF_RET1_ARG6 (v16di)
DEF_RET1_ARG6 (v32di)
DEF_RET1_ARG6 (v64di)
DEF_RET1_ARG6 (v128di)
DEF_RET1_ARG6 (v256di)
DEF_RET1_ARG6 (v512di)
/* N = 7.  */
DEF_RET1_ARG7 (v1di)
DEF_RET1_ARG7 (v2di)
DEF_RET1_ARG7 (v4di)
DEF_RET1_ARG7 (v8di)
DEF_RET1_ARG7 (v16di)
DEF_RET1_ARG7 (v32di)
DEF_RET1_ARG7 (v64di)
DEF_RET1_ARG7 (v128di)
DEF_RET1_ARG7 (v256di)
DEF_RET1_ARG7 (v512di)
/* N = 8.  */
DEF_RET1_ARG8 (v1di)
DEF_RET1_ARG8 (v2di)
DEF_RET1_ARG8 (v4di)
DEF_RET1_ARG8 (v8di)
DEF_RET1_ARG8 (v16di)
DEF_RET1_ARG8 (v32di)
DEF_RET1_ARG8 (v64di)
DEF_RET1_ARG8 (v128di)
DEF_RET1_ARG8 (v256di)
DEF_RET1_ARG8 (v512di)
/* N = 9.  */
DEF_RET1_ARG9 (v1di)
DEF_RET1_ARG9 (v2di)
DEF_RET1_ARG9 (v4di)
DEF_RET1_ARG9 (v8di)
DEF_RET1_ARG9 (v16di)
DEF_RET1_ARG9 (v32di)
DEF_RET1_ARG9 (v64di)
DEF_RET1_ARG9 (v128di)
DEF_RET1_ARG9 (v256di)
DEF_RET1_ARG9 (v512di)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
/* { dg-final { scan-assembler-times {ld\s+a[0-1],\s*[0-9]+\(sp\)} 29 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,141 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every HF vector
   type from v1hf up to v2048hf, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1hf)
DEF_RET1_ARG0 (v2hf)
DEF_RET1_ARG0 (v4hf)
DEF_RET1_ARG0 (v8hf)
DEF_RET1_ARG0 (v16hf)
DEF_RET1_ARG0 (v32hf)
DEF_RET1_ARG0 (v64hf)
DEF_RET1_ARG0 (v128hf)
DEF_RET1_ARG0 (v256hf)
DEF_RET1_ARG0 (v512hf)
DEF_RET1_ARG0 (v1024hf)
DEF_RET1_ARG0 (v2048hf)
/* N = 1.  */
DEF_RET1_ARG1 (v1hf)
DEF_RET1_ARG1 (v2hf)
DEF_RET1_ARG1 (v4hf)
DEF_RET1_ARG1 (v8hf)
DEF_RET1_ARG1 (v16hf)
DEF_RET1_ARG1 (v32hf)
DEF_RET1_ARG1 (v64hf)
DEF_RET1_ARG1 (v128hf)
DEF_RET1_ARG1 (v256hf)
DEF_RET1_ARG1 (v512hf)
DEF_RET1_ARG1 (v1024hf)
DEF_RET1_ARG1 (v2048hf)
/* N = 2.  */
DEF_RET1_ARG2 (v1hf)
DEF_RET1_ARG2 (v2hf)
DEF_RET1_ARG2 (v4hf)
DEF_RET1_ARG2 (v8hf)
DEF_RET1_ARG2 (v16hf)
DEF_RET1_ARG2 (v32hf)
DEF_RET1_ARG2 (v64hf)
DEF_RET1_ARG2 (v128hf)
DEF_RET1_ARG2 (v256hf)
DEF_RET1_ARG2 (v512hf)
DEF_RET1_ARG2 (v1024hf)
DEF_RET1_ARG2 (v2048hf)
/* N = 3.  */
DEF_RET1_ARG3 (v1hf)
DEF_RET1_ARG3 (v2hf)
DEF_RET1_ARG3 (v4hf)
DEF_RET1_ARG3 (v8hf)
DEF_RET1_ARG3 (v16hf)
DEF_RET1_ARG3 (v32hf)
DEF_RET1_ARG3 (v64hf)
DEF_RET1_ARG3 (v128hf)
DEF_RET1_ARG3 (v256hf)
DEF_RET1_ARG3 (v512hf)
DEF_RET1_ARG3 (v1024hf)
DEF_RET1_ARG3 (v2048hf)
/* N = 4.  */
DEF_RET1_ARG4 (v1hf)
DEF_RET1_ARG4 (v2hf)
DEF_RET1_ARG4 (v4hf)
DEF_RET1_ARG4 (v8hf)
DEF_RET1_ARG4 (v16hf)
DEF_RET1_ARG4 (v32hf)
DEF_RET1_ARG4 (v64hf)
DEF_RET1_ARG4 (v128hf)
DEF_RET1_ARG4 (v256hf)
DEF_RET1_ARG4 (v512hf)
DEF_RET1_ARG4 (v1024hf)
DEF_RET1_ARG4 (v2048hf)
/* N = 5.  */
DEF_RET1_ARG5 (v1hf)
DEF_RET1_ARG5 (v2hf)
DEF_RET1_ARG5 (v4hf)
DEF_RET1_ARG5 (v8hf)
DEF_RET1_ARG5 (v16hf)
DEF_RET1_ARG5 (v32hf)
DEF_RET1_ARG5 (v64hf)
DEF_RET1_ARG5 (v128hf)
DEF_RET1_ARG5 (v256hf)
DEF_RET1_ARG5 (v512hf)
DEF_RET1_ARG5 (v1024hf)
DEF_RET1_ARG5 (v2048hf)
/* N = 6.  */
DEF_RET1_ARG6 (v1hf)
DEF_RET1_ARG6 (v2hf)
DEF_RET1_ARG6 (v4hf)
DEF_RET1_ARG6 (v8hf)
DEF_RET1_ARG6 (v16hf)
DEF_RET1_ARG6 (v32hf)
DEF_RET1_ARG6 (v64hf)
DEF_RET1_ARG6 (v128hf)
DEF_RET1_ARG6 (v256hf)
DEF_RET1_ARG6 (v512hf)
DEF_RET1_ARG6 (v1024hf)
DEF_RET1_ARG6 (v2048hf)
/* N = 7.  */
DEF_RET1_ARG7 (v1hf)
DEF_RET1_ARG7 (v2hf)
DEF_RET1_ARG7 (v4hf)
DEF_RET1_ARG7 (v8hf)
DEF_RET1_ARG7 (v16hf)
DEF_RET1_ARG7 (v32hf)
DEF_RET1_ARG7 (v64hf)
DEF_RET1_ARG7 (v128hf)
DEF_RET1_ARG7 (v256hf)
DEF_RET1_ARG7 (v512hf)
DEF_RET1_ARG7 (v1024hf)
DEF_RET1_ARG7 (v2048hf)
/* N = 8.  */
DEF_RET1_ARG8 (v1hf)
DEF_RET1_ARG8 (v2hf)
DEF_RET1_ARG8 (v4hf)
DEF_RET1_ARG8 (v8hf)
DEF_RET1_ARG8 (v16hf)
DEF_RET1_ARG8 (v32hf)
DEF_RET1_ARG8 (v64hf)
DEF_RET1_ARG8 (v128hf)
DEF_RET1_ARG8 (v256hf)
DEF_RET1_ARG8 (v512hf)
DEF_RET1_ARG8 (v1024hf)
DEF_RET1_ARG8 (v2048hf)
/* N = 9.  */
DEF_RET1_ARG9 (v1hf)
DEF_RET1_ARG9 (v2hf)
DEF_RET1_ARG9 (v4hf)
DEF_RET1_ARG9 (v8hf)
DEF_RET1_ARG9 (v16hf)
DEF_RET1_ARG9 (v32hf)
DEF_RET1_ARG9 (v64hf)
DEF_RET1_ARG9 (v128hf)
DEF_RET1_ARG9 (v256hf)
DEF_RET1_ARG9 (v512hf)
DEF_RET1_ARG9 (v1024hf)
DEF_RET1_ARG9 (v2048hf)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 8 } } */
/* { dg-final { scan-assembler-times {lhu\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {lw\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {sh\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sw\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,129 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every SF vector
   type from v1sf up to v1024sf, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1sf)
DEF_RET1_ARG0 (v2sf)
DEF_RET1_ARG0 (v4sf)
DEF_RET1_ARG0 (v8sf)
DEF_RET1_ARG0 (v16sf)
DEF_RET1_ARG0 (v32sf)
DEF_RET1_ARG0 (v64sf)
DEF_RET1_ARG0 (v128sf)
DEF_RET1_ARG0 (v256sf)
DEF_RET1_ARG0 (v512sf)
DEF_RET1_ARG0 (v1024sf)
/* N = 1.  */
DEF_RET1_ARG1 (v1sf)
DEF_RET1_ARG1 (v2sf)
DEF_RET1_ARG1 (v4sf)
DEF_RET1_ARG1 (v8sf)
DEF_RET1_ARG1 (v16sf)
DEF_RET1_ARG1 (v32sf)
DEF_RET1_ARG1 (v64sf)
DEF_RET1_ARG1 (v128sf)
DEF_RET1_ARG1 (v256sf)
DEF_RET1_ARG1 (v512sf)
DEF_RET1_ARG1 (v1024sf)
/* N = 2.  */
DEF_RET1_ARG2 (v1sf)
DEF_RET1_ARG2 (v2sf)
DEF_RET1_ARG2 (v4sf)
DEF_RET1_ARG2 (v8sf)
DEF_RET1_ARG2 (v16sf)
DEF_RET1_ARG2 (v32sf)
DEF_RET1_ARG2 (v64sf)
DEF_RET1_ARG2 (v128sf)
DEF_RET1_ARG2 (v256sf)
DEF_RET1_ARG2 (v512sf)
DEF_RET1_ARG2 (v1024sf)
/* N = 3.  */
DEF_RET1_ARG3 (v1sf)
DEF_RET1_ARG3 (v2sf)
DEF_RET1_ARG3 (v4sf)
DEF_RET1_ARG3 (v8sf)
DEF_RET1_ARG3 (v16sf)
DEF_RET1_ARG3 (v32sf)
DEF_RET1_ARG3 (v64sf)
DEF_RET1_ARG3 (v128sf)
DEF_RET1_ARG3 (v256sf)
DEF_RET1_ARG3 (v512sf)
DEF_RET1_ARG3 (v1024sf)
/* N = 4.  */
DEF_RET1_ARG4 (v1sf)
DEF_RET1_ARG4 (v2sf)
DEF_RET1_ARG4 (v4sf)
DEF_RET1_ARG4 (v8sf)
DEF_RET1_ARG4 (v16sf)
DEF_RET1_ARG4 (v32sf)
DEF_RET1_ARG4 (v64sf)
DEF_RET1_ARG4 (v128sf)
DEF_RET1_ARG4 (v256sf)
DEF_RET1_ARG4 (v512sf)
DEF_RET1_ARG4 (v1024sf)
/* N = 5.  */
DEF_RET1_ARG5 (v1sf)
DEF_RET1_ARG5 (v2sf)
DEF_RET1_ARG5 (v4sf)
DEF_RET1_ARG5 (v8sf)
DEF_RET1_ARG5 (v16sf)
DEF_RET1_ARG5 (v32sf)
DEF_RET1_ARG5 (v64sf)
DEF_RET1_ARG5 (v128sf)
DEF_RET1_ARG5 (v256sf)
DEF_RET1_ARG5 (v512sf)
DEF_RET1_ARG5 (v1024sf)
/* N = 6.  */
DEF_RET1_ARG6 (v1sf)
DEF_RET1_ARG6 (v2sf)
DEF_RET1_ARG6 (v4sf)
DEF_RET1_ARG6 (v8sf)
DEF_RET1_ARG6 (v16sf)
DEF_RET1_ARG6 (v32sf)
DEF_RET1_ARG6 (v64sf)
DEF_RET1_ARG6 (v128sf)
DEF_RET1_ARG6 (v256sf)
DEF_RET1_ARG6 (v512sf)
DEF_RET1_ARG6 (v1024sf)
/* N = 7.  */
DEF_RET1_ARG7 (v1sf)
DEF_RET1_ARG7 (v2sf)
DEF_RET1_ARG7 (v4sf)
DEF_RET1_ARG7 (v8sf)
DEF_RET1_ARG7 (v16sf)
DEF_RET1_ARG7 (v32sf)
DEF_RET1_ARG7 (v64sf)
DEF_RET1_ARG7 (v128sf)
DEF_RET1_ARG7 (v256sf)
DEF_RET1_ARG7 (v512sf)
DEF_RET1_ARG7 (v1024sf)
/* N = 8.  */
DEF_RET1_ARG8 (v1sf)
DEF_RET1_ARG8 (v2sf)
DEF_RET1_ARG8 (v4sf)
DEF_RET1_ARG8 (v8sf)
DEF_RET1_ARG8 (v16sf)
DEF_RET1_ARG8 (v32sf)
DEF_RET1_ARG8 (v64sf)
DEF_RET1_ARG8 (v128sf)
DEF_RET1_ARG8 (v256sf)
DEF_RET1_ARG8 (v512sf)
DEF_RET1_ARG8 (v1024sf)
/* N = 9.  */
DEF_RET1_ARG9 (v1sf)
DEF_RET1_ARG9 (v2sf)
DEF_RET1_ARG9 (v4sf)
DEF_RET1_ARG9 (v8sf)
DEF_RET1_ARG9 (v16sf)
DEF_RET1_ARG9 (v32sf)
DEF_RET1_ARG9 (v64sf)
DEF_RET1_ARG9 (v128sf)
DEF_RET1_ARG9 (v256sf)
DEF_RET1_ARG9 (v512sf)
DEF_RET1_ARG9 (v1024sf)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 7 } } */
/* { dg-final { scan-assembler-times {lw\s+a[0-1],\s*[0-9]+\(sp\)} 8 } } */
/* { dg-final { scan-assembler-times {sw\s+a[0-7],\s*[0-9]+\(sp\)} 43 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,118 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b --param riscv-autovec-preference=scalable -mabi=lp64d -O3" } */
#include "def.h"
/* Instantiate the DEF_RET1_ARG<N> helpers from def.h for every DF vector
   type from v1df up to v512df, with N running from 0 to 9.  The dg-final
   scans at the end of this file count the GPR loads and stores found in
   the assembly generated for these instantiations.
   NOTE(review): the macros are defined in def.h, which is not visible
   here; presumably each one defines a function <type>_RET1_ARG<N> that
   returns one vector and takes N vector arguments -- confirm in def.h.  */

/* N = 0.  */
DEF_RET1_ARG0 (v1df)
DEF_RET1_ARG0 (v2df)
DEF_RET1_ARG0 (v4df)
DEF_RET1_ARG0 (v8df)
DEF_RET1_ARG0 (v16df)
DEF_RET1_ARG0 (v32df)
DEF_RET1_ARG0 (v64df)
DEF_RET1_ARG0 (v128df)
DEF_RET1_ARG0 (v256df)
DEF_RET1_ARG0 (v512df)
/* N = 1.  */
DEF_RET1_ARG1 (v1df)
DEF_RET1_ARG1 (v2df)
DEF_RET1_ARG1 (v4df)
DEF_RET1_ARG1 (v8df)
DEF_RET1_ARG1 (v16df)
DEF_RET1_ARG1 (v32df)
DEF_RET1_ARG1 (v64df)
DEF_RET1_ARG1 (v128df)
DEF_RET1_ARG1 (v256df)
DEF_RET1_ARG1 (v512df)
/* N = 2.  */
DEF_RET1_ARG2 (v1df)
DEF_RET1_ARG2 (v2df)
DEF_RET1_ARG2 (v4df)
DEF_RET1_ARG2 (v8df)
DEF_RET1_ARG2 (v16df)
DEF_RET1_ARG2 (v32df)
DEF_RET1_ARG2 (v64df)
DEF_RET1_ARG2 (v128df)
DEF_RET1_ARG2 (v256df)
DEF_RET1_ARG2 (v512df)
/* N = 3.  */
DEF_RET1_ARG3 (v1df)
DEF_RET1_ARG3 (v2df)
DEF_RET1_ARG3 (v4df)
DEF_RET1_ARG3 (v8df)
DEF_RET1_ARG3 (v16df)
DEF_RET1_ARG3 (v32df)
DEF_RET1_ARG3 (v64df)
DEF_RET1_ARG3 (v128df)
DEF_RET1_ARG3 (v256df)
DEF_RET1_ARG3 (v512df)
/* N = 4.  */
DEF_RET1_ARG4 (v1df)
DEF_RET1_ARG4 (v2df)
DEF_RET1_ARG4 (v4df)
DEF_RET1_ARG4 (v8df)
DEF_RET1_ARG4 (v16df)
DEF_RET1_ARG4 (v32df)
DEF_RET1_ARG4 (v64df)
DEF_RET1_ARG4 (v128df)
DEF_RET1_ARG4 (v256df)
DEF_RET1_ARG4 (v512df)
/* N = 5.  */
DEF_RET1_ARG5 (v1df)
DEF_RET1_ARG5 (v2df)
DEF_RET1_ARG5 (v4df)
DEF_RET1_ARG5 (v8df)
DEF_RET1_ARG5 (v16df)
DEF_RET1_ARG5 (v32df)
DEF_RET1_ARG5 (v64df)
DEF_RET1_ARG5 (v128df)
DEF_RET1_ARG5 (v256df)
DEF_RET1_ARG5 (v512df)
/* N = 6.  */
DEF_RET1_ARG6 (v1df)
DEF_RET1_ARG6 (v2df)
DEF_RET1_ARG6 (v4df)
DEF_RET1_ARG6 (v8df)
DEF_RET1_ARG6 (v16df)
DEF_RET1_ARG6 (v32df)
DEF_RET1_ARG6 (v64df)
DEF_RET1_ARG6 (v128df)
DEF_RET1_ARG6 (v256df)
DEF_RET1_ARG6 (v512df)
/* N = 7.  */
DEF_RET1_ARG7 (v1df)
DEF_RET1_ARG7 (v2df)
DEF_RET1_ARG7 (v4df)
DEF_RET1_ARG7 (v8df)
DEF_RET1_ARG7 (v16df)
DEF_RET1_ARG7 (v32df)
DEF_RET1_ARG7 (v64df)
DEF_RET1_ARG7 (v128df)
DEF_RET1_ARG7 (v256df)
DEF_RET1_ARG7 (v512df)
/* N = 8.  */
DEF_RET1_ARG8 (v1df)
DEF_RET1_ARG8 (v2df)
DEF_RET1_ARG8 (v4df)
DEF_RET1_ARG8 (v8df)
DEF_RET1_ARG8 (v16df)
DEF_RET1_ARG8 (v32df)
DEF_RET1_ARG8 (v64df)
DEF_RET1_ARG8 (v128df)
DEF_RET1_ARG8 (v256df)
DEF_RET1_ARG8 (v512df)
/* N = 9.  */
DEF_RET1_ARG9 (v1df)
DEF_RET1_ARG9 (v2df)
DEF_RET1_ARG9 (v4df)
DEF_RET1_ARG9 (v8df)
DEF_RET1_ARG9 (v16df)
DEF_RET1_ARG9 (v32df)
DEF_RET1_ARG9 (v64df)
DEF_RET1_ARG9 (v128df)
DEF_RET1_ARG9 (v256df)
DEF_RET1_ARG9 (v512df)
/* { dg-final { scan-assembler-times {li\s+a[0-1],\s*0} 6 } } */
/* { dg-final { scan-assembler-times {ld\s+a[0-1],\s*[0-9]+\(sp\)} 29 } } */
/* { dg-final { scan-assembler-times {sd\s+a[0-7],\s*[0-9]+\(sp\)} 103 } } */

View file

@ -0,0 +1,43 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d --param riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/* Expected function bodies for 8-byte integer VLS types when compiled with
   -mabi=lp64d (see dg-options above).  The patterns below expect:
     - v8qi, no argument: zero loaded into a0, then ret;
     - v4hi, one argument: a bare ret;
     - v2si, two arguments: a0 and a1 stored to the stack, result reloaded
       into a0;
     - v1di, three arguments: a0..a2 stored to the stack, result reloaded
       into a0.
   NOTE(review): the DEF_RET1_ARG<N> macros come from def.h, which is not
   visible here.  */
/*
** v8qi_RET1_ARG0:
** li\s+a0,\s*0
** ret
*/
DEF_RET1_ARG0 (v8qi)
/*
** v4hi_RET1_ARG1:
** ret
*/
DEF_RET1_ARG1 (v4hi)
/*
** v2si_RET1_ARG2:
** addi\s+sp,\s*sp,\s*-16
** sd\s+a0,\s*0\(sp\)
** sd\s+a1,\s*8\(sp\)
** ...
** ld\s+a0,\s*0\(sp\)
** addi\s+sp,\s*sp,\s*16
** jr\s+ra
*/
DEF_RET1_ARG2 (v2si)
/*
** v1di_RET1_ARG3:
** addi\s+sp,\s*sp,\s*-32
** sd\s+a0,\s*8\(sp\)
** sd\s+a1,\s*16\(sp\)
** sd\s+a2,\s*24\(sp\)
** ...
** ld\s+a0,\s*8\(sp\)
** addi\s+sp,\s*sp,\s*32
** jr\s+ra
*/
DEF_RET1_ARG3 (v1di)

View file

@ -0,0 +1,51 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d --param riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "def.h"
/* Expected function bodies for floating-point VLS types when compiled with
   -mabi=lp64d (see dg-options above).  The patterns below expect the
   vectors to travel in a0..a3 rather than in floating-point registers:
     - v4hf, one argument: a bare ret;
     - v2sf, two arguments: a0 and a1 stored to the stack, result reloaded
       into a0;
     - v4sf, two arguments: a0..a3 stored to the stack, result reloaded
       into a0 and a1;
     - v1df, three arguments: a0..a2 stored to the stack, result reloaded
       into a0.
   NOTE(review): the DEF_RET1_ARG<N> macros come from def.h, which is not
   visible here.  */
/*
** v4hf_RET1_ARG1:
** ret
*/
DEF_RET1_ARG1 (v4hf)
/*
** v2sf_RET1_ARG2:
** addi\s+sp,\s*sp,\s*-16
** sd\s+a0,\s*0\(sp\)
** sd\s+a1,\s*8\(sp\)
** ...
** ld\s+a0,\s*0\(sp\)
** addi\s+sp,\s*sp,\s*16
** jr\s+ra
*/
DEF_RET1_ARG2 (v2sf)
/*
** v4sf_RET1_ARG2:
** addi\s+sp,\s*sp,\s*-32
** sd\s+a0,\s*0\(sp\)
** sd\s+a1,\s*8\(sp\)
** sd\s+a2,\s*16\(sp\)
** sd\s+a3,\s*24\(sp\)
** ...
** ld\s+a0,\s*0\(sp\)
** ld\s+a1,\s*8\(sp\)
** addi\s+sp,\s*sp,\s*32
** jr\s+ra
*/
DEF_RET1_ARG2 (v4sf)
/*
** v1df_RET1_ARG3:
** addi\s+sp,\s*sp,\s*-32
** sd\s+a0,\s*8\(sp\)
** sd\s+a1,\s*16\(sp\)
** sd\s+a2,\s*24\(sp\)
** ...
** ld\s+a0,\s*8\(sp\)
** addi\s+sp,\s*sp,\s*32
** jr\s+ra
*/
DEF_RET1_ARG3 (v1df)

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
/* Vector of char elements occupying 16 bytes in total.  */
typedef char v16qi __attribute__ ((vector_size (16)));

/* Return the element-wise sum of the nine vector arguments, accumulated
   from A1 through A9 in order.  */
v16qi
add (v16qi a1, v16qi a2, v16qi a3, v16qi a4, v16qi a5, v16qi a6, v16qi a7,
     v16qi a8, v16qi a9)
{
  v16qi sum = a1 + a2;

  sum = sum + a3;
  sum = sum + a4;
  sum = sum + a5;
  sum = sum + a6;
  sum = sum + a7;
  sum = sum + a8;
  sum = sum + a9;

  return sum;
}
/* Call add with nine all-ones vectors and abort unless every element of
   the result equals 9.  */
int
main ()
{
  v16qi a1 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a2 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a3 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a4 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a5 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a6 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a7 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a8 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi a9 = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  v16qi expected = { 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9 };

  v16qi result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare element by element; any mismatch fails the test.  */
  for (unsigned lane = 0; lane < 16; lane++)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
    }

  return 0;
}

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
/* Vector of short elements occupying 16 bytes in total.  */
typedef short v8hi __attribute__ ((vector_size (16)));

/* Return the element-wise sum of the nine vector arguments, accumulated
   from A1 through A9 in order.  */
v8hi
add (v8hi a1, v8hi a2, v8hi a3, v8hi a4, v8hi a5, v8hi a6, v8hi a7,
     v8hi a8, v8hi a9)
{
  v8hi sum = a1 + a2;

  sum = sum + a3;
  sum = sum + a4;
  sum = sum + a5;
  sum = sum + a6;
  sum = sum + a7;
  sum = sum + a8;
  sum = sum + a9;

  return sum;
}
}
/* Call add with nine all-ones vectors and abort unless every element of
   the result equals 9.  */
int
main ()
{
  v8hi a1 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a2 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a3 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a4 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a5 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a6 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a7 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a8 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi a9 = { 1, 1, 1, 1, 1, 1, 1, 1 };
  v8hi expected = { 9, 9, 9, 9, 9, 9, 9, 9 };

  v8hi result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare element by element; any mismatch fails the test.  */
  for (unsigned lane = 0; lane < 8; lane++)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
    }

  return 0;
}

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
typedef int v4si __attribute__ ((vector_size (16)));

/* Return the lane-wise sum of the nine 16-byte int vectors A1 .. A9,
   accumulated left to right.  */
v4si
add (v4si a1, v4si a2, v4si a3, v4si a4, v4si a5, v4si a6, v4si a7,
     v4si a8, v4si a9)
{
  v4si sum = a1;

  sum += a2;
  sum += a3;
  sum += a4;
  sum += a5;
  sum += a6;
  sum += a7;
  sum += a8;
  sum += a9;

  return sum;
}
/* Runtime check: pass nine all-ones v4si vectors to add () and abort
   unless every one of the 4 lanes of the result equals 9.  */
int
main ()
{
  v4si a1 = { 1, 1, 1, 1 };
  v4si a2 = { 1, 1, 1, 1 };
  v4si a3 = { 1, 1, 1, 1 };
  v4si a4 = { 1, 1, 1, 1 };
  v4si a5 = { 1, 1, 1, 1 };
  v4si a6 = { 1, 1, 1, 1 };
  v4si a7 = { 1, 1, 1, 1 };
  v4si a8 = { 1, 1, 1, 1 };
  v4si a9 = { 1, 1, 1, 1 };
  v4si expected = { 9, 9, 9, 9 };

  v4si result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare lane by lane; any mismatch is a test failure.  */
  unsigned lane = 0;
  while (lane < 4)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
      lane++;
    }

  return 0;
}

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
typedef long long v2di __attribute__ ((vector_size (16)));

/* Return the lane-wise sum of the nine 16-byte long long vectors
   A1 .. A9, accumulated left to right.  */
v2di
add (v2di a1, v2di a2, v2di a3, v2di a4, v2di a5, v2di a6, v2di a7,
     v2di a8, v2di a9)
{
  v2di sum = a1;

  sum += a2;
  sum += a3;
  sum += a4;
  sum += a5;
  sum += a6;
  sum += a7;
  sum += a8;
  sum += a9;

  return sum;
}
/* Runtime check: pass nine all-ones v2di vectors to add () and abort
   unless both lanes of the result equal 9.  */
int
main ()
{
  v2di a1 = { 1, 1 };
  v2di a2 = { 1, 1 };
  v2di a3 = { 1, 1 };
  v2di a4 = { 1, 1 };
  v2di a5 = { 1, 1 };
  v2di a6 = { 1, 1 };
  v2di a7 = { 1, 1 };
  v2di a8 = { 1, 1 };
  v2di a9 = { 1, 1 };
  v2di expected = { 9, 9 };

  v2di result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare lane by lane; any mismatch is a test failure.  */
  unsigned lane = 0;
  while (lane < 2)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
      lane++;
    }

  return 0;
}

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
typedef float v4sf __attribute__ ((vector_size (16)));

/* Return the lane-wise sum of the nine 16-byte float vectors A1 .. A9.
   The additions are performed strictly left to right so the rounding
   sequence matches a1 + a2 + ... + a9.  */
v4sf
add (v4sf a1, v4sf a2, v4sf a3, v4sf a4, v4sf a5, v4sf a6, v4sf a7,
     v4sf a8, v4sf a9)
{
  v4sf sum = a1;

  sum += a2;
  sum += a3;
  sum += a4;
  sum += a5;
  sum += a6;
  sum += a7;
  sum += a8;
  sum += a9;

  return sum;
}
/* Runtime check: pass nine all-1.0 v4sf vectors to add () and abort
   unless every one of the 4 lanes of the result equals 9.0.  */
int
main ()
{
  v4sf a1 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a2 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a3 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a4 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a5 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a6 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a7 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a8 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf a9 = { 1.0, 1.0, 1.0, 1.0 };
  v4sf expected = { 9.0, 9.0, 9.0, 9.0 };

  v4sf result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare lane by lane; any mismatch is a test failure.  */
  unsigned lane = 0;
  while (lane < 4)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
      lane++;
    }

  return 0;
}

View file

@ -0,0 +1,55 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
/* Two-lane double vector.  The element type must be `double': with an
   integer element type this test would merely repeat the v2di case and
   never exercise a floating-point VLS mode.  */
typedef double v2df __attribute__ ((vector_size (16)));

/* Return the lane-wise sum of the nine 16-byte double vectors A1 .. A9,
   evaluated left to right.  */
v2df
add (v2df a1, v2df a2, v2df a3, v2df a4, v2df a5, v2df a6, v2df a7,
     v2df a8, v2df a9)
{
  return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9;
}
/* Runtime check: pass nine v2df vectors whose lanes are all 1.0 to
   add () and abort unless both lanes of the result equal 9.0.  */
int
main ()
{
  v2df a1 = { 1.0, 1.0 };
  v2df a2 = { 1.0, 1.0 };
  v2df a3 = { 1.0, 1.0 };
  v2df a4 = { 1.0, 1.0 };
  v2df a5 = { 1.0, 1.0 };
  v2df a6 = { 1.0, 1.0 };
  v2df a7 = { 1.0, 1.0 };
  v2df a8 = { 1.0, 1.0 };
  v2df a9 = { 1.0, 1.0 };
  v2df expected = { 9.0, 9.0 };

  v2df result = add (a1, a2, a3, a4, a5, a6, a7, a8, a9);

  /* Compare lane by lane; any mismatch is a test failure.  */
  unsigned lane = 0;
  while (lane < 2)
    {
      if (result[lane] != expected[lane])
        __builtin_abort ();
      lane++;
    }

  return 0;
}

View file

@ -860,3 +860,77 @@ typedef double v512df __attribute__ ((vector_size (4096)));
TYPE1 v = {__VA_ARGS__}; \
*(TYPE1 *) out = v; \
}
/* Define TYPE##_RET1_ARG0: a noinline function that takes no arguments
   and returns a TYPE value initialized with an empty brace list.
   NOTE(review): noinline is presumably there so the call/return sequence
   is actually emitted for the calling-convention tests -- confirm.  */
#define DEF_RET1_ARG0(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG0 () \
{ \
TYPE r = {}; \
return r; \
}
/* Define TYPE##_RET1_ARG1: a noinline function that takes one TYPE
   argument and returns it unchanged.  */
#define DEF_RET1_ARG1(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG1 (TYPE a1) \
{ \
return a1; \
}
/* Define TYPE##_RET1_ARG2: a noinline function that takes two TYPE
   arguments and returns a1 + a2.  */
#define DEF_RET1_ARG2(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG2 (TYPE a1, TYPE a2) \
{ \
return a1 + a2; \
}
/* Define TYPE##_RET1_ARG3: a noinline function that takes three TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG3(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG3 (TYPE a1, TYPE a2, TYPE a3) \
{ \
return a1 + a2 + a3; \
}
/* Define TYPE##_RET1_ARG4: a noinline function that takes four TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG4(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG4 (TYPE a1, TYPE a2, TYPE a3, TYPE a4) \
{ \
return a1 + a2 + a3 + a4; \
}
/* Define TYPE##_RET1_ARG5: a noinline function that takes five TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG5(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG5 (TYPE a1, TYPE a2, TYPE a3, TYPE a4, TYPE a5) \
{ \
return a1 + a2 + a3 + a4 + a5; \
}
/* Define TYPE##_RET1_ARG6: a noinline function that takes six TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG6(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG6 (TYPE a1, TYPE a2, TYPE a3, TYPE a4, TYPE a5, TYPE a6) \
{ \
return a1 + a2 + a3 + a4 + a5 + a6; \
}
/* Define TYPE##_RET1_ARG7: a noinline function that takes seven TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG7(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG7 (TYPE a1, TYPE a2, TYPE a3, TYPE a4, TYPE a5, TYPE a6, \
TYPE a7) \
{ \
return a1 + a2 + a3 + a4 + a5 + a6 + a7; \
}
/* Define TYPE##_RET1_ARG8: a noinline function that takes eight TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG8(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG8 (TYPE a1, TYPE a2, TYPE a3, TYPE a4, TYPE a5, TYPE a6, \
TYPE a7, TYPE a8) \
{ \
return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; \
}
/* Define TYPE##_RET1_ARG9: a noinline function that takes nine TYPE
   arguments and returns their sum, added left to right.  */
#define DEF_RET1_ARG9(TYPE) \
TYPE __attribute__((noinline)) \
TYPE##_RET1_ARG9 (TYPE a1, TYPE a2, TYPE a3, TYPE a4, TYPE a5, TYPE a6, \
TYPE a7, TYPE a8, TYPE a9) \
{ \
return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9; \
}