RISC-V: Support FP lrint/lrintf auto vectorization

This patch would like to support the FP lrint/lrintf auto vectorization.

* long lrint (double) for rv64
* long lrintf (float) for rv32

Due to the limitation that only data types of the same size are allowed
in the vectorizer, the standard name lrintmn2 only acts on DF => DI for
rv64, and SF => SI for rv32.

Given we have code like:

void
test_lrint (long *out, double *in, unsigned count)
{
  for (unsigned i = 0; i < count; i++)
    out[i] = __builtin_lrint (in[i]);
}

Before this patch:
.L3:
  ...
  fld      fa5,0(a1)
  fcvt.l.d a5,fa5,dyn
  sd       a5,-8(a0)
  ...
  bne      a1,a4,.L3

After this patch:
.L3:
  ...
  vsetvli     a3,zero,e64,m1,ta,ma
  vfcvt.x.f.v v1,v1
  vsetvli     zero,a2,e64,m1,ta,ma
  vse64.v     v1,0(a0)
  ...
  bne         a2,zero,.L3

The remaining conversions, such as SF => DI, HF => DI, DF => SI and
HF => SI, will be covered by TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION.

gcc/ChangeLog:

	* config/riscv/autovec.md (lrint<mode><vlconvert>2): New pattern
	for lrint/lrintf.
	* config/riscv/riscv-protos.h (expand_vec_lrint): New func decl
	for expanding lrint.
	* config/riscv/riscv-v.cc (emit_vec_cvt_x_f): New helper func impl
	for vfcvt.x.f.v.
	(expand_vec_lrint): New function impl for expanding lrint.
	* config/riscv/vector-iterators.md: New mode attr and iterator.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/test-math.h: New define for
	CVT like test case.
	* gcc.target/riscv/rvv/autovec/vls/def.h: Ditto.
	* gcc.target/riscv/rvv/autovec/unop/math-lrint-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lrint-1.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lrint-run-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lrint-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lrint-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lrint-1.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
This commit is contained in:
Pan Li 2023-10-11 15:51:33 +08:00
parent d4de593d36
commit d1e5566685
12 changed files with 348 additions and 0 deletions

View file

@ -2239,6 +2239,7 @@
;; - round/roundf
;; - trunc/truncf
;; - roundeven/roundevenf
;; - lrint/lrintf
;; -------------------------------------------------------------------------
(define_expand "ceil<mode>2"
[(match_operand:V_VLSF 0 "register_operand")
@ -2309,3 +2310,13 @@
DONE;
}
)
;; Expander for the lrint<mode><vlconvert>2 standard name.  Operand 1 is a
;; floating-point vector in one of the V_VLS_FCONVERTL modes and operand 0 is
;; the integer vector in the paired <VLCONVERT> mode.  The pattern is enabled
;; only when TARGET_VECTOR is set and both flag_trapping_math and
;; flag_rounding_math are clear.  All of the work is delegated to
;; riscv_vector::expand_vec_lrint.
(define_expand "lrint<mode><vlconvert>2"
[(match_operand:<VLCONVERT> 0 "register_operand")
(match_operand:V_VLS_FCONVERTL 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
riscv_vector::expand_vec_lrint (operands[0], operands[1], <MODE>mode, <VLCONVERT>mode);
DONE;
}
)

View file

@ -474,6 +474,7 @@ void expand_vec_rint (rtx, rtx, machine_mode, machine_mode);
void expand_vec_round (rtx, rtx, machine_mode, machine_mode);
void expand_vec_trunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_roundeven (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx));

View file

@ -3911,6 +3911,16 @@ emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask,
emit_vlmax_insn (icode, type, cvt_x_ops);
}
/* Unmasked overload of emit_vec_cvt_x_f: emit the float-to-integer convert
   pattern selected by code_for_pred_fcvt_x_f (UNSPEC_VFCVT, VEC_MODE) through
   emit_vlmax_insn, reading OP_SRC and writing OP_DEST.  TYPE is forwarded
   unchanged as the insn_type.  */
static void
emit_vec_cvt_x_f (rtx op_dest, rtx op_src, insn_type type,
		  machine_mode vec_mode)
{
  insn_code cvt_icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode);
  rtx cvt_x_ops[] = {op_dest, op_src};

  emit_vlmax_insn (cvt_icode, type, cvt_x_ops);
}
static void
emit_vec_cvt_f_x (rtx op_dest, rtx op_src, rtx mask,
insn_type type, machine_mode vec_mode)
@ -4095,4 +4105,14 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
emit_vec_copysign (op_0, op_0, op_1, vec_fp_mode);
}
/* Expand a vector lrint/lrintf: convert the floating-point vector OP_1 of
   mode VEC_FP_MODE into the integer vector OP_0 of mode VEC_LONG_MODE using
   the dynamic-rounding-mode insn type (UNARY_OP_FRM_DYN).  Only same-size
   conversions are handled, so the two modes must occupy the same number of
   bytes.  */
void
expand_vec_lrint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
		  machine_mode vec_long_mode)
{
  const auto fp_bytes = GET_MODE_SIZE (vec_fp_mode);
  const auto long_bytes = GET_MODE_SIZE (vec_long_mode);

  /* VEC_LONG_MODE is only needed for this sanity check; the convert pattern
     is looked up from the floating-point mode alone.  */
  gcc_assert (known_eq (fp_bytes, long_bytes));

  emit_vec_cvt_x_f (op_0, op_1, UNARY_OP_FRM_DYN, vec_fp_mode);
}
} // namespace riscv_vector

View file

@ -3281,6 +3281,75 @@
(V512DI "v512hf")
])
;; VLCONVERT maps each floating-point vector mode to the integer vector mode
;; with the same element width and the same shape (SF => SI, DF => DI).
;; The "L" indicates "convert to long".
(define_mode_attr VLCONVERT [
(RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI")
(RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
(RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI")
(RVVM1DF "RVVM1DI")
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
(V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
(V512SF "V512SI") (V1024SF "V1024SI")
(V1DF "V1DI") (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI") (V16DF "V16DI")
(V32DF "V32DI") (V64DF "V64DI") (V128DF "V128DI") (V256DF "V256DI")
(V512DF "V512DI")
])
;; Lower-case spelling of the same floating-point => integer mode mapping as
;; VLCONVERT, for use where the mode name is spliced into a pattern name.
(define_mode_attr vlconvert [
(RVVM8SF "rvvm8si") (RVVM4SF "rvvm4si") (RVVM2SF "rvvm2si")
(RVVM1SF "rvvm1si") (RVVMF2SF "rvvmf2si")
(RVVM8DF "rvvm8di") (RVVM4DF "rvvm4di") (RVVM2DF "rvvm2di")
(RVVM1DF "rvvm1di")
(V1SF "v1si") (V2SF "v2si") (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")
(V32SF "v32si") (V64SF "v64si") (V128SF "v128si") (V256SF "v256si")
(V512SF "v512si") (V1024SF "v1024si")
(V1DF "v1di") (V2DF "v2di") (V4DF "v4di") (V8DF "v8di") (V16DF "v16di")
(V32DF "v32di") (V64DF "v64di") (V128DF "v128di") (V256DF "v256di")
(V512DF "v512di")
])
;; Floating-point vector modes (both RVVM* and fixed-length V* modes) that can
;; be converted to a "long" vector of the same element size.  Every SF entry
;; requires !TARGET_64BIT and every DF entry requires TARGET_64BIT, matching
;; long lrintf (float) on rv32 and long lrint (double) on rv64.  The V* entries
;; additionally require TARGET_VECTOR_VLS, and the larger ones a minimum
;; TARGET_MIN_VLEN.
(define_mode_iterator V_VLS_FCONVERTL [
(RVVM8SF "TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(RVVM4SF "TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(RVVM2SF "TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN > 32")
(RVVM8DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(RVVM4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(RVVM2DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(RVVM1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(V1SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(V2SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(V4SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(V8SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT")
(V16SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 64")
(V32SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 128")
(V64SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 256")
(V128SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 512")
(V256SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 1024")
(V512SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 2048")
(V1024SF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_32 && !TARGET_64BIT && TARGET_MIN_VLEN >= 4096")
(V1DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(V2DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(V4DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT")
(V8DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 64")
(V16DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 128")
(V32DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 256")
(V64DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 512")
(V128DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 1024")
(V256DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 2048")
(V512DF "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT && TARGET_MIN_VLEN >= 4096")
])
(define_mode_attr VDEMOTE [
(RVVM8DI "RVVM8SI") (RVVM4DI "RVVM4SI") (RVVM2DI "RVVM2SI") (RVVM1DI "RVVM1SI")
(V1DI "V1SI")

View file

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test_double_long___builtin_lrint:
** ...
** vsetvli\s+[atx][0-9]+,\s*zero,\s*e64,\s*m1,\s*ta,\s*ma
** vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
*/
/* Instantiate test_double_long___builtin_lrint, the double => long loop
   whose generated body is matched above.  */
TEST_UNARY_CALL_CVT (double, long, __builtin_lrint)

View file

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv -mabi=ilp32f -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test_float_long___builtin_lrintf:
** ...
** vsetvli\s+[atx][0-9]+,\s*zero,\s*e32,\s*m1,\s*ta,\s*ma
** vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
*/
/* Instantiate test_float_long___builtin_lrintf, the float => long loop
   whose generated body is matched above.  */
TEST_UNARY_CALL_CVT (float, long, __builtin_lrintf)

View file

@ -0,0 +1,63 @@
/* { dg-do run { target { riscv_v && rv64 } } } */
/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
#include "test-math.h"
/* Number of elements converted and checked per test case.  */
#define ARRAY_SIZE 128
/* Input values, converted results and expected results shared by all
   cases.  */
double in[ARRAY_SIZE];
long out[ARRAY_SIZE];
long ref[ARRAY_SIZE];
/* The double => long lrint loop under test and the checker for long
   arrays.  */
TEST_UNARY_CALL_CVT (double, long, __builtin_lrint)
TEST_ASSERT (long)
/* One init function per case: fills IN with the input value and REF with the
   expected result.  Cases 1-8 are small values.  */
TEST_INIT_CVT (double, 1.2, long, __builtin_lrint (1.2), 1)
TEST_INIT_CVT (double, -1.2, long, __builtin_lrint (-1.2), 2)
TEST_INIT_CVT (double, 0.5, long, __builtin_lrint (0.5), 3)
TEST_INIT_CVT (double, -0.5, long, __builtin_lrint (-0.5), 4)
TEST_INIT_CVT (double, 0.1, long, __builtin_lrint (0.1), 5)
TEST_INIT_CVT (double, -0.1, long, __builtin_lrint (-0.1), 6)
TEST_INIT_CVT (double, 3.0, long, __builtin_lrint (3.0), 7)
TEST_INIT_CVT (double, -3.0, long, __builtin_lrint (-3.0), 8)
/* Values around 2^52 (4503599627370496), where double runs out of
   fractional bits.  */
TEST_INIT_CVT (double, 4503599627370495.5, long, __builtin_lrint (4503599627370495.5), 9)
TEST_INIT_CVT (double, 4503599627370497.0, long, __builtin_lrint (4503599627370497.0), 10)
TEST_INIT_CVT (double, -4503599627370495.5, long, __builtin_lrint (-4503599627370495.5), 11)
TEST_INIT_CVT (double, -4503599627370496.0, long, __builtin_lrint (-4503599627370496.0), 12)
/* Signed zeros.  Each reference is computed from the same value that is fed
   to the loop.  */
TEST_INIT_CVT (double, 0.0, long, __builtin_lrint (0.0), 13)
TEST_INIT_CVT (double, -0.0, long, __builtin_lrint (-0.0), 14)
/* Values around +/-2^63 (9223372036854775808), the edge of the 64-bit signed
   range.  */
TEST_INIT_CVT (double, 9223372036854774784.0, long, __builtin_lrint (9223372036854774784.0), 15)
TEST_INIT_CVT (double, 9223372036854775808.0, long, __builtin_lrint (9223372036854775808.0), 16)
TEST_INIT_CVT (double, -9223372036854775808.0, long, __builtin_lrint (-9223372036854775808.0), 17)
TEST_INIT_CVT (double, -9223372036854777856.0, long, __builtin_lrint (-9223372036854777856.0), 18)
/* Infinities.  */
TEST_INIT_CVT (double, __builtin_inf (), long, __builtin_lrint (__builtin_inf ()), 19)
TEST_INIT_CVT (double, -__builtin_inf (), long, __builtin_lrint (-__builtin_inf ()), 20)
/* NaN: the reference is hard-coded instead of being computed through the
   builtin.  */
TEST_INIT_CVT (double, __builtin_nan (""), long, 0x7fffffffffffffff, 21)
/* Run cases 1 to 21 in order.  Each RUN_TEST_CVT initialises IN and REF for
   that case, converts IN into OUT with test_double_long___builtin_lrint and
   then checks OUT against REF with test_long_assert.  */
int
main ()
{
RUN_TEST_CVT (double, long, 1, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 2, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 3, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 4, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 5, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 6, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 7, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 8, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 9, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 10, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 11, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 12, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 13, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 14, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 15, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 16, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 17, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 18, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 19, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 20, __builtin_lrint, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (double, long, 21, __builtin_lrint, in, out, ref, ARRAY_SIZE);
return 0;
}

View file

@ -0,0 +1,63 @@
/* { dg-do run { target { riscv_v && rv32 } } } */
/* { dg-additional-options "-std=c99 -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
#include "test-math.h"
/* Number of elements converted and checked per test case.  */
#define ARRAY_SIZE 128
/* Input values, converted results and expected results shared by all
   cases.  */
float in[ARRAY_SIZE];
long out[ARRAY_SIZE];
long ref[ARRAY_SIZE];
/* The float => long lrintf loop under test and the checker for long
   arrays.  */
TEST_UNARY_CALL_CVT (float, long, __builtin_lrintf)
TEST_ASSERT (long)
/* One init function per case: fills IN with the input value and REF with the
   expected result.  Cases 1-8 are small values.  */
TEST_INIT_CVT (float, 1.2, long, __builtin_lrintf (1.2), 1)
TEST_INIT_CVT (float, -1.2, long, __builtin_lrintf (-1.2), 2)
TEST_INIT_CVT (float, 0.5, long, __builtin_lrintf (0.5), 3)
TEST_INIT_CVT (float, -0.5, long, __builtin_lrintf (-0.5), 4)
TEST_INIT_CVT (float, 0.1, long, __builtin_lrintf (0.1), 5)
TEST_INIT_CVT (float, -0.1, long, __builtin_lrintf (-0.1), 6)
TEST_INIT_CVT (float, 3.0, long, __builtin_lrintf (3.0), 7)
TEST_INIT_CVT (float, -3.0, long, __builtin_lrintf (-3.0), 8)
/* Values around 2^23 (8388608), where float runs out of fractional bits.  */
TEST_INIT_CVT (float, 8388607.5, long, __builtin_lrintf (8388607.5), 9)
TEST_INIT_CVT (float, 8388609.0, long, __builtin_lrintf (8388609.0), 10)
TEST_INIT_CVT (float, -8388607.5, long, __builtin_lrintf (-8388607.5), 11)
TEST_INIT_CVT (float, -8388609.0, long, __builtin_lrintf (-8388609.0), 12)
/* Signed zeros.  Each reference is computed from the same value that is fed
   to the loop.  */
TEST_INIT_CVT (float, 0.0, long, __builtin_lrintf (0.0), 13)
TEST_INIT_CVT (float, -0.0, long, __builtin_lrintf (-0.0), 14)
/* Values around +/-2^31 (2147483648), the edge of the 32-bit signed
   range.  */
TEST_INIT_CVT (float, 2147483520.0, long, __builtin_lrintf (2147483520.0), 15)
TEST_INIT_CVT (float, 2147483648.0, long, __builtin_lrintf (2147483648.0), 16)
TEST_INIT_CVT (float, -2147483648.0, long, __builtin_lrintf (-2147483648.0), 17)
TEST_INIT_CVT (float, -2147483904.0, long, __builtin_lrintf (-2147483904.0), 18)
/* Infinities, using the float builtin for both the input and the
   reference.  */
TEST_INIT_CVT (float, __builtin_inff (), long, __builtin_lrintf (__builtin_inff ()), 19)
TEST_INIT_CVT (float, -__builtin_inff (), long, __builtin_lrintf (-__builtin_inff ()), 20)
/* NaN: the reference is hard-coded instead of being computed through the
   builtin.  */
TEST_INIT_CVT (float, __builtin_nanf (""), long, 0x7fffffff, 21)
/* Run cases 1 to 21 in order.  Each RUN_TEST_CVT initialises IN and REF for
   that case, converts IN into OUT with test_float_long___builtin_lrintf and
   then checks OUT against REF with test_long_assert.  */
int
main ()
{
RUN_TEST_CVT (float, long, 1, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 2, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 3, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 4, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 5, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 6, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 7, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 8, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 9, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 10, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 11, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 12, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 13, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 14, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 15, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 16, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 17, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 18, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 19, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 20, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
RUN_TEST_CVT (float, long, 21, __builtin_lrintf, in, out, ref, ARRAY_SIZE);
return 0;
}

View file

@ -5,6 +5,14 @@
out[i] = CALL (in[i]); \
}
/* Define test_<TYPE_IN>_<TYPE_OUT>_<CALL> (out, in, count), which stores
   CALL (in[i]) into out[i] for every i below COUNT.  The input and output
   element types are given separately so that converting calls can be
   tested.  */
#define TEST_UNARY_CALL_CVT(TYPE_IN, TYPE_OUT, CALL) \
void test_##TYPE_IN##_##TYPE_OUT##_##CALL ( \
TYPE_OUT *out, TYPE_IN *in, unsigned count) \
{ \
for (unsigned i = 0; i < count; i++) \
out[i] = CALL (in[i]); \
}
#define TEST_COND_UNARY_CALL(TYPE, CALL) \
void test_##TYPE##_##CALL (TYPE *out, int *cond, TYPE *in, unsigned count) \
{ \
@ -22,6 +30,17 @@
} \
}
/* Define test_<TYPE_IN>_<TYPE_REF>_init_<NUM> (in, ref, size), which fills
   the first SIZE elements of IN with VAL_IN and of REF with VAL_REF.  Both
   value expressions are expanded inside the loop body, so they are evaluated
   once per element.  */
#define TEST_INIT_CVT(TYPE_IN, VAL_IN, TYPE_REF, VAL_REF, NUM) \
void test_##TYPE_IN##_##TYPE_REF##_init_##NUM ( \
TYPE_IN *in, TYPE_REF *ref, unsigned size) \
{ \
for (unsigned i = 0; i < size; i++) \
{ \
in[i] = VAL_IN; \
ref[i] = VAL_REF; \
} \
}
#define TEST_ASSERT(TYPE) \
void test_##TYPE##_assert (TYPE *out, TYPE *ref, unsigned size) \
{ \
@ -37,6 +56,11 @@
test_##TYPE##_##CALL (OUT, IN, SIZE); \
test_##TYPE##_assert (OUT, REF, SIZE);
/* Run converting test case NUM: call the matching init function on IN and
   REF, call test_<TYPE_IN>_<TYPE_OUT>_<CALL> to convert IN into OUT, then
   call test_<TYPE_OUT>_assert on OUT and REF.  This expands to three separate
   statements with no enclosing block, so it must be used as a statement of
   its own and not as the unbraced body of an if or a loop.  */
#define RUN_TEST_CVT(TYPE_IN, TYPE_OUT, NUM, CALL, IN, OUT, REF, SIZE) \
test_##TYPE_IN##_##TYPE_OUT##_init_##NUM (IN, REF, SIZE); \
test_##TYPE_IN##_##TYPE_OUT##_##CALL (OUT, IN, SIZE); \
test_##TYPE_OUT##_assert (OUT, REF, SIZE);
#define FRM_RNE 0
#define FRM_RTZ 1
#define FRM_RDN 2

View file

@ -213,6 +213,15 @@ typedef double v512df __attribute__ ((vector_size (4096)));
a[i] = OP (b[i]); \
}
/* Define the noinline, noclone function PREFIX_<TYPE_IN>_<TYPE_OUT>_<NUM>
   (a, b), which stores OP (b[i]) into a[i] for exactly NUM elements.  The
   input and output element types are given separately so that converting
   operations can be tested; both pointers are restrict-qualified.  */
#define DEF_OP_V_CVT(PREFIX, NUM, TYPE_IN, TYPE_OUT, OP) \
void __attribute__ ((noinline, noclone)) \
PREFIX##_##TYPE_IN##_##TYPE_OUT##_##NUM (TYPE_OUT *restrict a, \
TYPE_IN *restrict b) \
{ \
for (int i = 0; i < NUM; ++i) \
a[i] = OP (b[i]); \
}
#define DEF_CALL_VV(PREFIX, NUM, TYPE, CALL) \
void __attribute__ ((noinline, noclone)) \
PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b, TYPE *restrict c) \

View file

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
/* One fixed-trip-count double => long lrint loop for each element count
   from 1 to 512.  */
DEF_OP_V_CVT (lrint, 1, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 2, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 4, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 8, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 16, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 32, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 64, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 128, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 256, double, long, __builtin_lrint)
DEF_OP_V_CVT (lrint, 512, double, long, __builtin_lrint)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32f -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
/* One fixed-trip-count float => long lrintf loop for each element count
   from 1 to 512.  */
DEF_OP_V_CVT (lrintf, 1, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 2, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 4, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 8, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 16, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 32, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 64, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 128, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 256, float, long, __builtin_lrintf)
DEF_OP_V_CVT (lrintf, 512, float, long, __builtin_lrintf)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
/* { dg-final { scan-assembler-times {vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */