RISC-V: Support FP l/ll round and rint HF mode autovec

This patch adds auto-vectorization support for the FP APIs below,
covering the different type sizes on RV64 and RV32.

+------------+-----------+----------+
| API        | RV64      | RV32     |
+------------+-----------+----------+
| lrintf16   | HF => DI  | HF => SI |
| llrintf16  | HF => DI  | HF => DI |
| lroundf16  | HF => DI  | HF => SI |
| llroundf16 | HF => DI  | HF => DI |
+------------+-----------+----------+

Given below code:
void
test_lrintf16 (long *out, _Float16 *in, int count)
{
  for (unsigned i = 0; i < count; i++)
    out[i] = __builtin_lrintf16 (in[i]);
}

Before this patch:
.L3:
  lhu     a5,0(s0)
  addi    s0,s0,2
  addi    s1,s1,8
  fmv.s.x fa0,a5
  call    lrintf16
  sd      a0,-8(s1)
  bne     s0,s2,.L3

After this patch:
.L3:
  vsetvli a5,a2,e16,mf4,ta,ma
  vle16.v v1,0(a1)
  vfwcvt.f.f.v    v2,v1
  vsetvli zero,zero,e32,mf2,ta,ma
  vfwcvt.x.f.v    v1,v2
  vse64.v v1,0(a0)
  slli    a4,a5,1
  add     a1,a1,a4
  slli    a4,a5,3
  add     a0,a0,a4
  sub     a2,a2,a5
  bne     a2,zero,.L3

gcc/ChangeLog:

	* config/riscv/autovec.md: Add bridge mode to lrint and lround
	pattern.
	* config/riscv/riscv-protos.h (expand_vec_lrint): Add new arg
	bridge machine mode.
	(expand_vec_lround): Ditto.
	* config/riscv/riscv-v.cc (emit_vec_widden_cvt_f_f): New helper
	func impl to emit vfwcvt.f.f.
	(emit_vec_rounding_to_integer): Handle the HF to DI rounding
	with the bridge mode.
	(expand_vec_lrint): Reorder the args.
	(expand_vec_lround): Ditto.
	(expand_vec_lceil): Ditto.
	(expand_vec_lfloor): Ditto.
	* config/riscv/vector-iterators.md: Add vector HFmode and bridge
	mode for converting to DI.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/unop/math-llrintf16-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-llroundf16-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lrintf16-rv32-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lrintf16-rv64-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv32-0.c: New test.
	* gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-llrintf16-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-llroundf16-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lrintf16-rv32-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lrintf16-rv64-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lroundf16-rv32-0.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/math-lroundf16-rv64-0.c: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
This commit is contained in:
Pan Li 2023-11-12 20:16:03 +08:00
parent 93e92b2e5d
commit 5dfa501d2f
16 changed files with 397 additions and 24 deletions

View file

@ -2455,14 +2455,13 @@
}
)
;; Add mode_size equal check as we opened the modes for different sizes.
;; The check will be removed soon after related codegen implemented
(define_expand "lrint<mode><v_f2si_convert>2"
[(match_operand:<V_F2SI_CONVERT> 0 "register_operand")
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
riscv_vector::expand_vec_lrint (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
riscv_vector::expand_vec_lrint (operands[0], operands[1], <MODE>mode,
<V_F2SI_CONVERT>mode, VOIDmode);
DONE;
}
)
@ -2472,7 +2471,9 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
riscv_vector::expand_vec_lrint (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
riscv_vector::expand_vec_lrint (operands[0], operands[1], <MODE>mode,
<V_F2DI_CONVERT>mode,
<V_F2DI_CONVERT_BRIDGE>mode);
DONE;
}
)
@ -2482,7 +2483,8 @@
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
riscv_vector::expand_vec_lround (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
riscv_vector::expand_vec_lround (operands[0], operands[1], <MODE>mode,
<V_F2SI_CONVERT>mode, VOIDmode);
DONE;
}
)
@ -2492,7 +2494,10 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
riscv_vector::expand_vec_lround (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
riscv_vector::expand_vec_lround (operands[0], operands[1], <MODE>mode,
<V_F2DI_CONVERT>mode,
<V_F2DI_CONVERT_BRIDGE>mode);
DONE;
}
)

View file

@ -514,8 +514,8 @@ void expand_vec_rint (rtx, rtx, machine_mode, machine_mode);
void expand_vec_round (rtx, rtx, machine_mode, machine_mode);
void expand_vec_trunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_roundeven (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lround (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
#endif

View file

@ -4001,6 +4001,16 @@ emit_vec_widden_cvt_x_f (rtx op_dest, rtx op_src, insn_type type,
emit_vlmax_insn (icode, type, ops);
}
/* Emit a VLMAX widening float-to-float conversion (vfwcvt.f.f) from OP_SRC
   into OP_DEST.  VEC_MODE selects the pred_extend pattern to use; the
   HF => DI rounding path passes the bridge mode here, i.e. the mode of
   OP_DEST.  TYPE is the insn_type forwarded to emit_vlmax_insn.  */
static void
emit_vec_widden_cvt_f_f (rtx op_dest, rtx op_src, insn_type type,
machine_mode vec_mode)
{
rtx ops[] = {op_dest, op_src};
insn_code icode = code_for_pred_extend (vec_mode);
emit_vlmax_insn (icode, type, ops);
}
static void
emit_vec_cvt_f_x (rtx op_dest, rtx op_src, rtx mask,
insn_type type, machine_mode vec_mode)
@ -4195,8 +4205,10 @@ expand_vec_roundeven (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
/* Handling the rounding from floating-point to int/long/long long. */
static void
emit_vec_rounding_to_integer (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode, insn_type type)
emit_vec_rounding_to_integer (rtx op_0, rtx op_1, insn_type type,
machine_mode vec_fp_mode,
machine_mode vec_int_mode,
machine_mode vec_bridge_mode = E_VOIDmode)
{
poly_uint16 vec_fp_size = GET_MODE_SIZE (vec_fp_mode);
poly_uint16 vec_int_size = GET_MODE_SIZE (vec_int_mode);
@ -4205,42 +4217,53 @@ emit_vec_rounding_to_integer (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
emit_vec_cvt_x_f (op_0, op_1, type, vec_fp_mode);
else if (maybe_eq (vec_fp_size, vec_int_size * 2)) /* DF => SI. */
emit_vec_narrow_cvt_x_f (op_0, op_1, type, vec_fp_mode);
else if (maybe_eq (vec_fp_size * 2, vec_int_size)) /* SF => DI. */
else if (maybe_eq (vec_fp_size * 2, vec_int_size)) /* SF => DI, HF => SI. */
emit_vec_widden_cvt_x_f (op_0, op_1, type, vec_int_mode);
else /* HF requires additional middle-end support. */
else if (maybe_eq (vec_fp_size * 4, vec_int_size)) /* HF => DI. */
{
gcc_assert (vec_bridge_mode != E_VOIDmode);
rtx op_sf = gen_reg_rtx (vec_bridge_mode);
/* Step-1: HF => SF, no rounding here. */
emit_vec_widden_cvt_f_f (op_sf, op_1, UNARY_OP, vec_bridge_mode);
/* Step-2: SF => DI. */
emit_vec_widden_cvt_x_f (op_0, op_sf, type, vec_int_mode);
}
else
gcc_unreachable ();
}
void
expand_vec_lrint (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
machine_mode vec_int_mode, machine_mode vec_bridge_mode)
{
emit_vec_rounding_to_integer (op_0, op_1, vec_fp_mode, vec_int_mode,
UNARY_OP_FRM_DYN);
emit_vec_rounding_to_integer (op_0, op_1, UNARY_OP_FRM_DYN, vec_fp_mode,
vec_int_mode, vec_bridge_mode);
}
void
expand_vec_lround (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
machine_mode vec_int_mode, machine_mode vec_bridge_mode)
{
emit_vec_rounding_to_integer (op_0, op_1, vec_fp_mode, vec_int_mode,
UNARY_OP_FRM_RMM);
emit_vec_rounding_to_integer (op_0, op_1, UNARY_OP_FRM_RMM, vec_fp_mode,
vec_int_mode, vec_bridge_mode);
}
void
expand_vec_lceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
{
emit_vec_rounding_to_integer (op_0, op_1, vec_fp_mode, vec_int_mode,
UNARY_OP_FRM_RUP);
emit_vec_rounding_to_integer (op_0, op_1, UNARY_OP_FRM_RUP, vec_fp_mode,
vec_int_mode);
}
void
expand_vec_lfloor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
{
emit_vec_rounding_to_integer (op_0, op_1, vec_fp_mode, vec_int_mode,
UNARY_OP_FRM_RDN);
emit_vec_rounding_to_integer (op_0, op_1, UNARY_OP_FRM_RDN, vec_fp_mode,
vec_int_mode);
}
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as

View file

@ -3221,15 +3221,20 @@
;; V_F2SI_CONVERT: (HF, SF, DF) => SI
;; V_F2DI_CONVERT: (HF, SF, DF) => DI
;;
;; HF requires additional support from internal function, aka
;; gcc/internal-fn.def, remove HF shortly until the middle-end is ready.
(define_mode_attr V_F2SI_CONVERT [
(RVVM4HF "RVVM8SI") (RVVM2HF "RVVM4SI") (RVVM1HF "RVVM2SI")
(RVVMF2HF "RVVM1SI") (RVVMF4HF "RVVMF2SI")
(RVVM8SF "RVVM8SI") (RVVM4SF "RVVM4SI") (RVVM2SF "RVVM2SI")
(RVVM1SF "RVVM1SI") (RVVMF2SF "RVVMF2SI")
(RVVM8DF "RVVM4SI") (RVVM4DF "RVVM2SI") (RVVM2DF "RVVM1SI")
(RVVM1DF "RVVMF2SI")
(V1HF "V1SI") (V2HF "V2SI") (V4HF "V4SI") (V8HF "V8SI") (V16HF "V16SI")
(V32HF "V32SI") (V64HF "V64SI") (V128HF "V128SI") (V256HF "V256SI")
(V512HF "V512SI") (V1024HF "V1024SI")
(V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")
(V32SF "V32SI") (V64SF "V64SI") (V128SF "V128SI") (V256SF "V256SI")
(V512SF "V512SI") (V1024SF "V1024SI")
@ -3240,12 +3245,19 @@
])
(define_mode_attr v_f2si_convert [
(RVVM4HF "rvvm8si") (RVVM2HF "rvvm4si") (RVVM1HF "rvvm2si")
(RVVMF2HF "rvvm1si") (RVVMF4HF "rvvmf2si")
(RVVM8SF "rvvm8si") (RVVM4SF "rvvm4si") (RVVM2SF "rvvm2si")
(RVVM1SF "rvvm1si") (RVVMF2SF "rvvmf2si")
(RVVM8DF "rvvm4si") (RVVM4DF "rvvm2si") (RVVM2DF "rvvm1si")
(RVVM1DF "rvvmf2si")
(V1HF "v1si") (V2HF "v2si") (V4HF "v4si") (V8HF "v8si") (V16HF "v16si")
(V32HF "v32si") (V64HF "v64si") (V128HF "v128si") (V256HF "v256si")
(V512HF "v512si") (V1024HF "v1024si")
(V1SF "v1si") (V2SF "v2si") (V4SF "v4si") (V8SF "v8si") (V16SF "v16si")
(V32SF "v32si") (V64SF "v64si") (V128SF "v128si") (V256SF "v256si")
(V512SF "v512si") (V1024SF "v1024si")
@ -3256,6 +3268,9 @@
])
(define_mode_iterator V_VLS_F_CONVERT_SI [
(RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH")
(RVVMF2HF "TARGET_ZVFH") (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
(RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32")
(RVVM2SF "TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32")
(RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
@ -3265,6 +3280,18 @@
(RVVM2DF "TARGET_VECTOR_ELEN_FP_64")
(RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
(V1HF "riscv_vector::vls_mode_valid_p (V1HFmode) && TARGET_ZVFH")
(V2HF "riscv_vector::vls_mode_valid_p (V2HFmode) && TARGET_ZVFH")
(V4HF "riscv_vector::vls_mode_valid_p (V4HFmode) && TARGET_ZVFH")
(V8HF "riscv_vector::vls_mode_valid_p (V8HFmode) && TARGET_ZVFH")
(V16HF "riscv_vector::vls_mode_valid_p (V16HFmode) && TARGET_ZVFH")
(V32HF "riscv_vector::vls_mode_valid_p (V32HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 64")
(V64HF "riscv_vector::vls_mode_valid_p (V64HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
(V128HF "riscv_vector::vls_mode_valid_p (V128HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 256")
(V256HF "riscv_vector::vls_mode_valid_p (V256HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 512")
(V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024")
(V1024HF "riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048")
(V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
@ -3290,12 +3317,19 @@
])
(define_mode_attr V_F2DI_CONVERT [
(RVVM2HF "RVVM8DI") (RVVM1HF "RVVM4DI") (RVVMF2HF "RVVM2DI")
(RVVMF4HF "RVVM1DI")
(RVVM4SF "RVVM8DI") (RVVM2SF "RVVM4DI") (RVVM1SF "RVVM2DI")
(RVVMF2SF "RVVM1DI")
(RVVM8DF "RVVM8DI") (RVVM4DF "RVVM4DI") (RVVM2DF "RVVM2DI")
(RVVM1DF "RVVM1DI")
(V1HF "V1DI") (V2HF "V2DI") (V4HF "V4DI") (V8HF "V8DI") (V16HF "V16DI")
(V32HF "V32DI") (V64HF "V64DI") (V128HF "V128DI") (V256HF "V256DI")
(V512HF "V512DI")
(V1SF "V1DI") (V2SF "V2DI") (V4SF "V4DI") (V8SF "V8DI") (V16SF "V16DI")
(V32SF "V32DI") (V64SF "V64DI") (V128SF "V128DI") (V256SF "V256DI")
(V512SF "V512DI")
@ -3306,12 +3340,19 @@
])
(define_mode_attr v_f2di_convert [
(RVVM2HF "rvvm8di") (RVVM1HF "rvvm4di") (RVVMF2HF "rvvm2di")
(RVVMF4HF "rvvm1di")
(RVVM4SF "rvvm8di") (RVVM2SF "rvvm4di") (RVVM1SF "rvvm2di")
(RVVMF2SF "rvvm1di")
(RVVM8DF "rvvm8di") (RVVM4DF "rvvm4di") (RVVM2DF "rvvm2di")
(RVVM1DF "rvvm1di")
(V1HF "v1di") (V2HF "v2di") (V4HF "v4di") (V8HF "v8di") (V16HF "v16di")
(V32HF "v32di") (V64HF "v64di") (V128HF "v128di") (V256HF "v256di")
(V512HF "v512di")
(V1SF "v1di") (V2SF "v2di") (V4SF "v4di") (V8SF "v8di") (V16SF "v16di")
(V32SF "v32di") (V64SF "v64di") (V128SF "v128di") (V256SF "v256di")
(V512SF "v512di")
@ -3321,7 +3362,33 @@
(V512DF "v512di")
])
;; V_F2DI_CONVERT_BRIDGE: intermediate floating-point mode for the two-step
;; HF => SF => DI conversion.  Each HF vector mode maps to the SF vector mode
;; with the same number of elements; SF and DF sources convert to DI directly
;; and therefore map to VOID (no bridge needed).
(define_mode_attr V_F2DI_CONVERT_BRIDGE [
(RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SF") (RVVMF2HF "RVVM1SF")
(RVVMF4HF "RVVMF2SF")
(RVVM4SF "VOID") (RVVM2SF "VOID") (RVVM1SF "VOID")
(RVVMF2SF "VOID")
(RVVM8DF "VOID") (RVVM4DF "VOID") (RVVM2DF "VOID")
(RVVM1DF "VOID")
(V1HF "V1SF") (V2HF "V2SF") (V4HF "V4SF") (V8HF "V8SF") (V16HF "V16SF")
(V32HF "V32SF") (V64HF "V64SF") (V128HF "V128SF") (V256HF "V256SF")
(V512HF "V512SF")
(V1SF "VOID") (V2SF "VOID") (V4SF "VOID") (V8SF "VOID") (V16SF "VOID")
(V32SF "VOID") (V64SF "VOID") (V128SF "VOID") (V256SF "VOID")
(V512SF "VOID")
(V1DF "VOID") (V2DF "VOID") (V4DF "VOID") (V8DF "VOID") (V16DF "VOID")
(V32DF "VOID") (V64DF "VOID") (V128DF "VOID") (V256DF "VOID")
(V512DF "VOID")
])
(define_mode_iterator V_VLS_F_CONVERT_DI [
(RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH")
(RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
(RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32")
(RVVM1SF "TARGET_VECTOR_ELEN_FP_32")
(RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
@ -3329,6 +3396,17 @@
(RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64")
(RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64")
(V1HF "riscv_vector::vls_mode_valid_p (V1HFmode) && TARGET_ZVFH")
(V2HF "riscv_vector::vls_mode_valid_p (V2HFmode) && TARGET_ZVFH")
(V4HF "riscv_vector::vls_mode_valid_p (V4HFmode) && TARGET_ZVFH")
(V8HF "riscv_vector::vls_mode_valid_p (V8HFmode) && TARGET_ZVFH")
(V16HF "riscv_vector::vls_mode_valid_p (V16HFmode) && TARGET_ZVFH")
(V32HF "riscv_vector::vls_mode_valid_p (V32HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 64")
(V64HF "riscv_vector::vls_mode_valid_p (V64HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
(V128HF "riscv_vector::vls_mode_valid_p (V128HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 256")
(V256HF "riscv_vector::vls_mode_valid_p (V256HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 512")
(V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024")
(V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
(V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")

View file

@ -0,0 +1,14 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <stdint-gcc.h>
#include "test-math.h"
/*
** test__Float16_int64_t___builtin_llrintf16:
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
*/
TEST_UNARY_CALL_CVT (_Float16, int64_t, __builtin_llrintf16)

View file

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <stdint-gcc.h>
#include "test-math.h"
/*
** test__Float16_int64_t___builtin_llroundf16:
** frrm\s+[atx][0-9]+
** ...
** fsrmi\s+4
** ...
** vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** fsrm\s+[atx][0-9]+
** ret
*/
TEST_UNARY_CALL_CVT (_Float16, int64_t, __builtin_llroundf16)

View file

@ -0,0 +1,13 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zfh -mabi=ilp32d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test__Float16_long___builtin_lrintf16:
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
*/
TEST_UNARY_CALL_CVT (_Float16, long, __builtin_lrintf16)

View file

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test__Float16_long___builtin_lrintf16:
** ...
** vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
*/
TEST_UNARY_CALL_CVT (_Float16, long, __builtin_lrintf16)

View file

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zfh -mabi=ilp32d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test__Float16_long___builtin_lroundf16:
** frrm\s+[atx][0-9]+
** ...
** fsrmi\s+4
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** fsrm\s+[atx][0-9]+
** ret
*/
TEST_UNARY_CALL_CVT (_Float16, long, __builtin_lroundf16)

View file

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "test-math.h"
/*
** test__Float16_long___builtin_lroundf16:
** frrm\s+[atx][0-9]+
** ...
** fsrmi\s+4
** ...
** vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+
** ...
** fsrm\s+[atx][0-9]+
** ret
*/
TEST_UNARY_CALL_CVT (_Float16, long, __builtin_lroundf16)

View file

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (llrintf16, 1, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 2, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 4, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 8, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 16, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 32, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 64, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 128, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 256, _Float16, int64_t, __builtin_llrintf16)
DEF_OP_V_CVT (llrintf16, 512, _Float16, int64_t, __builtin_llrintf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (llroundf16, 1, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 2, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 4, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 8, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 16, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 32, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 64, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 128, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 256, _Float16, int64_t, __builtin_llroundf16)
DEF_OP_V_CVT (llroundf16, 512, _Float16, int64_t, __builtin_llroundf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (lrintf16, 1, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 2, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 4, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 8, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 16, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 32, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 64, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 128, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 256, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 512, _Float16, long, __builtin_lrintf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (lrintf16, 1, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 2, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 4, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 8, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 16, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 32, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 64, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 128, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 256, _Float16, long, __builtin_lrintf16)
DEF_OP_V_CVT (lrintf16, 512, _Float16, long, __builtin_lrintf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,27 @@
/* { dg-do compile } */
/* { dg-options "-march=rv32gcv_zvfh_zvl4096b -mabi=ilp32d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (lroundf16, 1, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 2, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 4, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 8, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 16, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 32, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 64, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 128, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 256, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 512, _Float16, long, __builtin_lroundf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */

View file

@ -0,0 +1,28 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -ffast-math -fdump-tree-optimized" } */
#include "def.h"
DEF_OP_V_CVT (lroundf16, 1, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 2, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 4, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 8, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 16, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 32, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 64, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 128, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 256, _Float16, long, __builtin_lroundf16)
DEF_OP_V_CVT (lroundf16, 512, _Float16, long, __builtin_lroundf16)
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */
/* { dg-final { scan-assembler-times {vfwcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+} 9 } } */