RISC-V: Extend VLS modes in 'VWEXTI' iterator

This patch extends the 'VWEXTI' iterator with VLS modes so that we support
the integer extension, integer truncation and integer average VLS patterns.

This patch resolves the following testsuite failures:

FAIL: gcc.dg/pr92301.c execution test
XPASS: gcc.dg/vect/bb-slp-subgroups-3.c -flto -ffat-lto-objects  scan-tree-dump-times slp2 "optimized: basic block" 2
XPASS: gcc.dg/vect/bb-slp-subgroups-3.c scan-tree-dump-times slp2 "optimized: basic block" 2

The pr92301.c failure is caused by a latent bug in the middle-end GIMPLE FOLD.
This patch does not fix that bug: the test passes now only because, with this patch, we happen not to trigger the GIMPLE FOLD bug any more.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (can_find_related_mode_p): New function.
	(vectorize_related_mode): Add VLS related modes.
	* config/riscv/vector-iterators.md: Extend VLS modes.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/binop/narrow-1.c: Adapt testcase.
	* gcc.target/riscv/rvv/autovec/binop/narrow-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/binop/narrow-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/cmp/vcond-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/cmp/vcond-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/cmp/vcond-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-18.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-19.c: Ditto.
	* gcc.target/riscv/rvv/autovec/pr110950.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-10.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-11.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-12.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-7.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-8.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop-9.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-10.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-11.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-12.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-7.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-8.c: Ditto.
	* gcc.target/riscv/rvv/autovec/ternop/ternop_nofm-9.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/def.h: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/div-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/shift-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-7.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-8.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-9.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/widen/widen-complicate-6.c: Ditto.
	* gcc.target/riscv/rvv/autovec/zve32f-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/vls/avg-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/avg-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/avg-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/avg-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/avg-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/avg-6.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/ext-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/ext-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/ext-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/trunc-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/trunc-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls/trunc-3.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-09-20 06:49:56 +08:00 committed by Lehua Ding
parent 677249a232
commit bea89f78f2
67 changed files with 874 additions and 56 deletions

View file

@ -2211,6 +2211,25 @@ autovectorize_vector_modes (vector_modes *modes, bool)
return VECT_COMPARE_COSTS;
}
/* Return true if a related mode for VECTOR_MODE with elements of
   ELEMENT_MODE can be found under the default LMUL.

   Always return false when autovec_use_vlmax_p () is false.  Otherwise the
   LMUL used is riscv_autovec_lmul, with RVV_DYNAMIC treated as RVV_M8, and
   the result is whatever multiple_p reports for:

     - BYTES_PER_RISCV_VECTOR * LMUL, when riscv_v_ext_vector_mode_p
       accepts VECTOR_MODE;
     - TARGET_MIN_VLEN * LMUL, when riscv_v_ext_vls_mode_p accepts
       VECTOR_MODE;

   each checked against GET_MODE_SIZE (ELEMENT_MODE).  NUNITS is handed to
   multiple_p as its output argument.

   NOTE(review): the first bound is expressed in bytes by its name, while
   TARGET_MIN_VLEN is not visibly a byte count here; confirm that both
   bounds use the same unit as GET_MODE_SIZE.  */
static bool
can_find_related_mode_p (machine_mode vector_mode, scalar_mode element_mode,
			 poly_uint64 *nunits)
{
  /* Nothing to look for unless VLMAX auto-vectorization is in use.  */
  if (!autovec_use_vlmax_p ())
    return false;

  /* A dynamic LMUL is sized as the largest one, M8.  */
  int lmul = riscv_autovec_lmul == RVV_DYNAMIC ? RVV_M8 : riscv_autovec_lmul;

  /* First try VECTOR_MODE as an RVV vector mode.  */
  if (riscv_v_ext_vector_mode_p (vector_mode)
      && multiple_p (BYTES_PER_RISCV_VECTOR * lmul,
		     GET_MODE_SIZE (element_mode), nunits))
    return true;

  /* Otherwise the answer depends only on the VLS mode check.  */
  return (riscv_v_ext_vls_mode_p (vector_mode)
	  && multiple_p (TARGET_MIN_VLEN * lmul,
			 GET_MODE_SIZE (element_mode), nunits));
}
/* If the given VECTOR_MODE is an RVV mode, first get the largest number
of units that fit into a full vector at the given ELEMENT_MODE.
We will have the vectorizer call us with a successively decreasing
@ -2222,10 +2241,7 @@ vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
{
/* TODO: We will support RVV VLS auto-vectorization mode in the future. */
poly_uint64 min_units;
int lmul = riscv_autovec_lmul == RVV_DYNAMIC ? RVV_M8 : riscv_autovec_lmul;
if (autovec_use_vlmax_p () && riscv_v_ext_vector_mode_p (vector_mode)
&& multiple_p (BYTES_PER_RISCV_VECTOR * lmul,
GET_MODE_SIZE (element_mode), &min_units))
if (can_find_related_mode_p (vector_mode, element_mode, &min_units))
{
machine_mode rvv_mode;
if (maybe_ne (nunits, 0U))

View file

@ -948,6 +948,40 @@
(RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
(RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
(V1HI "TARGET_VECTOR_VLS")
(V2HI "TARGET_VECTOR_VLS")
(V4HI "TARGET_VECTOR_VLS")
(V8HI "TARGET_VECTOR_VLS")
(V16HI "TARGET_VECTOR_VLS")
(V32HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 64")
(V64HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 128")
(V128HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 256")
(V256HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 512")
(V512HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 1024")
(V1024HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 2048")
(V2048HI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 4096")
(V1SI "TARGET_VECTOR_VLS")
(V2SI "TARGET_VECTOR_VLS")
(V4SI "TARGET_VECTOR_VLS")
(V8SI "TARGET_VECTOR_VLS")
(V16SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 64")
(V32SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 128")
(V64SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 256")
(V128SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 512")
(V256SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 1024")
(V512SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 2048")
(V1024SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 4096")
(V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
(V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
(V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
(V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
(V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
(V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
(V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
])
;; Same iterator split reason as VF_ZVFHMIN and VF.
@ -988,6 +1022,28 @@
(RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
(RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
(V1SI "TARGET_VECTOR_VLS")
(V2SI "TARGET_VECTOR_VLS")
(V4SI "TARGET_VECTOR_VLS")
(V8SI "TARGET_VECTOR_VLS")
(V16SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 64")
(V32SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 128")
(V64SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 256")
(V128SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 512")
(V256SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 1024")
(V512SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 2048")
(V1024SI "TARGET_VECTOR_VLS && TARGET_MIN_VLEN >= 4096")
(V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
(V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
(V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
(V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
(V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
(V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
(V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
])
(define_mode_iterator VQEXTF [
@ -998,6 +1054,17 @@
(define_mode_iterator VOEXTI [
(RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
(RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
(V1DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V2DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V4DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64")
(V8DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
(V16DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
(V32DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
(V64DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
(V128DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
(V256DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
(V512DI "TARGET_VECTOR_VLS && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
])
(define_mode_iterator VT [
@ -2242,6 +2309,40 @@
(RVVM8DI "RVVM4SI") (RVVM4DI "RVVM2SI") (RVVM2DI "RVVM1SI") (RVVM1DI "RVVMF2SI")
(RVVM8DF "RVVM4SF") (RVVM4DF "RVVM2SF") (RVVM2DF "RVVM1SF") (RVVM1DF "RVVMF2SF")
(V1HI "V1QI")
(V2HI "V2QI")
(V4HI "V4QI")
(V8HI "V8QI")
(V16HI "V16QI")
(V32HI "V32QI")
(V64HI "V64QI")
(V128HI "V128QI")
(V256HI "V256QI")
(V512HI "V512QI")
(V1024HI "V1024QI")
(V2048HI "V2048QI")
(V1SI "V1HI")
(V2SI "V2HI")
(V4SI "V4HI")
(V8SI "V8HI")
(V16SI "V16HI")
(V32SI "V32HI")
(V64SI "V64HI")
(V128SI "V128HI")
(V256SI "V256HI")
(V512SI "V512HI")
(V1024SI "V1024HI")
(V1DI "V1SI")
(V2DI "V2SI")
(V4DI "V4SI")
(V8DI "V8SI")
(V16DI "V16SI")
(V32DI "V32SI")
(V64DI "V64SI")
(V128DI "V128SI")
(V256DI "V256SI")
(V512DI "V512SI")
])
(define_mode_attr V_QUAD_TRUNC [
@ -2250,10 +2351,43 @@
(RVVM8DI "RVVM2HI") (RVVM4DI "RVVM1HI") (RVVM2DI "RVVMF2HI") (RVVM1DI "RVVMF4HI")
(RVVM8DF "RVVM2HF") (RVVM4DF "RVVM1HF") (RVVM2DF "RVVMF2HF") (RVVM1DF "RVVMF4HF")
(V1SI "V1QI")
(V2SI "V2QI")
(V4SI "V4QI")
(V8SI "V8QI")
(V16SI "V16QI")
(V32SI "V32QI")
(V64SI "V64QI")
(V128SI "V128QI")
(V256SI "V256QI")
(V512SI "V512QI")
(V1024SI "V1024QI")
(V1DI "V1HI")
(V2DI "V2HI")
(V4DI "V4HI")
(V8DI "V8HI")
(V16DI "V16HI")
(V32DI "V32HI")
(V64DI "V64HI")
(V128DI "V128HI")
(V256DI "V256HI")
(V512DI "V512HI")
])
(define_mode_attr V_OCT_TRUNC [
(RVVM8DI "RVVM1QI") (RVVM4DI "RVVMF2QI") (RVVM2DI "RVVMF4QI") (RVVM1DI "RVVMF8QI")
(V1DI "V1QI")
(V2DI "V2QI")
(V4DI "V4QI")
(V8DI "V8QI")
(V16DI "V16QI")
(V32DI "V32QI")
(V64DI "V64QI")
(V128DI "V128QI")
(V256DI "V256QI")
(V512DI "V512QI")
])
; Again in lower case.
@ -2267,6 +2401,40 @@
(RVVM8DI "rvvm4si") (RVVM4DI "rvvm2si") (RVVM2DI "rvvm1si") (RVVM1DI "rvvmf2si")
(RVVM8DF "rvvm4sf") (RVVM4DF "rvvm2sf") (RVVM2DF "rvvm1sf") (RVVM1DF "rvvmf2sf")
(V1HI "v1qi")
(V2HI "v2qi")
(V4HI "v4qi")
(V8HI "v8qi")
(V16HI "v16qi")
(V32HI "v32qi")
(V64HI "v64qi")
(V128HI "v128qi")
(V256HI "v256qi")
(V512HI "v512qi")
(V1024HI "v1024qi")
(V2048HI "v2048qi")
(V1SI "v1hi")
(V2SI "v2hi")
(V4SI "v4hi")
(V8SI "v8hi")
(V16SI "v16hi")
(V32SI "v32hi")
(V64SI "v64hi")
(V128SI "v128hi")
(V256SI "v256hi")
(V512SI "v512hi")
(V1024SI "v1024hi")
(V1DI "v1si")
(V2DI "v2si")
(V4DI "v4si")
(V8DI "v8si")
(V16DI "v16si")
(V32DI "v32si")
(V64DI "v64si")
(V128DI "v128si")
(V256DI "v256si")
(V512DI "v512si")
])
(define_mode_attr v_quad_trunc [
@ -2275,10 +2443,43 @@
(RVVM8DI "rvvm2hi") (RVVM4DI "rvvm1hi") (RVVM2DI "rvvmf2hi") (RVVM1DI "rvvmf4hi")
(RVVM8DF "rvvm2hf") (RVVM4DF "rvvm1hf") (RVVM2DF "rvvmf2hf") (RVVM1DF "rvvmf4hf")
(V1SI "v1qi")
(V2SI "v2qi")
(V4SI "v4qi")
(V8SI "v8qi")
(V16SI "v16qi")
(V32SI "v32qi")
(V64SI "v64qi")
(V128SI "v128qi")
(V256SI "v256qi")
(V512SI "v512qi")
(V1024SI "v1024qi")
(V1DI "v1hi")
(V2DI "v2hi")
(V4DI "v4hi")
(V8DI "v8hi")
(V16DI "v16hi")
(V32DI "v32hi")
(V64DI "v64hi")
(V128DI "v128hi")
(V256DI "v256hi")
(V512DI "v512hi")
])
(define_mode_attr v_oct_trunc [
(RVVM8DI "rvvm1qi") (RVVM4DI "rvvmf2qi") (RVVM2DI "rvvmf4qi") (RVVM1DI "rvvmf8qi")
(V1DI "v1qi")
(V2DI "v2qi")
(V4DI "v4qi")
(V8DI "v8qi")
(V16DI "v16qi")
(V32DI "v32qi")
(V64DI "v64qi")
(V128DI "v128qi")
(V256DI "v256qi")
(V512DI "v512qi")
])
(define_mode_attr VINDEX_DOUBLE_TRUNC [

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math -fno-vect-cost-model" } */
/* The difference here is that nueq can use LTGT. */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -Ofast" } */
/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -Ofast -fno-vect-cost-model" } */
int a;
void b() {

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-schedule-insns -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-1.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-10.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-11.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns -fno-vect-cost-model" } */
#include "ternop-12.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-2.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns -fno-vect-cost-model" } */
#include "ternop-3.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-4.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-5.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns -fno-vect-cost-model" } */
#include "ternop-6.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-7.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */
#include "ternop-8.c"

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-schedule-insns -fno-vect-cost-model" } */
#include "ternop-9.c"

View file

@ -0,0 +1,44 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
#include "def.h"
DEF_AVG_FLOOR (int8_t, int16_t, 4)
DEF_AVG_FLOOR (int8_t, int16_t, 8)
DEF_AVG_FLOOR (int8_t, int16_t, 16)
DEF_AVG_FLOOR (int8_t, int16_t, 32)
DEF_AVG_FLOOR (int8_t, int16_t, 64)
DEF_AVG_FLOOR (int8_t, int16_t, 128)
DEF_AVG_FLOOR (int8_t, int16_t, 256)
DEF_AVG_FLOOR (int8_t, int16_t, 512)
DEF_AVG_FLOOR (int8_t, int16_t, 1024)
DEF_AVG_FLOOR (int8_t, int16_t, 2048)
DEF_AVG_FLOOR (uint8_t, uint16_t, 4)
DEF_AVG_FLOOR (uint8_t, uint16_t, 8)
DEF_AVG_FLOOR (uint8_t, uint16_t, 16)
DEF_AVG_FLOOR (uint8_t, uint16_t, 32)
DEF_AVG_FLOOR (uint8_t, uint16_t, 64)
DEF_AVG_FLOOR (uint8_t, uint16_t, 128)
DEF_AVG_FLOOR (uint8_t, uint16_t, 256)
DEF_AVG_FLOOR (uint8_t, uint16_t, 512)
DEF_AVG_FLOOR (uint8_t, uint16_t, 1024)
DEF_AVG_FLOOR (uint8_t, uint16_t, 2048)
/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,42 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
#include "def.h"
DEF_AVG_FLOOR (int16_t, int32_t, 4)
DEF_AVG_FLOOR (int16_t, int32_t, 8)
DEF_AVG_FLOOR (int16_t, int32_t, 16)
DEF_AVG_FLOOR (int16_t, int32_t, 32)
DEF_AVG_FLOOR (int16_t, int32_t, 64)
DEF_AVG_FLOOR (int16_t, int32_t, 128)
DEF_AVG_FLOOR (int16_t, int32_t, 256)
DEF_AVG_FLOOR (int16_t, int32_t, 512)
DEF_AVG_FLOOR (int16_t, int32_t, 1024)
DEF_AVG_FLOOR (uint16_t, uint32_t, 4)
DEF_AVG_FLOOR (uint16_t, uint32_t, 8)
DEF_AVG_FLOOR (uint16_t, uint32_t, 16)
DEF_AVG_FLOOR (uint16_t, uint32_t, 32)
DEF_AVG_FLOOR (uint16_t, uint32_t, 64)
DEF_AVG_FLOOR (uint16_t, uint32_t, 128)
DEF_AVG_FLOOR (uint16_t, uint32_t, 256)
DEF_AVG_FLOOR (uint16_t, uint32_t, 512)
DEF_AVG_FLOOR (uint16_t, uint32_t, 1024)
/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,40 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
#include "def.h"
DEF_AVG_FLOOR (int32_t, int64_t, 4)
DEF_AVG_FLOOR (int32_t, int64_t, 8)
DEF_AVG_FLOOR (int32_t, int64_t, 16)
DEF_AVG_FLOOR (int32_t, int64_t, 32)
DEF_AVG_FLOOR (int32_t, int64_t, 64)
DEF_AVG_FLOOR (int32_t, int64_t, 128)
DEF_AVG_FLOOR (int32_t, int64_t, 256)
DEF_AVG_FLOOR (int32_t, int64_t, 512)
DEF_AVG_FLOOR (uint32_t, uint64_t, 4)
DEF_AVG_FLOOR (uint32_t, uint64_t, 8)
DEF_AVG_FLOOR (uint32_t, uint64_t, 16)
DEF_AVG_FLOOR (uint32_t, uint64_t, 32)
DEF_AVG_FLOOR (uint32_t, uint64_t, 64)
DEF_AVG_FLOOR (uint32_t, uint64_t, 128)
DEF_AVG_FLOOR (uint32_t, uint64_t, 256)
DEF_AVG_FLOOR (uint32_t, uint64_t, 512)
/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,45 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Ceil-average loops (DEF_AVG_CEIL from def.h) on 8-bit elements, with the
   sum formed through the matching 16-bit type, for trip counts 4 to 2048.  */
#include "def.h"
DEF_AVG_CEIL (int8_t, int16_t, 4)
DEF_AVG_CEIL (int8_t, int16_t, 8)
DEF_AVG_CEIL (int8_t, int16_t, 16)
DEF_AVG_CEIL (int8_t, int16_t, 32)
DEF_AVG_CEIL (int8_t, int16_t, 64)
DEF_AVG_CEIL (int8_t, int16_t, 128)
DEF_AVG_CEIL (int8_t, int16_t, 256)
DEF_AVG_CEIL (int8_t, int16_t, 512)
DEF_AVG_CEIL (int8_t, int16_t, 1024)
DEF_AVG_CEIL (int8_t, int16_t, 2048)
DEF_AVG_CEIL (uint8_t, uint16_t, 4)
DEF_AVG_CEIL (uint8_t, uint16_t, 8)
DEF_AVG_CEIL (uint8_t, uint16_t, 16)
DEF_AVG_CEIL (uint8_t, uint16_t, 32)
DEF_AVG_CEIL (uint8_t, uint16_t, 64)
DEF_AVG_CEIL (uint8_t, uint16_t, 128)
DEF_AVG_CEIL (uint8_t, uint16_t, 256)
DEF_AVG_CEIL (uint8_t, uint16_t, 512)
DEF_AVG_CEIL (uint8_t, uint16_t, 1024)
DEF_AVG_CEIL (uint8_t, uint16_t, 2048)
/* 10 signed and 10 unsigned functions are instantiated above.  The expected
   totals correspond to one widening add and one narrowing right shift per
   function (vwadd/vnsra for signed, vwaddu/vnsrl for unsigned), plus one
   vadd.vi per function (20 in total) for the "+ 1" rounding term.  */
/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,43 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Ceil-average loops (DEF_AVG_CEIL from def.h) on 16-bit elements, with the
   sum formed through the matching 32-bit type, for trip counts 4 to 1024.  */
#include "def.h"
DEF_AVG_CEIL (int16_t, int32_t, 4)
DEF_AVG_CEIL (int16_t, int32_t, 8)
DEF_AVG_CEIL (int16_t, int32_t, 16)
DEF_AVG_CEIL (int16_t, int32_t, 32)
DEF_AVG_CEIL (int16_t, int32_t, 64)
DEF_AVG_CEIL (int16_t, int32_t, 128)
DEF_AVG_CEIL (int16_t, int32_t, 256)
DEF_AVG_CEIL (int16_t, int32_t, 512)
DEF_AVG_CEIL (int16_t, int32_t, 1024)
DEF_AVG_CEIL (uint16_t, uint32_t, 4)
DEF_AVG_CEIL (uint16_t, uint32_t, 8)
DEF_AVG_CEIL (uint16_t, uint32_t, 16)
DEF_AVG_CEIL (uint16_t, uint32_t, 32)
DEF_AVG_CEIL (uint16_t, uint32_t, 64)
DEF_AVG_CEIL (uint16_t, uint32_t, 128)
DEF_AVG_CEIL (uint16_t, uint32_t, 256)
DEF_AVG_CEIL (uint16_t, uint32_t, 512)
DEF_AVG_CEIL (uint16_t, uint32_t, 1024)
/* 9 signed and 9 unsigned functions are instantiated above.  The expected
   totals correspond to one widening add and one narrowing right shift per
   function (vwadd/vnsra for signed, vwaddu/vnsrl for unsigned), plus one
   vadd.vi per function (18 in total) for the "+ 1" rounding term.  */
/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,41 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Ceil-average loops (DEF_AVG_CEIL from def.h) on 32-bit elements, with the
   sum formed through the matching 64-bit type, for trip counts 4 to 512.
   The element types were previously (int16_t, int32_t), which only repeated
   a pair that is already tested with trip counts up to 1024 and left the
   32-bit ceil-average case without any coverage.  */
#include "def.h"
DEF_AVG_CEIL (int32_t, int64_t, 4)
DEF_AVG_CEIL (int32_t, int64_t, 8)
DEF_AVG_CEIL (int32_t, int64_t, 16)
DEF_AVG_CEIL (int32_t, int64_t, 32)
DEF_AVG_CEIL (int32_t, int64_t, 64)
DEF_AVG_CEIL (int32_t, int64_t, 128)
DEF_AVG_CEIL (int32_t, int64_t, 256)
DEF_AVG_CEIL (int32_t, int64_t, 512)
DEF_AVG_CEIL (uint32_t, uint64_t, 4)
DEF_AVG_CEIL (uint32_t, uint64_t, 8)
DEF_AVG_CEIL (uint32_t, uint64_t, 16)
DEF_AVG_CEIL (uint32_t, uint64_t, 32)
DEF_AVG_CEIL (uint32_t, uint64_t, 64)
DEF_AVG_CEIL (uint32_t, uint64_t, 128)
DEF_AVG_CEIL (uint32_t, uint64_t, 256)
DEF_AVG_CEIL (uint32_t, uint64_t, 512)
/* 8 signed and 8 unsigned functions are instantiated above.  The expected
   totals correspond to one widening add and one narrowing right shift per
   function (vwadd/vnsra for signed, vwaddu/vnsrl for unsigned), plus one
   vadd.vi per function (16 in total) for the "+ 1" rounding term.  */
/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -494,3 +494,28 @@ typedef double v512df __attribute__ ((vector_size (4096)));
for (int i = 0; i < NUM; ++i) \
a[i] = -(b[i] * c[i]) - d[i]; \
}
/* Define a noipa function named PREFIX_<TYPE1><TYPE2>_NUM (TYPE1 and TYPE2
   are pasted together without a separator).  The function converts the
   first NUM elements of A to TYPE2 with a cast and stores them to DST.
   Both pointers are __restrict-qualified and the trip count is the macro
   argument NUM.  */
#define DEF_CONVERT(PREFIX, TYPE1, TYPE2, NUM) \
__attribute__ (( \
noipa)) void PREFIX##_##TYPE1##TYPE2##_##NUM (TYPE2 *__restrict dst, \
TYPE1 *__restrict a) \
{ \
for (int i = 0; i < NUM; i++) \
dst[i] = (TYPE2) a[i]; \
}
/* Define a noipa function named vavg_TYPE_<TYPE2><NUM> (TYPE2 and NUM are
   pasted together without a separator).  For each of the first NUM
   elements it casts a[i] to TYPE2, adds b[i], shifts the sum right by one
   and stores the result to dst[i], whose element type is TYPE.  There is
   no rounding increment before the shift, hence "floor".  The N parameter
   is not used by the body; the trip count is the macro argument NUM.  */
#define DEF_AVG_FLOOR(TYPE, TYPE2, NUM) \
__attribute__ ((noipa)) void vavg_##TYPE##_##TYPE2##NUM ( \
TYPE *__restrict dst, TYPE *__restrict a, TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < NUM; i++) \
dst[i] = ((TYPE2) a[i] + b[i]) >> 1; \
}
/* Define a noipa function named vavg2_TYPE_<TYPE2><NUM> (TYPE2 and NUM are
   pasted together without a separator).  For each of the first NUM
   elements it casts a[i] to TYPE2, adds b[i] and 1, shifts the sum right
   by one and stores the result to dst[i], whose element type is TYPE.
   The "+ 1" before the shift is the rounding increment, hence "ceil".
   The N parameter is not used by the body; the trip count is the macro
   argument NUM.  */
#define DEF_AVG_CEIL(TYPE, TYPE2, NUM) \
__attribute__ ((noipa)) void vavg2_##TYPE##_##TYPE2##NUM ( \
TYPE *__restrict dst, TYPE *__restrict a, TYPE *__restrict b, int n) \
{ \
for (int i = 0; i < NUM; i++) \
dst[i] = ((TYPE2) a[i] + b[i] + 1) >> 1; \
}

View file

@ -53,5 +53,5 @@ DEF_OP_VV (div, 128, int64_t, /)
DEF_OP_VV (div, 256, int64_t, /)
DEF_OP_VV (div, 512, int64_t, /)
/* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */
/* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 44 } } */
/* { dg-final { scan-assembler-not {csrr} } } */

View file

@ -0,0 +1,72 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Sign- and zero-extension loops (DEF_CONVERT from def.h) that double the
   element width: 8->16, 16->32 and 32->64 bits.  Trip count 8 is not
   instantiated in this file.  */
#include "def.h"
DEF_CONVERT (sext, int8_t, int16_t, 4)
DEF_CONVERT (sext, int8_t, int16_t, 16)
DEF_CONVERT (sext, int8_t, int16_t, 32)
DEF_CONVERT (sext, int8_t, int16_t, 64)
DEF_CONVERT (sext, int8_t, int16_t, 128)
DEF_CONVERT (sext, int8_t, int16_t, 256)
DEF_CONVERT (sext, int8_t, int16_t, 512)
DEF_CONVERT (sext, int8_t, int16_t, 1024)
DEF_CONVERT (sext, int8_t, int16_t, 2048)
DEF_CONVERT (sext, int16_t, int32_t, 4)
DEF_CONVERT (sext, int16_t, int32_t, 16)
DEF_CONVERT (sext, int16_t, int32_t, 32)
DEF_CONVERT (sext, int16_t, int32_t, 64)
DEF_CONVERT (sext, int16_t, int32_t, 128)
DEF_CONVERT (sext, int16_t, int32_t, 256)
DEF_CONVERT (sext, int16_t, int32_t, 512)
DEF_CONVERT (sext, int16_t, int32_t, 1024)
DEF_CONVERT (sext, int32_t, int64_t, 4)
DEF_CONVERT (sext, int32_t, int64_t, 16)
DEF_CONVERT (sext, int32_t, int64_t, 32)
DEF_CONVERT (sext, int32_t, int64_t, 64)
DEF_CONVERT (sext, int32_t, int64_t, 128)
DEF_CONVERT (sext, int32_t, int64_t, 256)
DEF_CONVERT (zext, uint8_t, uint16_t, 4)
DEF_CONVERT (zext, uint8_t, uint16_t, 16)
DEF_CONVERT (zext, uint8_t, uint16_t, 32)
DEF_CONVERT (zext, uint8_t, uint16_t, 64)
DEF_CONVERT (zext, uint8_t, uint16_t, 128)
DEF_CONVERT (zext, uint8_t, uint16_t, 256)
DEF_CONVERT (zext, uint8_t, uint16_t, 512)
DEF_CONVERT (zext, uint8_t, uint16_t, 1024)
DEF_CONVERT (zext, uint8_t, uint16_t, 2048)
DEF_CONVERT (zext, uint16_t, uint32_t, 4)
DEF_CONVERT (zext, uint16_t, uint32_t, 16)
DEF_CONVERT (zext, uint16_t, uint32_t, 32)
DEF_CONVERT (zext, uint16_t, uint32_t, 64)
DEF_CONVERT (zext, uint16_t, uint32_t, 128)
DEF_CONVERT (zext, uint16_t, uint32_t, 256)
DEF_CONVERT (zext, uint16_t, uint32_t, 512)
DEF_CONVERT (zext, uint16_t, uint32_t, 1024)
DEF_CONVERT (zext, uint32_t, uint64_t, 4)
DEF_CONVERT (zext, uint32_t, uint64_t, 16)
DEF_CONVERT (zext, uint32_t, uint64_t, 32)
DEF_CONVERT (zext, uint32_t, uint64_t, 64)
DEF_CONVERT (zext, uint32_t, uint64_t, 128)
DEF_CONVERT (zext, uint32_t, uint64_t, 256)
/* 9 + 8 + 6 = 23 functions per signedness are instantiated above; the
   expected totals correspond to one vf2 extension per function.  */
/* { dg-final { scan-assembler-times {vsext\.vf2} 23 } } */
/* { dg-final { scan-assembler-times {vzext\.vf2} 23 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,54 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Sign- and zero-extension loops (DEF_CONVERT from def.h) that quadruple
   the element width: 8->32 and 16->64 bits.  Trip count 8 is not
   instantiated in this file.  */
#include "def.h"
DEF_CONVERT (sext, int8_t, int32_t, 4)
DEF_CONVERT (sext, int8_t, int32_t, 16)
DEF_CONVERT (sext, int8_t, int32_t, 32)
DEF_CONVERT (sext, int8_t, int32_t, 64)
DEF_CONVERT (sext, int8_t, int32_t, 128)
DEF_CONVERT (sext, int8_t, int32_t, 256)
DEF_CONVERT (sext, int8_t, int32_t, 512)
DEF_CONVERT (sext, int8_t, int32_t, 1024)
DEF_CONVERT (sext, int16_t, int64_t, 4)
DEF_CONVERT (sext, int16_t, int64_t, 16)
DEF_CONVERT (sext, int16_t, int64_t, 32)
DEF_CONVERT (sext, int16_t, int64_t, 64)
DEF_CONVERT (sext, int16_t, int64_t, 128)
DEF_CONVERT (sext, int16_t, int64_t, 256)
DEF_CONVERT (sext, int16_t, int64_t, 512)
DEF_CONVERT (zext, uint8_t, uint32_t, 4)
DEF_CONVERT (zext, uint8_t, uint32_t, 16)
DEF_CONVERT (zext, uint8_t, uint32_t, 32)
DEF_CONVERT (zext, uint8_t, uint32_t, 64)
DEF_CONVERT (zext, uint8_t, uint32_t, 128)
DEF_CONVERT (zext, uint8_t, uint32_t, 256)
DEF_CONVERT (zext, uint8_t, uint32_t, 512)
DEF_CONVERT (zext, uint8_t, uint32_t, 1024)
DEF_CONVERT (zext, uint16_t, uint64_t, 4)
DEF_CONVERT (zext, uint16_t, uint64_t, 16)
DEF_CONVERT (zext, uint16_t, uint64_t, 32)
DEF_CONVERT (zext, uint16_t, uint64_t, 64)
DEF_CONVERT (zext, uint16_t, uint64_t, 128)
DEF_CONVERT (zext, uint16_t, uint64_t, 256)
DEF_CONVERT (zext, uint16_t, uint64_t, 512)
/* 8 + 7 = 15 functions per signedness are instantiated above; the expected
   totals correspond to one vf4 extension per function.  */
/* { dg-final { scan-assembler-times {vsext\.vf4} 15 } } */
/* { dg-final { scan-assembler-times {vzext\.vf4} 15 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,36 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Sign- and zero-extension loops (DEF_CONVERT from def.h) from 8-bit to
   64-bit elements.  Trip count 8 is not instantiated in this file.  */
#include "def.h"
DEF_CONVERT (sext, int8_t, int64_t, 4)
DEF_CONVERT (sext, int8_t, int64_t, 16)
DEF_CONVERT (sext, int8_t, int64_t, 32)
DEF_CONVERT (sext, int8_t, int64_t, 64)
DEF_CONVERT (sext, int8_t, int64_t, 128)
DEF_CONVERT (sext, int8_t, int64_t, 256)
DEF_CONVERT (sext, int8_t, int64_t, 512)
DEF_CONVERT (zext, uint8_t, uint64_t, 4)
DEF_CONVERT (zext, uint8_t, uint64_t, 16)
DEF_CONVERT (zext, uint8_t, uint64_t, 32)
DEF_CONVERT (zext, uint8_t, uint64_t, 64)
DEF_CONVERT (zext, uint8_t, uint64_t, 128)
DEF_CONVERT (zext, uint8_t, uint64_t, 256)
DEF_CONVERT (zext, uint8_t, uint64_t, 512)
/* 7 functions per signedness are instantiated above; the expected totals
   correspond to one vf8 extension per function.  */
/* { dg-final { scan-assembler-times {vsext\.vf8} 7 } } */
/* { dg-final { scan-assembler-times {vzext\.vf8} 7 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, <<)
DEF_OP_VV (shift, 256, int64_t, <<)
DEF_OP_VV (shift, 512, int64_t, <<)
/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */
/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 43 } } */
/* { dg-final { scan-assembler-not {csrr} } } */

View file

@ -0,0 +1,71 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Truncation loops (DEF_CONVERT from def.h) that halve the element width:
   16->8, 32->16 and 64->32 bits, signed and unsigned.  Trip count 8 is not
   instantiated in this file.  */
#include "def.h"
DEF_CONVERT (trunc, int16_t, int8_t, 4)
DEF_CONVERT (trunc, int16_t, int8_t, 16)
DEF_CONVERT (trunc, int16_t, int8_t, 32)
DEF_CONVERT (trunc, int16_t, int8_t, 64)
DEF_CONVERT (trunc, int16_t, int8_t, 128)
DEF_CONVERT (trunc, int16_t, int8_t, 256)
DEF_CONVERT (trunc, int16_t, int8_t, 512)
DEF_CONVERT (trunc, int16_t, int8_t, 1024)
DEF_CONVERT (trunc, int16_t, int8_t, 2048)
DEF_CONVERT (trunc, int32_t, int16_t, 4)
DEF_CONVERT (trunc, int32_t, int16_t, 16)
DEF_CONVERT (trunc, int32_t, int16_t, 32)
DEF_CONVERT (trunc, int32_t, int16_t, 64)
DEF_CONVERT (trunc, int32_t, int16_t, 128)
DEF_CONVERT (trunc, int32_t, int16_t, 256)
DEF_CONVERT (trunc, int32_t, int16_t, 512)
DEF_CONVERT (trunc, int32_t, int16_t, 1024)
DEF_CONVERT (trunc, int64_t, int32_t, 4)
DEF_CONVERT (trunc, int64_t, int32_t, 16)
DEF_CONVERT (trunc, int64_t, int32_t, 32)
DEF_CONVERT (trunc, int64_t, int32_t, 64)
DEF_CONVERT (trunc, int64_t, int32_t, 128)
DEF_CONVERT (trunc, int64_t, int32_t, 256)
DEF_CONVERT (trunc, uint16_t, uint8_t, 4)
DEF_CONVERT (trunc, uint16_t, uint8_t, 16)
DEF_CONVERT (trunc, uint16_t, uint8_t, 32)
DEF_CONVERT (trunc, uint16_t, uint8_t, 64)
DEF_CONVERT (trunc, uint16_t, uint8_t, 128)
DEF_CONVERT (trunc, uint16_t, uint8_t, 256)
DEF_CONVERT (trunc, uint16_t, uint8_t, 512)
DEF_CONVERT (trunc, uint16_t, uint8_t, 1024)
DEF_CONVERT (trunc, uint16_t, uint8_t, 2048)
DEF_CONVERT (trunc, uint32_t, uint16_t, 4)
DEF_CONVERT (trunc, uint32_t, uint16_t, 16)
DEF_CONVERT (trunc, uint32_t, uint16_t, 32)
DEF_CONVERT (trunc, uint32_t, uint16_t, 64)
DEF_CONVERT (trunc, uint32_t, uint16_t, 128)
DEF_CONVERT (trunc, uint32_t, uint16_t, 256)
DEF_CONVERT (trunc, uint32_t, uint16_t, 512)
DEF_CONVERT (trunc, uint32_t, uint16_t, 1024)
DEF_CONVERT (trunc, uint64_t, uint32_t, 4)
DEF_CONVERT (trunc, uint64_t, uint32_t, 16)
DEF_CONVERT (trunc, uint64_t, uint32_t, 32)
DEF_CONVERT (trunc, uint64_t, uint32_t, 64)
DEF_CONVERT (trunc, uint64_t, uint32_t, 128)
DEF_CONVERT (trunc, uint64_t, uint32_t, 256)
/* 2 * (9 + 8 + 6) = 46 functions are instantiated above; the expected total
   corresponds to one narrowing vncvt per function.  */
/* { dg-final { scan-assembler-times {vncvt\.x\.x\.w} 46 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,53 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Truncation loops (DEF_CONVERT from def.h) that narrow the element width
   by a factor of four: 32->8 and 64->16 bits, signed and unsigned.  Trip
   count 8 is not instantiated in this file.  */
#include "def.h"
DEF_CONVERT (trunc, int32_t, int8_t, 4)
DEF_CONVERT (trunc, int32_t, int8_t, 16)
DEF_CONVERT (trunc, int32_t, int8_t, 32)
DEF_CONVERT (trunc, int32_t, int8_t, 64)
DEF_CONVERT (trunc, int32_t, int8_t, 128)
DEF_CONVERT (trunc, int32_t, int8_t, 256)
DEF_CONVERT (trunc, int32_t, int8_t, 512)
DEF_CONVERT (trunc, int32_t, int8_t, 1024)
DEF_CONVERT (trunc, int64_t, int16_t, 4)
DEF_CONVERT (trunc, int64_t, int16_t, 16)
DEF_CONVERT (trunc, int64_t, int16_t, 32)
DEF_CONVERT (trunc, int64_t, int16_t, 64)
DEF_CONVERT (trunc, int64_t, int16_t, 128)
DEF_CONVERT (trunc, int64_t, int16_t, 256)
DEF_CONVERT (trunc, int64_t, int16_t, 512)
DEF_CONVERT (trunc, uint32_t, uint8_t, 4)
DEF_CONVERT (trunc, uint32_t, uint8_t, 16)
DEF_CONVERT (trunc, uint32_t, uint8_t, 32)
DEF_CONVERT (trunc, uint32_t, uint8_t, 64)
DEF_CONVERT (trunc, uint32_t, uint8_t, 128)
DEF_CONVERT (trunc, uint32_t, uint8_t, 256)
DEF_CONVERT (trunc, uint32_t, uint8_t, 512)
DEF_CONVERT (trunc, uint32_t, uint8_t, 1024)
DEF_CONVERT (trunc, uint64_t, uint16_t, 4)
DEF_CONVERT (trunc, uint64_t, uint16_t, 16)
DEF_CONVERT (trunc, uint64_t, uint16_t, 32)
DEF_CONVERT (trunc, uint64_t, uint16_t, 64)
DEF_CONVERT (trunc, uint64_t, uint16_t, 128)
DEF_CONVERT (trunc, uint64_t, uint16_t, 256)
DEF_CONVERT (trunc, uint64_t, uint16_t, 512)
/* 2 * (8 + 7) = 30 functions are instantiated above.  The expected total of
   60 corresponds to two vncvt steps per function: each vncvt.x.x.w halves
   the element width, so a 4x narrowing takes two of them.  */
/* { dg-final { scan-assembler-times {vncvt\.x\.x\.w} 60 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -0,0 +1,35 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
/* Truncation loops (DEF_CONVERT from def.h) from 64-bit to 8-bit elements,
   signed and unsigned.  Trip count 8 is not instantiated in this file.  */
#include "def.h"
DEF_CONVERT (trunc, int64_t, int8_t, 4)
DEF_CONVERT (trunc, int64_t, int8_t, 16)
DEF_CONVERT (trunc, int64_t, int8_t, 32)
DEF_CONVERT (trunc, int64_t, int8_t, 64)
DEF_CONVERT (trunc, int64_t, int8_t, 128)
DEF_CONVERT (trunc, int64_t, int8_t, 256)
DEF_CONVERT (trunc, int64_t, int8_t, 512)
DEF_CONVERT (trunc, uint64_t, uint8_t, 4)
DEF_CONVERT (trunc, uint64_t, uint8_t, 16)
DEF_CONVERT (trunc, uint64_t, uint8_t, 32)
DEF_CONVERT (trunc, uint64_t, uint8_t, 64)
DEF_CONVERT (trunc, uint64_t, uint8_t, 128)
DEF_CONVERT (trunc, uint64_t, uint8_t, 256)
DEF_CONVERT (trunc, uint64_t, uint8_t, 512)
/* 2 * 7 = 14 functions are instantiated above.  The expected total of 42
   corresponds to three vncvt steps per function: each vncvt.x.x.w halves
   the element width, so an 8x narrowing takes three of them.  */
/* { dg-final { scan-assembler-times {vncvt\.x\.x\.w} 42 } } */
/* No CSR read may appear.  NOTE(review): presumably this guards against
   reading vlenb for length-agnostic code -- confirm.  */
/* { dg-final { scan-assembler-not {csrr} } } */
/* NOTE(review): the "N,N" strings below look like checks for length-agnostic
   (poly-int style) values in the optimized dump; "8,8" is not in the list --
   confirm that the omission is intentional.  */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math" } */
/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -ffast-math -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
#include <stdint-gcc.h>

View file

@ -3,4 +3,4 @@
#include "template-1.h"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */