RISC-V: Support RVV permutation auto-vectorization
This patch supports vector permutation for VLS only by vec_perm pattern. We will support TARGET_VECTORIZE_VEC_PERM_CONST to support VLA permutation in the future. Fixed following comments from Robin. gcc/ChangeLog: * config/riscv/autovec.md (vec_perm<mode>): New pattern. * config/riscv/predicates.md (vector_perm_operand): New predicate. * config/riscv/riscv-protos.h (enum insn_type): New enum. (expand_vec_perm): New function. * config/riscv/riscv-v.cc (const_vec_all_in_range_p): Ditto. (gen_const_vector_dup): Ditto. (emit_vlmax_gather_insn): Ditto. (emit_vlmax_masked_gather_mu_insn): Ditto. (expand_vec_perm): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-1.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-2.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-3.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-5.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-6.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm-7.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm.h: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-1.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-2.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-3.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-4.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-5.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-6.c: New test. * gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-7.c: New test.
This commit is contained in:
parent
847499148e
commit
bf9eee73f3
19 changed files with 1217 additions and 0 deletions
|
@ -83,6 +83,24 @@
|
|||
}
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT,FP] permutation
|
||||
;; -------------------------------------------------------------------------
|
||||
;; This pattern permutes the elements of the input vectors.
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Expander for the vec_perm<mode> standard pattern.
;;   operand 0: destination register
;;   operands 1, 2: the two value vectors (registers)
;;   operand 3: the selector, in the <VINDEX> mode; either a register or a
;;              const_vector (see vector_perm_operand)
;; The pattern is only enabled when the vector extension is available and the
;; number of units of <MODE> is a compile-time constant.  All of the work is
;; delegated to riscv_vector::expand_vec_perm.
(define_expand "vec_perm<mode>"
|
||||
[(match_operand:V 0 "register_operand")
|
||||
(match_operand:V 1 "register_operand")
|
||||
(match_operand:V 2 "register_operand")
|
||||
(match_operand:<VINDEX> 3 "vector_perm_operand")]
|
||||
"TARGET_VECTOR && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
|
||||
{
|
||||
riscv_vector::expand_vec_perm (operands);
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT,FP] Initialize from individual elements
|
||||
;; -------------------------------------------------------------------------
|
||||
|
|
|
@ -330,6 +330,10 @@
|
|||
(and (match_code "const_vector")
|
||||
(match_test "riscv_vector::const_vec_all_same_in_range_p (op, 0, 31)"))))
|
||||
|
||||
;; Matches a register operand or any const_vector.  Used for the selector
;; operand (operand 3) of vec_perm<mode>.
(define_predicate "vector_perm_operand"
|
||||
(ior (match_operand 0 "register_operand")
|
||||
(match_code "const_vector")))
|
||||
|
||||
(define_predicate "ltge_operator"
|
||||
(match_code "lt,ltu,ge,geu"))
|
||||
|
||||
|
|
|
@ -137,6 +137,7 @@ enum insn_type
|
|||
RVV_MISC_OP = 1,
|
||||
RVV_UNOP = 2,
|
||||
RVV_BINOP = 3,
|
||||
RVV_BINOP_MU = RVV_BINOP + 2,
|
||||
RVV_MERGE_OP = 4,
|
||||
RVV_CMP_OP = 4,
|
||||
RVV_CMP_MU_OP = RVV_CMP_OP + 2, /* +2 means mask and maskoff operand. */
|
||||
|
@ -240,6 +241,7 @@ opt_machine_mode get_mask_mode (machine_mode);
|
|||
void expand_vec_series (rtx, rtx, rtx);
|
||||
void expand_vec_init (rtx, rtx);
|
||||
void expand_vcond (rtx *);
|
||||
void expand_vec_perm (rtx *);
|
||||
/* Rounding mode bitfield for fixed point VXRM. */
|
||||
enum vxrm_field_enum
|
||||
{
|
||||
|
|
|
@ -259,6 +259,47 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
|
|||
&& IN_RANGE (INTVAL (elt), minval, maxval));
|
||||
}
|
||||
|
||||
/* Return true if VEC is a constant in which every element is in the range
|
||||
[MINVAL, MAXVAL]. The elements do not need to have the same value.
|
||||
|
||||
This function also exists in aarch64, we may unify it in middle-end in the
|
||||
future. */
|
||||
|
||||
static bool
|
||||
const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
|
||||
{
|
||||
if (!CONST_VECTOR_P (vec)
|
||||
|| GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
|
||||
return false;
|
||||
|
||||
int nunits;
|
||||
if (!CONST_VECTOR_STEPPED_P (vec))
|
||||
nunits = const_vector_encoded_nelts (vec);
|
||||
else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < nunits; i++)
|
||||
{
|
||||
rtx vec_elem = CONST_VECTOR_ELT (vec, i);
|
||||
if (!CONST_INT_P (vec_elem)
|
||||
|| !IN_RANGE (INTVAL (vec_elem), minval, maxval))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return a const_int vector of VAL.
|
||||
|
||||
This function also exists in aarch64, we may unify it in middle-end in the
|
||||
future. */
|
||||
|
||||
static rtx
|
||||
gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
|
||||
{
|
||||
rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
|
||||
return gen_const_vec_duplicate (mode, c);
|
||||
}
|
||||
|
||||
/* Emit a vlmax vsetvl instruction. This should only be used when
|
||||
optimization is disabled or after vsetvl insertion pass. */
|
||||
void
|
||||
|
@ -1927,4 +1968,116 @@ expand_vcond (rtx *ops)
|
|||
gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
|
||||
}
|
||||
|
||||
/* This function emits VLMAX vrgather instruction. Emit vrgather.vx/vi when sel
|
||||
is a const duplicate vector. Otherwise, emit vrgather.vv. */
|
||||
static void
|
||||
emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
|
||||
{
|
||||
rtx elt;
|
||||
insn_code icode;
|
||||
machine_mode data_mode = GET_MODE (target);
|
||||
if (const_vec_duplicate_p (sel, &elt))
|
||||
{
|
||||
icode = code_for_pred_gather_scalar (data_mode);
|
||||
sel = elt;
|
||||
}
|
||||
else
|
||||
icode = code_for_pred_gather (data_mode);
|
||||
rtx ops[] = {target, op, sel};
|
||||
emit_vlmax_insn (icode, RVV_BINOP, ops);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
|
||||
{
|
||||
rtx elt;
|
||||
insn_code icode;
|
||||
machine_mode data_mode = GET_MODE (target);
|
||||
if (const_vec_duplicate_p (sel, &elt))
|
||||
{
|
||||
icode = code_for_pred_gather_scalar (data_mode);
|
||||
sel = elt;
|
||||
}
|
||||
else
|
||||
icode = code_for_pred_gather (data_mode);
|
||||
rtx ops[] = {target, mask, target, op, sel};
|
||||
emit_vlmax_masked_mu_insn (icode, RVV_BINOP_MU, ops);
|
||||
}
|
||||
|
||||
/* Implement vec_perm<mode>. */
|
||||
|
||||
void
|
||||
expand_vec_perm (rtx *operands)
|
||||
{
|
||||
rtx target = operands[0];
|
||||
rtx op0 = operands[1];
|
||||
rtx op1 = operands[2];
|
||||
rtx sel = operands[3];
|
||||
machine_mode data_mode = GET_MODE (target);
|
||||
machine_mode sel_mode = GET_MODE (sel);
|
||||
|
||||
/* Enforced by the pattern condition. */
|
||||
int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
|
||||
|
||||
/* Check if the sel only references the first values vector. If each select
|
||||
index is in range of [0, nunits - 1]. A single vrgather instructions is
|
||||
enough. */
|
||||
if (const_vec_all_in_range_p (sel, 0, nunits - 1))
|
||||
{
|
||||
emit_vlmax_gather_insn (target, op0, sel);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if the two values vectors are the same. */
|
||||
if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
|
||||
{
|
||||
/* Note: vec_perm indices are supposed to wrap when they go beyond the
|
||||
size of the two value vectors, i.e. the upper bits of the indices
|
||||
are effectively ignored. RVV vrgather instead produces 0 for any
|
||||
out-of-range indices, so we need to modulo all the vec_perm indices
|
||||
to ensure they are all in range of [0, nunits - 1]. */
|
||||
rtx max_sel = gen_const_vector_dup (sel_mode, nunits - 1);
|
||||
rtx sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0,
|
||||
OPTAB_DIRECT);
|
||||
emit_vlmax_gather_insn (target, op1, sel_mod);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Note: vec_perm indices are supposed to wrap when they go beyond the
|
||||
size of the two value vectors, i.e. the upper bits of the indices
|
||||
are effectively ignored. RVV vrgather instead produces 0 for any
|
||||
out-of-range indices, so we need to modulo all the vec_perm indices
|
||||
to ensure they are all in range of [0, 2 * nunits - 1]. */
|
||||
rtx max_sel = gen_const_vector_dup (sel_mode, 2 * nunits - 1);
|
||||
rtx sel_mod
|
||||
= expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0, OPTAB_DIRECT);
|
||||
|
||||
/* This following sequence is handling the case that:
|
||||
__builtin_shufflevector (vec1, vec2, index...), the index can be any
|
||||
value in range of [0, 2 * nunits - 1]. */
|
||||
machine_mode mask_mode;
|
||||
mask_mode = get_mask_mode (data_mode).require ();
|
||||
rtx mask = gen_reg_rtx (mask_mode);
|
||||
max_sel = gen_const_vector_dup (sel_mode, nunits);
|
||||
|
||||
/* Step 1: generate a mask that should select everything >= nunits into the
|
||||
* mask. */
|
||||
expand_vec_cmp (mask, GEU, sel_mod, max_sel);
|
||||
|
||||
/* Step2: gather every op0 values indexed by sel into target,
|
||||
we don't need to care about the result of the element
|
||||
whose index >= nunits. */
|
||||
emit_vlmax_gather_insn (target, op0, sel_mod);
|
||||
|
||||
/* Step3: shift the range from (nunits, max_of_mode] to
|
||||
[0, max_of_mode - nunits]. */
|
||||
rtx tmp = gen_reg_rtx (sel_mode);
|
||||
rtx ops[] = {tmp, sel_mod, max_sel};
|
||||
emit_vlmax_insn (code_for_pred (MINUS, sel_mode), RVV_BINOP, ops);
|
||||
|
||||
/* Step4: gather those into the previously masked-out elements
|
||||
of target. */
|
||||
emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
|
||||
}
|
||||
|
||||
} // namespace riscv_vector
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define MASK_2(X, Y) 1, 1
|
||||
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
|
||||
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
|
||||
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
|
||||
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
|
||||
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
|
||||
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
|
||||
|
||||
#define PERMUTE(TYPE, NUNITS) \
|
||||
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
|
||||
TYPE *out) \
|
||||
{ \
|
||||
TYPE v \
|
||||
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
|
||||
*(TYPE *) out = v; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx2qi, 2) \
|
||||
T (vnx4qi, 4) \
|
||||
T (vnx8qi, 8) \
|
||||
T (vnx16qi, 16) \
|
||||
T (vnx32qi, 32) \
|
||||
T (vnx64qi, 64) \
|
||||
T (vnx128qi, 128) \
|
||||
T (vnx2hi, 2) \
|
||||
T (vnx4hi, 4) \
|
||||
T (vnx8hi, 8) \
|
||||
T (vnx16hi, 16) \
|
||||
T (vnx32hi, 32) \
|
||||
T (vnx64hi, 64) \
|
||||
T (vnx2si, 2) \
|
||||
T (vnx4si, 4) \
|
||||
T (vnx8si, 8) \
|
||||
T (vnx16si, 16) \
|
||||
T (vnx32si, 32) \
|
||||
T (vnx2di, 2) \
|
||||
T (vnx4di, 4) \
|
||||
T (vnx8di, 8) \
|
||||
T (vnx16di, 16) \
|
||||
T (vnx2sf, 2) \
|
||||
T (vnx4sf, 4) \
|
||||
T (vnx8sf, 8) \
|
||||
T (vnx16sf, 16) \
|
||||
T (vnx32sf, 32) \
|
||||
T (vnx2df, 2) \
|
||||
T (vnx4df, 4) \
|
||||
T (vnx8df, 8) \
|
||||
T (vnx16df, 16)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*1} 31 } } */
|
|
@ -0,0 +1,33 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define MASK_2(X, Y) 31, 31
|
||||
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
|
||||
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
|
||||
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
|
||||
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
|
||||
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
|
||||
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
|
||||
|
||||
/* Define permute_<TYPE>, which shuffles VALUES1 and VALUES2 with the constant
   selector MASK_<NUNITS> and stores the result through OUT.  The function is
   marked noipa like the PERMUTE definitions in the sibling perm-*.c tests, so
   that the run test including this file always calls the out-of-line
   function.  */
#define PERMUTE(TYPE, NUNITS)                                                  \
  __attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2,     \
                                               TYPE *out)                      \
  {                                                                            \
    TYPE v                                                                     \
      = __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
    *(TYPE *) out = v;                                                         \
  }
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx32qi, 32) \
|
||||
T (vnx64qi, 64) \
|
||||
T (vnx128qi, 128) \
|
||||
T (vnx32hi, 32) \
|
||||
T (vnx64hi, 64) \
|
||||
T (vnx32si, 32) \
|
||||
T (vnx32sf, 32)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*31} 7 } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define MASK_2(X, Y) 55, 55
|
||||
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
|
||||
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
|
||||
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
|
||||
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
|
||||
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
|
||||
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
|
||||
|
||||
/* Define permute_<TYPE>, which shuffles VALUES1 and VALUES2 with the constant
   selector MASK_<NUNITS> and stores the result through OUT.  The function is
   marked noipa like the PERMUTE definitions in the sibling perm-*.c tests, so
   that the run test including this file always calls the out-of-line
   function.  */
#define PERMUTE(TYPE, NUNITS)                                                  \
  __attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2,     \
                                               TYPE *out)                      \
  {                                                                            \
    TYPE v                                                                     \
      = __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
    *(TYPE *) out = v;                                                         \
  }
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx64qi, 64) \
|
||||
T (vnx128qi, 128) \
|
||||
T (vnx64hi, 64)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 3 } } */
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)
|
||||
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
|
||||
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
|
||||
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
|
||||
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
|
||||
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
|
||||
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
|
||||
|
||||
#define PERMUTE(TYPE, NUNITS) \
|
||||
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
|
||||
TYPE *out) \
|
||||
{ \
|
||||
TYPE v \
|
||||
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
|
||||
*(TYPE *) out = v; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx2qi, 2) \
|
||||
T (vnx4qi, 4) \
|
||||
T (vnx8qi, 8) \
|
||||
T (vnx16qi, 16) \
|
||||
T (vnx32qi, 32) \
|
||||
T (vnx64qi, 64) \
|
||||
T (vnx128qi, 128) \
|
||||
T (vnx2hi, 2) \
|
||||
T (vnx4hi, 4) \
|
||||
T (vnx8hi, 8) \
|
||||
T (vnx16hi, 16) \
|
||||
T (vnx32hi, 32) \
|
||||
T (vnx64hi, 64) \
|
||||
T (vnx2si, 2) \
|
||||
T (vnx4si, 4) \
|
||||
T (vnx8si, 8) \
|
||||
T (vnx16si, 16) \
|
||||
T (vnx32si, 32) \
|
||||
T (vnx2di, 2) \
|
||||
T (vnx4di, 4) \
|
||||
T (vnx8di, 8) \
|
||||
T (vnx16di, 16) \
|
||||
T (vnx2sf, 2) \
|
||||
T (vnx4sf, 4) \
|
||||
T (vnx8sf, 8) \
|
||||
T (vnx16sf, 16) \
|
||||
T (vnx32sf, 32) \
|
||||
T (vnx2df, 2) \
|
||||
T (vnx4df, 4) \
|
||||
T (vnx8df, 8) \
|
||||
T (vnx16df, 16)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */
|
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define PERMUTE(TYPE, TYPE2, NUNITS) \
|
||||
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
|
||||
TYPE2 mask, TYPE *out) \
|
||||
{ \
|
||||
TYPE v = __builtin_shuffle (values1, values1, mask); \
|
||||
*(TYPE *) out = v; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx2qi, vnx2qi, 2) \
|
||||
T (vnx4qi, vnx4qi, 4) \
|
||||
T (vnx8qi, vnx8qi, 8) \
|
||||
T (vnx16qi, vnx16qi, 16) \
|
||||
T (vnx32qi, vnx32qi, 32) \
|
||||
T (vnx64qi, vnx64qi, 64) \
|
||||
T (vnx128qi, vnx128qi, 128) \
|
||||
T (vnx2hi, vnx2hi, 2) \
|
||||
T (vnx4hi, vnx4hi, 4) \
|
||||
T (vnx8hi, vnx8hi, 8) \
|
||||
T (vnx16hi, vnx16hi, 16) \
|
||||
T (vnx32hi, vnx32hi, 32) \
|
||||
T (vnx64hi, vnx64hi, 64) \
|
||||
T (vnx2si, vnx2si, 2) \
|
||||
T (vnx4si, vnx4si, 4) \
|
||||
T (vnx8si, vnx8si, 8) \
|
||||
T (vnx16si, vnx16si, 16) \
|
||||
T (vnx32si, vnx32si, 32) \
|
||||
T (vnx2di, vnx2di, 2) \
|
||||
T (vnx4di, vnx4di, 4) \
|
||||
T (vnx8di, vnx8di, 8) \
|
||||
T (vnx16di, vnx16di, 16) \
|
||||
T (vnx2sf, vnx2si, 2) \
|
||||
T (vnx4sf, vnx4si, 4) \
|
||||
T (vnx8sf, vnx8si, 8) \
|
||||
T (vnx16sf, vnx16si, 16) \
|
||||
T (vnx32sf, vnx32si, 32) \
|
||||
T (vnx2df, vnx2di, 2) \
|
||||
T (vnx4df, vnx4di, 4) \
|
||||
T (vnx8df, vnx8di, 8) \
|
||||
T (vnx16df, vnx16di, 16)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */
|
|
@ -0,0 +1,58 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define MASK_2(X, Y) Y + 1, Y + 1
|
||||
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
|
||||
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
|
||||
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
|
||||
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
|
||||
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
|
||||
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
|
||||
|
||||
#define PERMUTE(TYPE, NUNITS) \
|
||||
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
|
||||
TYPE *out) \
|
||||
{ \
|
||||
TYPE v \
|
||||
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
|
||||
*(TYPE *) out = v; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx2qi, 2) \
|
||||
T (vnx4qi, 4) \
|
||||
T (vnx8qi, 8) \
|
||||
T (vnx16qi, 16) \
|
||||
T (vnx32qi, 32) \
|
||||
T (vnx64qi, 64) \
|
||||
T (vnx128qi, 128) \
|
||||
T (vnx2hi, 2) \
|
||||
T (vnx4hi, 4) \
|
||||
T (vnx8hi, 8) \
|
||||
T (vnx16hi, 16) \
|
||||
T (vnx32hi, 32) \
|
||||
T (vnx64hi, 64) \
|
||||
T (vnx2si, 2) \
|
||||
T (vnx4si, 4) \
|
||||
T (vnx8si, 8) \
|
||||
T (vnx16si, 16) \
|
||||
T (vnx32si, 32) \
|
||||
T (vnx2di, 2) \
|
||||
T (vnx4di, 4) \
|
||||
T (vnx8di, 8) \
|
||||
T (vnx16di, 16) \
|
||||
T (vnx2sf, 2) \
|
||||
T (vnx4sf, 4) \
|
||||
T (vnx8sf, 8) \
|
||||
T (vnx16sf, 16) \
|
||||
T (vnx32sf, 32) \
|
||||
T (vnx2df, 2) \
|
||||
T (vnx4df, 4) \
|
||||
T (vnx8df, 8) \
|
||||
T (vnx16df, 16)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*1} 31 } } */
|
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
|
||||
|
||||
#include "perm.h"
|
||||
|
||||
#define PERMUTE(TYPE, TYPE2, NUNITS) \
|
||||
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
|
||||
TYPE2 mask, TYPE *out) \
|
||||
{ \
|
||||
TYPE v = __builtin_shuffle (values1, values2, mask); \
|
||||
*(TYPE *) out = v; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (vnx2qi, vnx2qi, 2) \
|
||||
T (vnx4qi, vnx4qi, 4) \
|
||||
T (vnx8qi, vnx8qi, 8) \
|
||||
T (vnx16qi, vnx16qi, 16) \
|
||||
T (vnx32qi, vnx32qi, 32) \
|
||||
T (vnx64qi, vnx64qi, 64) \
|
||||
T (vnx128qi, vnx128qi, 128) \
|
||||
T (vnx2hi, vnx2hi, 2) \
|
||||
T (vnx4hi, vnx4hi, 4) \
|
||||
T (vnx8hi, vnx8hi, 8) \
|
||||
T (vnx16hi, vnx16hi, 16) \
|
||||
T (vnx32hi, vnx32hi, 32) \
|
||||
T (vnx64hi, vnx64hi, 64) \
|
||||
T (vnx2si, vnx2si, 2) \
|
||||
T (vnx4si, vnx4si, 4) \
|
||||
T (vnx8si, vnx8si, 8) \
|
||||
T (vnx16si, vnx16si, 16) \
|
||||
T (vnx32si, vnx32si, 32) \
|
||||
T (vnx2di, vnx2di, 2) \
|
||||
T (vnx4di, vnx4di, 4) \
|
||||
T (vnx8di, vnx8di, 8) \
|
||||
T (vnx16di, vnx16di, 16) \
|
||||
T (vnx2sf, vnx2si, 2) \
|
||||
T (vnx4sf, vnx4si, 4) \
|
||||
T (vnx8sf, vnx8si, 8) \
|
||||
T (vnx16sf, vnx16si, 16) \
|
||||
T (vnx32sf, vnx32si, 32) \
|
||||
T (vnx2df, vnx2di, 2) \
|
||||
T (vnx4df, vnx4di, 4) \
|
||||
T (vnx8df, vnx8di, 8) \
|
||||
T (vnx16df, vnx16di, 16)
|
||||
|
||||
TEST_ALL (PERMUTE)
|
||||
|
||||
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 31 } } */
|
70
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm.h
Normal file
70
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/perm.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
#include <stdint.h>
|
||||
|
||||
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
|
||||
typedef int8_t vnx4qi __attribute__ ((vector_size (4)));
|
||||
typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
|
||||
typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
|
||||
typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
|
||||
typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
|
||||
typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
|
||||
|
||||
typedef int16_t vnx2hi __attribute__ ((vector_size (4)));
|
||||
typedef int16_t vnx4hi __attribute__ ((vector_size (8)));
|
||||
typedef int16_t vnx8hi __attribute__ ((vector_size (16)));
|
||||
typedef int16_t vnx16hi __attribute__ ((vector_size (32)));
|
||||
typedef int16_t vnx32hi __attribute__ ((vector_size (64)));
|
||||
typedef int16_t vnx64hi __attribute__ ((vector_size (128)));
|
||||
|
||||
typedef int32_t vnx2si __attribute__ ((vector_size (8)));
|
||||
typedef int32_t vnx4si __attribute__ ((vector_size (16)));
|
||||
typedef int32_t vnx8si __attribute__ ((vector_size (32)));
|
||||
typedef int32_t vnx16si __attribute__ ((vector_size (64)));
|
||||
typedef int32_t vnx32si __attribute__ ((vector_size (128)));
|
||||
|
||||
typedef int64_t vnx2di __attribute__ ((vector_size (16)));
|
||||
typedef int64_t vnx4di __attribute__ ((vector_size (32)));
|
||||
typedef int64_t vnx8di __attribute__ ((vector_size (64)));
|
||||
typedef int64_t vnx16di __attribute__ ((vector_size (128)));
|
||||
|
||||
typedef float vnx2sf __attribute__ ((vector_size (8)));
|
||||
typedef float vnx4sf __attribute__ ((vector_size (16)));
|
||||
typedef float vnx8sf __attribute__ ((vector_size (32)));
|
||||
typedef float vnx16sf __attribute__ ((vector_size (64)));
|
||||
typedef float vnx32sf __attribute__ ((vector_size (128)));
|
||||
|
||||
typedef double vnx2df __attribute__ ((vector_size (16)));
|
||||
typedef double vnx4df __attribute__ ((vector_size (32)));
|
||||
typedef double vnx8df __attribute__ ((vector_size (64)));
|
||||
typedef double vnx16df __attribute__ ((vector_size (128)));
|
||||
|
||||
/* Declare, in the enclosing scope, the two input vectors v_<TYPE>_in1 and
   v_<TYPE>_in2 and the zero-initialized output vector v_<TYPE>_out.  Element
   i of the inputs is set to i * NUM1 + NUM2 and i * NUM1 - NUM2 respectively,
   for i in [0, NUNITS).

   The expression arguments are parenthesized in the expansion so that
   compound arguments (e.g. "1 + 2") keep their intended value.  The macro
   expands to several statements and needs no trailing semicolon.  */
#define INIT_PERMUTE(NUNITS, NUM1, NUM2, TYPE)                                 \
  TYPE v_##TYPE##_in1;                                                         \
  TYPE v_##TYPE##_in2;                                                         \
  TYPE v_##TYPE##_out = {0};                                                   \
  for (int i = 0; i < (NUNITS); i++)                                           \
    {                                                                          \
      v_##TYPE##_in1[i] = i * (NUM1) + (NUM2);                                 \
      v_##TYPE##_in2[i] = i * (NUM1) - (NUM2);                                 \
    }
|
||||
|
||||
/* Abort unless each of the first NUNITS elements of v_<TYPE>_out equals
   VALUE.  NUNITS and VALUE are parenthesized in the expansion so that
   arguments containing low-precedence operators compare as intended.  The
   macro expands to a complete statement and needs no trailing semicolon.  */
#define CHECK_PERMUTE_SINGLE(NUNITS, VALUE, TYPE)                              \
  for (int i = 0; i < (NUNITS); i++)                                           \
    {                                                                          \
      if (v_##TYPE##_out[i] != (VALUE))                                        \
        __builtin_abort ();                                                    \
    }
|
||||
|
||||
/* Abort unless the first NUNITS elements of v_<TYPE>_out are the first
   NUNITS elements of v_<TYPE>_in1 in reverse order.  NUNITS is parenthesized
   in the expansion so that a compound argument is evaluated as a unit.  The
   macro expands to a complete statement and needs no trailing semicolon.  */
#define CHECK_PERMUTE_REVERSE(NUNITS, TYPE)                                    \
  for (int i = 0; i < (NUNITS); i++)                                           \
    {                                                                          \
      if (v_##TYPE##_out[i] != v_##TYPE##_in1[(NUNITS) - 1 - i])               \
        __builtin_abort ();                                                    \
    }
|
||||
|
||||
/* Abort unless element i of v_<TYPE>_out equals element 2 * i of the
   concatenation of v_<TYPE>_in1 and v_<TYPE>_in2, for i in [0, NUNITS):
   indices below NUNITS are looked up in v_<TYPE>_in1, the others (reduced
   modulo NUNITS) in v_<TYPE>_in2.  NUNITS is parenthesized in the expansion
   so that a compound argument such as "2 + 2" does not change the meaning of
   the modulo.  The macro expands to a complete statement and needs no
   trailing semicolon.  */
#define CHECK_PERMUTE_DOUBLE(NUNITS, TYPE)                                     \
  for (int i = 0; i < (NUNITS); i++)                                           \
    {                                                                          \
      int new_index = i * 2;                                                   \
      if (new_index < (NUNITS)                                                 \
          && v_##TYPE##_out[i] != v_##TYPE##_in1[new_index])                   \
        __builtin_abort ();                                                    \
      if (new_index >= (NUNITS)                                                \
          && v_##TYPE##_out[i] != v_##TYPE##_in2[new_index % (NUNITS)])        \
        __builtin_abort ();                                                    \
    }
|
|
@ -0,0 +1,104 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-1.c"
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(2, 3, 79, vnx2qi)
|
||||
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
|
||||
CHECK_PERMUTE_SINGLE(2, 3*1+79, vnx2qi)
|
||||
INIT_PERMUTE(4, 2, -69, vnx4qi)
|
||||
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 2*1+-69, vnx4qi)
|
||||
INIT_PERMUTE(8, 4, -33, vnx8qi)
|
||||
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 4*1+-33, vnx8qi)
|
||||
INIT_PERMUTE(16, -3, 15, vnx16qi)
|
||||
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
|
||||
CHECK_PERMUTE_SINGLE(16, -3*1+15, vnx16qi)
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -1*1+30, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -1*1+66, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_SINGLE(128, -1*1+38, vnx128qi)
|
||||
INIT_PERMUTE(2, 2, 30238, vnx2hi)
|
||||
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
|
||||
CHECK_PERMUTE_SINGLE(2, 2*1+30238, vnx2hi)
|
||||
INIT_PERMUTE(4, -45, -2345, vnx4hi)
|
||||
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
|
||||
CHECK_PERMUTE_SINGLE(4, -45*1+-2345, vnx4hi)
|
||||
INIT_PERMUTE(8, 98, -18415, vnx8hi)
|
||||
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 98*1+-18415, vnx8hi)
|
||||
INIT_PERMUTE(16, 56, 3299, vnx16hi)
|
||||
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 56*1+3299, vnx16hi)
|
||||
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 15641*1+-9156, vnx32hi)
|
||||
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -25641*1+8093, vnx64hi)
|
||||
INIT_PERMUTE(2, -428, -15651, vnx2si)
|
||||
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -428*1+-15651, vnx2si)
|
||||
INIT_PERMUTE(4, 208, -55651, vnx4si)
|
||||
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 208*1+-55651, vnx4si)
|
||||
INIT_PERMUTE(8, 808, 75651, vnx8si)
|
||||
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 808*1+75651, vnx8si)
|
||||
INIT_PERMUTE(16, 816, -8941561, vnx16si)
|
||||
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 816*1+-8941561, vnx16si)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -532*1+98416, vnx32si)
|
||||
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
|
||||
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -4161*1+9551616, vnx2di)
|
||||
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
|
||||
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 7259*1+-15644961, vnx4di)
|
||||
INIT_PERMUTE(8, 351, 9156651, vnx8di)
|
||||
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 351*1+9156651, vnx8di)
|
||||
INIT_PERMUTE(16, 11, -816196231,vnx16di)
|
||||
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 11*1+-816196231, vnx16di)
|
||||
INIT_PERMUTE(2, 4552, -89, vnx2sf)
|
||||
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
|
||||
CHECK_PERMUTE_SINGLE(2, (4552+-89), vnx2sf)
|
||||
INIT_PERMUTE(4, 685, 7961, vnx4sf)
|
||||
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 685+7961, vnx4sf)
|
||||
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
|
||||
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 3927*1+16513, vnx8sf)
|
||||
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
|
||||
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
|
||||
CHECK_PERMUTE_SINGLE(16, -68*1+16156571, vnx16sf)
|
||||
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 9985*1+1561318, vnx32sf)
|
||||
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
|
||||
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -1565.1561*1+-5641565.515, vnx2df)
|
||||
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
|
||||
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
|
||||
CHECK_PERMUTE_SINGLE(4, -189.14897196*1+-15616547.5165574, vnx4df)
|
||||
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
|
||||
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 651.158691561*1+-56163.1655411, vnx8df)
|
||||
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
|
||||
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 58.91516377*1+251465.81561, vnx16df)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-2.c"
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -1*31+30, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -1*31+66, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_SINGLE(128, -1*31+38, vnx128qi)
|
||||
INIT_PERMUTE(32, 156, -9156, vnx32hi)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 156*31+-9156, vnx32hi)
|
||||
INIT_PERMUTE(64, -251, 8093, vnx64hi)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -251*31+8093, vnx64hi)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -532*31+98416, vnx32si)
|
||||
INIT_PERMUTE(32, 995, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 995*31+1561318, vnx32sf)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-3.c"
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -1*55+66, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_SINGLE(128, -1*55+38, vnx128qi)
|
||||
INIT_PERMUTE(64, -251, 8093, vnx64hi)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -251*55+8093, vnx64hi)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-4.c"
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(2, 3, 79, vnx2qi)
|
||||
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2qi)
|
||||
INIT_PERMUTE(4, 2, -69, vnx4qi)
|
||||
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4qi)
|
||||
INIT_PERMUTE(8, 4, -33, vnx8qi)
|
||||
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8qi)
|
||||
INIT_PERMUTE(16, -3, 15, vnx16qi)
|
||||
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16qi)
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_REVERSE(64, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_REVERSE(128, vnx128qi)
|
||||
INIT_PERMUTE(2, 2, 30238, vnx2hi)
|
||||
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2hi)
|
||||
INIT_PERMUTE(4, -45, -2345, vnx4hi)
|
||||
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4hi)
|
||||
INIT_PERMUTE(8, 98, -18415, vnx8hi)
|
||||
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8hi)
|
||||
INIT_PERMUTE(16, 56, 3299, vnx16hi)
|
||||
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16hi)
|
||||
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32hi)
|
||||
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_REVERSE(64, vnx64hi)
|
||||
INIT_PERMUTE(2, -428, -15651, vnx2si)
|
||||
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2si)
|
||||
INIT_PERMUTE(4, 208, -55651, vnx4si)
|
||||
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4si)
|
||||
INIT_PERMUTE(8, 808, 75651, vnx8si)
|
||||
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8si)
|
||||
INIT_PERMUTE(16, 816, -8941561, vnx16si)
|
||||
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16si)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32si)
|
||||
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
|
||||
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2di)
|
||||
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
|
||||
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4di)
|
||||
INIT_PERMUTE(8, 351, 9156651, vnx8di)
|
||||
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8di)
|
||||
INIT_PERMUTE(16, 11, -816196231,vnx16di)
|
||||
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16di)
|
||||
INIT_PERMUTE(2, 4552, -89, vnx2sf)
|
||||
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2sf)
|
||||
INIT_PERMUTE(4, 685, 7961, vnx4sf)
|
||||
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4sf)
|
||||
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
|
||||
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8sf)
|
||||
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
|
||||
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16sf)
|
||||
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32sf)
|
||||
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
|
||||
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2df)
|
||||
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
|
||||
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4df)
|
||||
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
|
||||
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8df)
|
||||
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
|
||||
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16df)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-5.c"
|
||||
|
||||
/* MASK_<N> (X, Y) expands to N comma-separated index expressions
   2*Y-1-X, 2*Y-2-X, ..., 2*Y-N-X (descending).  With X == 0 and Y == N
   this is 2N-1 down to N.

   Every forwarded argument and every generated element is parenthesized
   so that an argument containing a low-precedence operator (for example
   `c ? a : b') still yields the intended indices.  */
#define MASK_2(X, Y) ((Y) - 1 - (X) + (Y)), ((Y) - 2 - (X) + (Y))
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 ((X) + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 ((X) + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 ((X) + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 ((X) + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 ((X) + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 ((X) + 64, Y)

/* Declare and initialize `TYPE TYPE_mask' with the NUNITS indices produced
   by MASK_<NUNITS> (0, NUNITS).  The trailing semicolon is part of the
   expansion, so call sites must not add one.  NUNITS must be a literal
   token for which a MASK_<NUNITS> macro exists, because it is pasted.  */
#define INIT_MASK(TYPE, NUNITS)                                               \
  TYPE TYPE##_mask = {MASK_##NUNITS (0, NUNITS)};
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(2, 3, 79, vnx2qi)
|
||||
INIT_MASK (vnx2qi, 2)
|
||||
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, vnx2qi_mask, &v_vnx2qi_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2qi)
|
||||
INIT_PERMUTE(4, 2, -69, vnx4qi)
|
||||
INIT_MASK (vnx4qi, 4)
|
||||
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, vnx4qi_mask, &v_vnx4qi_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4qi)
|
||||
INIT_PERMUTE(8, 4, -33, vnx8qi)
|
||||
INIT_MASK (vnx8qi, 8)
|
||||
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, vnx8qi_mask, &v_vnx8qi_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8qi)
|
||||
INIT_PERMUTE(16, -3, 15, vnx16qi)
|
||||
INIT_MASK (vnx16qi, 16)
|
||||
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, vnx16qi_mask, &v_vnx16qi_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16qi)
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
INIT_MASK (vnx32qi, 32)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, vnx32qi_mask, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
INIT_MASK (vnx64qi, 64)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, vnx64qi_mask, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_REVERSE(64, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
INIT_MASK (vnx128qi, 128)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, vnx128qi_mask, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_REVERSE(128, vnx128qi)
|
||||
INIT_PERMUTE(2, 2, 30238, vnx2hi)
|
||||
INIT_MASK (vnx2hi, 2)
|
||||
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, vnx2hi_mask, &v_vnx2hi_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2hi)
|
||||
INIT_PERMUTE(4, -45, -2345, vnx4hi)
|
||||
INIT_MASK (vnx4hi, 4)
|
||||
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, vnx4hi_mask, &v_vnx4hi_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4hi)
|
||||
INIT_PERMUTE(8, 98, -18415, vnx8hi)
|
||||
INIT_MASK (vnx8hi, 8)
|
||||
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, vnx8hi_mask, &v_vnx8hi_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8hi)
|
||||
INIT_PERMUTE(16, 56, 3299, vnx16hi)
|
||||
INIT_MASK (vnx16hi, 16)
|
||||
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, vnx16hi_mask, &v_vnx16hi_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16hi)
|
||||
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
|
||||
INIT_MASK (vnx32hi, 32)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, vnx32hi_mask, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32hi)
|
||||
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
|
||||
INIT_MASK (vnx64hi, 64)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, vnx64hi_mask, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_REVERSE(64, vnx64hi)
|
||||
INIT_PERMUTE(2, -428, -15651, vnx2si)
|
||||
INIT_MASK (vnx2si, 2)
|
||||
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, vnx2si_mask, &v_vnx2si_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2si)
|
||||
INIT_PERMUTE(4, 208, -55651, vnx4si)
|
||||
INIT_MASK (vnx4si, 4)
|
||||
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, vnx4si_mask, &v_vnx4si_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4si)
|
||||
INIT_PERMUTE(8, 808, 75651, vnx8si)
|
||||
INIT_MASK (vnx8si, 8)
|
||||
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, vnx8si_mask, &v_vnx8si_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8si)
|
||||
INIT_PERMUTE(16, 816, -8941561, vnx16si)
|
||||
INIT_MASK (vnx16si, 16)
|
||||
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, vnx16si_mask, &v_vnx16si_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16si)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
INIT_MASK (vnx32si, 32)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, vnx32si_mask, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32si)
|
||||
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
|
||||
INIT_MASK (vnx2di, 2)
|
||||
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, vnx2di_mask, &v_vnx2di_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2di)
|
||||
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
|
||||
INIT_MASK (vnx4di, 4)
|
||||
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, vnx4di_mask, &v_vnx4di_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4di)
|
||||
INIT_PERMUTE(8, 351, 9156651, vnx8di)
|
||||
INIT_MASK (vnx8di, 8)
|
||||
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, vnx8di_mask, &v_vnx8di_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8di)
|
||||
INIT_PERMUTE(16, 11, -816196231,vnx16di)
|
||||
INIT_MASK (vnx16di, 16)
|
||||
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, vnx16di_mask, &v_vnx16di_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16di)
|
||||
INIT_PERMUTE(2, 4552, -89, vnx2sf)
|
||||
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, vnx2si_mask, &v_vnx2sf_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2sf)
|
||||
INIT_PERMUTE(4, 685, 7961, vnx4sf)
|
||||
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, vnx4si_mask, &v_vnx4sf_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4sf)
|
||||
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
|
||||
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, vnx8si_mask, &v_vnx8sf_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8sf)
|
||||
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
|
||||
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, vnx16si_mask, &v_vnx16sf_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16sf)
|
||||
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, vnx32si_mask, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_REVERSE(32, vnx32sf)
|
||||
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
|
||||
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, vnx2di_mask, &v_vnx2df_out);
|
||||
CHECK_PERMUTE_REVERSE(2, vnx2df)
|
||||
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
|
||||
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, vnx4di_mask, &v_vnx4df_out);
|
||||
CHECK_PERMUTE_REVERSE(4, vnx4df)
|
||||
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
|
||||
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, vnx8di_mask, &v_vnx8df_out);
|
||||
CHECK_PERMUTE_REVERSE(8, vnx8df)
|
||||
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
|
||||
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, vnx16di_mask, &v_vnx16df_out);
|
||||
CHECK_PERMUTE_REVERSE(16, vnx16df)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
|
||||
|
||||
#include "perm-6.c"
|
||||
|
||||
int __attribute__ ((optimize (0)))
|
||||
main ()
|
||||
{
|
||||
INIT_PERMUTE(2, 3, 79, vnx2qi)
|
||||
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
|
||||
CHECK_PERMUTE_SINGLE(2, 3*1-79, vnx2qi)
|
||||
INIT_PERMUTE(4, 2, -69, vnx4qi)
|
||||
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 2*1-(-69), vnx4qi)
|
||||
INIT_PERMUTE(8, 4, -33, vnx8qi)
|
||||
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 4*1-(-33), vnx8qi)
|
||||
INIT_PERMUTE(16, -3, 15, vnx16qi)
|
||||
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
|
||||
CHECK_PERMUTE_SINGLE(16, -3*1-15, vnx16qi)
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -1*1-30, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -1*1-66, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_SINGLE(128, -1*1-38, vnx128qi)
|
||||
INIT_PERMUTE(2, 2, 30238, vnx2hi)
|
||||
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
|
||||
CHECK_PERMUTE_SINGLE(2, 2*1-30238, vnx2hi)
|
||||
INIT_PERMUTE(4, -45, -2345, vnx4hi)
|
||||
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
|
||||
CHECK_PERMUTE_SINGLE(4, -45*1-(-2345), vnx4hi)
|
||||
INIT_PERMUTE(8, 98, -18415, vnx8hi)
|
||||
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 98*1-(-18415), vnx8hi)
|
||||
INIT_PERMUTE(16, 56, 3299, vnx16hi)
|
||||
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 56*1-3299, vnx16hi)
|
||||
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 15641*1-(-9156), vnx32hi)
|
||||
INIT_PERMUTE(64, -2564, 8093, vnx64hi)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_SINGLE(64, -2564*1-8093, vnx64hi)
|
||||
INIT_PERMUTE(2, -428, -15651, vnx2si)
|
||||
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -428*1-(-15651), vnx2si)
|
||||
INIT_PERMUTE(4, 208, -55651, vnx4si)
|
||||
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 208*1-(-55651), vnx4si)
|
||||
INIT_PERMUTE(8, 808, 75651, vnx8si)
|
||||
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 808*1-75651, vnx8si)
|
||||
INIT_PERMUTE(16, 816, -8941561, vnx16si)
|
||||
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 816*1-(-8941561), vnx16si)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_SINGLE(32, -532*1-98416, vnx32si)
|
||||
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
|
||||
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -4161*1-9551616, vnx2di)
|
||||
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
|
||||
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 7259*1-(-15644961), vnx4di)
|
||||
INIT_PERMUTE(8, 351, 9156651, vnx8di)
|
||||
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 351*1-9156651, vnx8di)
|
||||
INIT_PERMUTE(16, 11, -816196231,vnx16di)
|
||||
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 11*1-(-816196231), vnx16di)
|
||||
INIT_PERMUTE(2, 4552, -89, vnx2sf)
|
||||
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
|
||||
CHECK_PERMUTE_SINGLE(2, (4552-(-89)), vnx2sf)
|
||||
INIT_PERMUTE(4, 685, 7961, vnx4sf)
|
||||
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
|
||||
CHECK_PERMUTE_SINGLE(4, 685-7961, vnx4sf)
|
||||
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
|
||||
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 3927*1-16513, vnx8sf)
|
||||
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
|
||||
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
|
||||
CHECK_PERMUTE_SINGLE(16, -68*1-16156571, vnx16sf)
|
||||
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_SINGLE(32, 9985*1-1561318, vnx32sf)
|
||||
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
|
||||
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
|
||||
CHECK_PERMUTE_SINGLE(2, -1565.1561*1-(-5641565.515), vnx2df)
|
||||
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
|
||||
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
|
||||
CHECK_PERMUTE_SINGLE(4, -189.14897196*1-(-15616547.5165574), vnx4df)
|
||||
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
|
||||
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
|
||||
CHECK_PERMUTE_SINGLE(8, 651.158691561*1-(-56163.1655411), vnx8df)
|
||||
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
|
||||
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
|
||||
CHECK_PERMUTE_SINGLE(16, 58.91516377*1-251465.81561, vnx16df)
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
/* { dg-do run { target { riscv_vector } } } */
|
||||
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O0" } */
|
||||
|
||||
#include "perm-7.c"
|
||||
|
||||
/* MASK_<N> (X) expands to N comma-separated index expressions
   X, X+2, X+4, ..., X+2*(N-1), i.e. every second index starting at X.

   The argument is parenthesized wherever it is used or forwarded so that
   an argument containing a low-precedence operator (for example
   `c ? a : b') still yields the intended indices.  */
#define MASK_2(X) (X), ((X) + 2)
#define MASK_4(X) MASK_2 (X), MASK_2 ((X) + 4)
#define MASK_8(X) MASK_4 (X), MASK_4 ((X) + 8)
#define MASK_16(X) MASK_8 (X), MASK_8 ((X) + 16)
#define MASK_32(X) MASK_16 (X), MASK_16 ((X) + 32)
#define MASK_64(X) MASK_32 (X), MASK_32 ((X) + 64)
#define MASK_128(X) MASK_64 (X), MASK_64 ((X) + 128)

/* Declare and initialize `TYPE TYPE_mask' with the NUNITS indices produced
   by MASK_<NUNITS> (0).  The trailing semicolon is part of the expansion,
   so call sites must not add one.  NUNITS must be a literal token for
   which a MASK_<NUNITS> macro exists, because it is pasted.  */
#define INIT_MASK(TYPE, NUNITS) TYPE TYPE##_mask = {MASK_##NUNITS (0)};
|
||||
|
||||
int __attribute__ ((optimize (0))) main ()
|
||||
{
|
||||
INIT_PERMUTE(2, 3, 79, vnx2qi)
|
||||
INIT_MASK (vnx2qi, 2)
|
||||
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, vnx2qi_mask, &v_vnx2qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2qi)
|
||||
INIT_PERMUTE(4, 2, -69, vnx4qi)
|
||||
INIT_MASK (vnx4qi, 4)
|
||||
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, vnx4qi_mask, &v_vnx4qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4qi)
|
||||
INIT_PERMUTE(8, 4, -33, vnx8qi)
|
||||
INIT_MASK (vnx8qi, 8)
|
||||
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, vnx8qi_mask, &v_vnx8qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8qi)
|
||||
INIT_PERMUTE(16, -3, 15, vnx16qi)
|
||||
INIT_MASK (vnx16qi, 16)
|
||||
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, vnx16qi_mask, &v_vnx16qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16qi)
|
||||
INIT_PERMUTE(32, -1, 30, vnx32qi)
|
||||
INIT_MASK (vnx32qi, 32)
|
||||
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, vnx32qi_mask, &v_vnx32qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(32, vnx32qi)
|
||||
INIT_PERMUTE(64, -1, 66, vnx64qi)
|
||||
INIT_MASK (vnx64qi, 64)
|
||||
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, vnx64qi_mask, &v_vnx64qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(64, vnx64qi)
|
||||
INIT_PERMUTE(128, -1, 38, vnx128qi)
|
||||
INIT_MASK (vnx128qi, 128)
|
||||
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, vnx128qi_mask, &v_vnx128qi_out);
|
||||
CHECK_PERMUTE_DOUBLE(128, vnx128qi)
|
||||
INIT_PERMUTE(2, 2, 30238, vnx2hi)
|
||||
INIT_MASK (vnx2hi, 2)
|
||||
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, vnx2hi_mask, &v_vnx2hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2hi)
|
||||
INIT_PERMUTE(4, -45, -2345, vnx4hi)
|
||||
INIT_MASK (vnx4hi, 4)
|
||||
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, vnx4hi_mask, &v_vnx4hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4hi)
|
||||
INIT_PERMUTE(8, 98, -18415, vnx8hi)
|
||||
INIT_MASK (vnx8hi, 8)
|
||||
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, vnx8hi_mask, &v_vnx8hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8hi)
|
||||
INIT_PERMUTE(16, 56, 3299, vnx16hi)
|
||||
INIT_MASK (vnx16hi, 16)
|
||||
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, vnx16hi_mask, &v_vnx16hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16hi)
|
||||
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
|
||||
INIT_MASK (vnx32hi, 32)
|
||||
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, vnx32hi_mask, &v_vnx32hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(32, vnx32hi)
|
||||
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
|
||||
INIT_MASK (vnx64hi, 64)
|
||||
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, vnx64hi_mask, &v_vnx64hi_out);
|
||||
CHECK_PERMUTE_DOUBLE(64, vnx64hi)
|
||||
INIT_PERMUTE(2, -428, -15651, vnx2si)
|
||||
INIT_MASK (vnx2si, 2)
|
||||
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, vnx2si_mask, &v_vnx2si_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2si)
|
||||
INIT_PERMUTE(4, 208, -55651, vnx4si)
|
||||
INIT_MASK (vnx4si, 4)
|
||||
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, vnx4si_mask, &v_vnx4si_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4si)
|
||||
INIT_PERMUTE(8, 808, 75651, vnx8si)
|
||||
INIT_MASK (vnx8si, 8)
|
||||
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, vnx8si_mask, &v_vnx8si_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8si)
|
||||
INIT_PERMUTE(16, 816, -8941561, vnx16si)
|
||||
INIT_MASK (vnx16si, 16)
|
||||
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, vnx16si_mask, &v_vnx16si_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16si)
|
||||
INIT_PERMUTE(32, -532, 98416, vnx32si)
|
||||
INIT_MASK (vnx32si, 32)
|
||||
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, vnx32si_mask, &v_vnx32si_out);
|
||||
CHECK_PERMUTE_DOUBLE(32, vnx32si)
|
||||
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
|
||||
INIT_MASK (vnx2di, 2)
|
||||
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, vnx2di_mask, &v_vnx2di_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2di)
|
||||
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
|
||||
INIT_MASK (vnx4di, 4)
|
||||
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, vnx4di_mask, &v_vnx4di_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4di)
|
||||
INIT_PERMUTE(8, 351, 9156651, vnx8di)
|
||||
INIT_MASK (vnx8di, 8)
|
||||
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, vnx8di_mask, &v_vnx8di_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8di)
|
||||
INIT_PERMUTE(16, 11, -816196231,vnx16di)
|
||||
INIT_MASK (vnx16di, 16)
|
||||
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, vnx16di_mask, &v_vnx16di_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16di)
|
||||
INIT_PERMUTE(2, 4552, -89, vnx2sf)
|
||||
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, vnx2si_mask, &v_vnx2sf_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2sf)
|
||||
INIT_PERMUTE(4, 685, 7961, vnx4sf)
|
||||
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, vnx4si_mask, &v_vnx4sf_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4sf)
|
||||
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
|
||||
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, vnx8si_mask, &v_vnx8sf_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8sf)
|
||||
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
|
||||
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, vnx16si_mask, &v_vnx16sf_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16sf)
|
||||
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
|
||||
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, vnx32si_mask, &v_vnx32sf_out);
|
||||
CHECK_PERMUTE_DOUBLE(32, vnx32sf)
|
||||
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
|
||||
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, vnx2di_mask, &v_vnx2df_out);
|
||||
CHECK_PERMUTE_DOUBLE(2, vnx2df)
|
||||
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
|
||||
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, vnx4di_mask, &v_vnx4df_out);
|
||||
CHECK_PERMUTE_DOUBLE(4, vnx4df)
|
||||
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
|
||||
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, vnx8di_mask, &v_vnx8df_out);
|
||||
CHECK_PERMUTE_DOUBLE(8, vnx8df)
|
||||
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
|
||||
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, vnx16di_mask, &v_vnx16df_out);
|
||||
CHECK_PERMUTE_DOUBLE(16, vnx16df)
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue