RISC-V: Support RVV permutation auto-vectorization

This patch supports vector permutation for VLS only by vec_perm pattern.
We will implement TARGET_VECTORIZE_VEC_PERM_CONST to support VLA permutation
in the future.

Addressed the review comments from Robin.

gcc/ChangeLog:

	* config/riscv/autovec.md (vec_perm<mode>): New pattern.
	* config/riscv/predicates.md (vector_perm_operand): New predicate.
	* config/riscv/riscv-protos.h (enum insn_type): New enum.
	(expand_vec_perm): New function.
	* config/riscv/riscv-v.cc (const_vec_all_in_range_p): Ditto.
	(gen_const_vector_dup): Ditto.
	(emit_vlmax_gather_insn): Ditto.
	(emit_vlmax_masked_gather_mu_insn): Ditto.
	(expand_vec_perm): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-6.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm-7.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm.h: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-4.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-5.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-6.c: New test.
	* gcc.target/riscv/rvv/autovec/vls-vlmax/perm_run-7.c: New test.
This commit is contained in:
Juzhe-Zhong 2023-06-01 10:36:15 +08:00 committed by Pan Li
parent 847499148e
commit bf9eee73f3
19 changed files with 1217 additions and 0 deletions

View file

@ -83,6 +83,24 @@
}
)
;; -------------------------------------------------------------------------
;; ---- [INT,FP] permutation
;; -------------------------------------------------------------------------
;; This is the pattern that permutes the vector
;; -------------------------------------------------------------------------
(define_expand "vec_perm<mode>"
[(match_operand:V 0 "register_operand")
(match_operand:V 1 "register_operand")
(match_operand:V 2 "register_operand")
(match_operand:<VINDEX> 3 "vector_perm_operand")]
"TARGET_VECTOR && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
{
riscv_vector::expand_vec_perm (operands);
DONE;
}
)
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Initialize from individual elements
;; -------------------------------------------------------------------------

View file

@ -330,6 +330,10 @@
(and (match_code "const_vector")
(match_test "riscv_vector::const_vec_all_same_in_range_p (op, 0, 31)"))))
(define_predicate "vector_perm_operand"
(ior (match_operand 0 "register_operand")
(match_code "const_vector")))
(define_predicate "ltge_operator"
(match_code "lt,ltu,ge,geu"))

View file

@ -137,6 +137,7 @@ enum insn_type
RVV_MISC_OP = 1,
RVV_UNOP = 2,
RVV_BINOP = 3,
RVV_BINOP_MU = RVV_BINOP + 2,
RVV_MERGE_OP = 4,
RVV_CMP_OP = 4,
RVV_CMP_MU_OP = RVV_CMP_OP + 2, /* +2 means mask and maskoff operand. */
@ -240,6 +241,7 @@ opt_machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx);
void expand_vec_init (rtx, rtx);
void expand_vcond (rtx *);
void expand_vec_perm (rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum vxrm_field_enum
{

View file

@ -259,6 +259,47 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT minval,
&& IN_RANGE (INTVAL (elt), minval, maxval));
}
/* Return true if VEC is a constant in which every element is in the range
[MINVAL, MAXVAL]. The elements do not need to have the same value.
This function also exists in aarch64, we may unify it in middle-end in the
future. */
static bool
const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT maxval)
{
if (!CONST_VECTOR_P (vec)
|| GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
return false;
int nunits;
if (!CONST_VECTOR_STEPPED_P (vec))
nunits = const_vector_encoded_nelts (vec);
else if (!CONST_VECTOR_NUNITS (vec).is_constant (&nunits))
return false;
for (int i = 0; i < nunits; i++)
{
rtx vec_elem = CONST_VECTOR_ELT (vec, i);
if (!CONST_INT_P (vec_elem)
|| !IN_RANGE (INTVAL (vec_elem), minval, maxval))
return false;
}
return true;
}
/* Return a const_int vector of VAL.
This function also exists in aarch64, we may unify it in middle-end in the
future. */
static rtx
gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
{
rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
return gen_const_vec_duplicate (mode, c);
}
/* Emit a vlmax vsetvl instruction. This should only be used when
optimization is disabled or after vsetvl insertion pass. */
void
@ -1927,4 +1968,116 @@ expand_vcond (rtx *ops)
gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
}
/* This function emits VLMAX vrgather instruction. Emit vrgather.vx/vi when sel
is a const duplicate vector. Otherwise, emit vrgather.vv. */
static void
emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
{
rtx elt;
insn_code icode;
machine_mode data_mode = GET_MODE (target);
if (const_vec_duplicate_p (sel, &elt))
{
icode = code_for_pred_gather_scalar (data_mode);
sel = elt;
}
else
icode = code_for_pred_gather (data_mode);
rtx ops[] = {target, op, sel};
emit_vlmax_insn (icode, RVV_BINOP, ops);
}
static void
emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
{
rtx elt;
insn_code icode;
machine_mode data_mode = GET_MODE (target);
if (const_vec_duplicate_p (sel, &elt))
{
icode = code_for_pred_gather_scalar (data_mode);
sel = elt;
}
else
icode = code_for_pred_gather (data_mode);
rtx ops[] = {target, mask, target, op, sel};
emit_vlmax_masked_mu_insn (icode, RVV_BINOP_MU, ops);
}
/* Implement vec_perm<mode>. */
void
expand_vec_perm (rtx *operands)
{
rtx target = operands[0];
rtx op0 = operands[1];
rtx op1 = operands[2];
rtx sel = operands[3];
machine_mode data_mode = GET_MODE (target);
machine_mode sel_mode = GET_MODE (sel);
/* Enforced by the pattern condition. */
int nunits = GET_MODE_NUNITS (sel_mode).to_constant ();
/* Check if the sel only references the first values vector. If each select
index is in range of [0, nunits - 1]. A single vrgather instructions is
enough. */
if (const_vec_all_in_range_p (sel, 0, nunits - 1))
{
emit_vlmax_gather_insn (target, op0, sel);
return;
}
/* Check if the two values vectors are the same. */
if (rtx_equal_p (op0, op1) || const_vec_duplicate_p (sel))
{
/* Note: vec_perm indices are supposed to wrap when they go beyond the
size of the two value vectors, i.e. the upper bits of the indices
are effectively ignored. RVV vrgather instead produces 0 for any
out-of-range indices, so we need to modulo all the vec_perm indices
to ensure they are all in range of [0, nunits - 1]. */
rtx max_sel = gen_const_vector_dup (sel_mode, nunits - 1);
rtx sel_mod = expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0,
OPTAB_DIRECT);
emit_vlmax_gather_insn (target, op1, sel_mod);
return;
}
/* Note: vec_perm indices are supposed to wrap when they go beyond the
size of the two value vectors, i.e. the upper bits of the indices
are effectively ignored. RVV vrgather instead produces 0 for any
out-of-range indices, so we need to modulo all the vec_perm indices
to ensure they are all in range of [0, 2 * nunits - 1]. */
rtx max_sel = gen_const_vector_dup (sel_mode, 2 * nunits - 1);
rtx sel_mod
= expand_simple_binop (sel_mode, AND, sel, max_sel, NULL, 0, OPTAB_DIRECT);
/* This following sequence is handling the case that:
__builtin_shufflevector (vec1, vec2, index...), the index can be any
value in range of [0, 2 * nunits - 1]. */
machine_mode mask_mode;
mask_mode = get_mask_mode (data_mode).require ();
rtx mask = gen_reg_rtx (mask_mode);
max_sel = gen_const_vector_dup (sel_mode, nunits);
/* Step 1: generate a mask that should select everything >= nunits into the
* mask. */
expand_vec_cmp (mask, GEU, sel_mod, max_sel);
/* Step2: gather every op0 values indexed by sel into target,
we don't need to care about the result of the element
whose index >= nunits. */
emit_vlmax_gather_insn (target, op0, sel_mod);
/* Step3: shift the range from (nunits, max_of_mode] to
[0, max_of_mode - nunits]. */
rtx tmp = gen_reg_rtx (sel_mode);
rtx ops[] = {tmp, sel_mod, max_sel};
emit_vlmax_insn (code_for_pred (MINUS, sel_mode), RVV_BINOP, ops);
/* Step4: gather those into the previously masked-out elements
of target. */
emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
}
} // namespace riscv_vector

View file

@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define MASK_2(X, Y) 1, 1
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define PERMUTE(TYPE, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx2qi, 2) \
T (vnx4qi, 4) \
T (vnx8qi, 8) \
T (vnx16qi, 16) \
T (vnx32qi, 32) \
T (vnx64qi, 64) \
T (vnx128qi, 128) \
T (vnx2hi, 2) \
T (vnx4hi, 4) \
T (vnx8hi, 8) \
T (vnx16hi, 16) \
T (vnx32hi, 32) \
T (vnx64hi, 64) \
T (vnx2si, 2) \
T (vnx4si, 4) \
T (vnx8si, 8) \
T (vnx16si, 16) \
T (vnx32si, 32) \
T (vnx2di, 2) \
T (vnx4di, 4) \
T (vnx8di, 8) \
T (vnx16di, 16) \
T (vnx2sf, 2) \
T (vnx4sf, 4) \
T (vnx8sf, 8) \
T (vnx16sf, 16) \
T (vnx32sf, 32) \
T (vnx2df, 2) \
T (vnx4df, 4) \
T (vnx8df, 8) \
T (vnx16df, 16)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*1} 31 } } */

View file

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define MASK_2(X, Y) 31, 31
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define PERMUTE(TYPE, NUNITS) \
void permute_##TYPE (TYPE values1, TYPE values2, TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx32qi, 32) \
T (vnx64qi, 64) \
T (vnx128qi, 128) \
T (vnx32hi, 32) \
T (vnx64hi, 64) \
T (vnx32si, 32) \
T (vnx32sf, 32)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*31} 7 } } */

View file

@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define MASK_2(X, Y) 55, 55
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define PERMUTE(TYPE, NUNITS) \
void permute_##TYPE (TYPE values1, TYPE values2, TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx64qi, 64) \
T (vnx128qi, 128) \
T (vnx64hi, 64)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+} 3 } } */

View file

@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define MASK_2(X, Y) (Y) - 1 - (X), (Y) - 2 - (X)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define PERMUTE(TYPE, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx2qi, 2) \
T (vnx4qi, 4) \
T (vnx8qi, 8) \
T (vnx16qi, 16) \
T (vnx32qi, 32) \
T (vnx64qi, 64) \
T (vnx128qi, 128) \
T (vnx2hi, 2) \
T (vnx4hi, 4) \
T (vnx8hi, 8) \
T (vnx16hi, 16) \
T (vnx32hi, 32) \
T (vnx64hi, 64) \
T (vnx2si, 2) \
T (vnx4si, 4) \
T (vnx8si, 8) \
T (vnx16si, 16) \
T (vnx32si, 32) \
T (vnx2di, 2) \
T (vnx4di, 4) \
T (vnx8di, 8) \
T (vnx16di, 16) \
T (vnx2sf, 2) \
T (vnx4sf, 4) \
T (vnx8sf, 8) \
T (vnx16sf, 16) \
T (vnx32sf, 32) \
T (vnx2df, 2) \
T (vnx4df, 4) \
T (vnx8df, 8) \
T (vnx16df, 16)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */

View file

@ -0,0 +1,49 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define PERMUTE(TYPE, TYPE2, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE2 mask, TYPE *out) \
{ \
TYPE v = __builtin_shuffle (values1, values1, mask); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx2qi, vnx2qi, 2) \
T (vnx4qi, vnx4qi, 4) \
T (vnx8qi, vnx8qi, 8) \
T (vnx16qi, vnx16qi, 16) \
T (vnx32qi, vnx32qi, 32) \
T (vnx64qi, vnx64qi, 64) \
T (vnx128qi, vnx128qi, 128) \
T (vnx2hi, vnx2hi, 2) \
T (vnx4hi, vnx4hi, 4) \
T (vnx8hi, vnx8hi, 8) \
T (vnx16hi, vnx16hi, 16) \
T (vnx32hi, vnx32hi, 32) \
T (vnx64hi, vnx64hi, 64) \
T (vnx2si, vnx2si, 2) \
T (vnx4si, vnx4si, 4) \
T (vnx8si, vnx8si, 8) \
T (vnx16si, vnx16si, 16) \
T (vnx32si, vnx32si, 32) \
T (vnx2di, vnx2di, 2) \
T (vnx4di, vnx4di, 4) \
T (vnx8di, vnx8di, 8) \
T (vnx16di, vnx16di, 16) \
T (vnx2sf, vnx2si, 2) \
T (vnx4sf, vnx4si, 4) \
T (vnx8sf, vnx8si, 8) \
T (vnx16sf, vnx16si, 16) \
T (vnx32sf, vnx32si, 32) \
T (vnx2df, vnx2di, 2) \
T (vnx4df, vnx4di, 4) \
T (vnx8df, vnx8di, 8) \
T (vnx16df, vnx16di, 16)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 31 } } */

View file

@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define MASK_2(X, Y) Y + 1, Y + 1
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define PERMUTE(TYPE, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE *out) \
{ \
TYPE v \
= __builtin_shufflevector (values1, values2, MASK_##NUNITS (0, NUNITS)); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx2qi, 2) \
T (vnx4qi, 4) \
T (vnx8qi, 8) \
T (vnx16qi, 16) \
T (vnx32qi, 32) \
T (vnx64qi, 64) \
T (vnx128qi, 128) \
T (vnx2hi, 2) \
T (vnx4hi, 4) \
T (vnx8hi, 8) \
T (vnx16hi, 16) \
T (vnx32hi, 32) \
T (vnx64hi, 64) \
T (vnx2si, 2) \
T (vnx4si, 4) \
T (vnx8si, 8) \
T (vnx16si, 16) \
T (vnx32si, 32) \
T (vnx2di, 2) \
T (vnx4di, 4) \
T (vnx8di, 8) \
T (vnx16di, 16) \
T (vnx2sf, 2) \
T (vnx4sf, 4) \
T (vnx8sf, 8) \
T (vnx16sf, 16) \
T (vnx32sf, 32) \
T (vnx2df, 2) \
T (vnx4df, 4) \
T (vnx8df, 8) \
T (vnx16df, 16)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vi\tv[0-9]+,\s*v[0-9]+,\s*1} 31 } } */

View file

@ -0,0 +1,49 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv64gcv -mabi=lp64d" } */
#include "perm.h"
#define PERMUTE(TYPE, TYPE2, NUNITS) \
__attribute__ ((noipa)) void permute_##TYPE (TYPE values1, TYPE values2, \
TYPE2 mask, TYPE *out) \
{ \
TYPE v = __builtin_shuffle (values1, values2, mask); \
*(TYPE *) out = v; \
}
#define TEST_ALL(T) \
T (vnx2qi, vnx2qi, 2) \
T (vnx4qi, vnx4qi, 4) \
T (vnx8qi, vnx8qi, 8) \
T (vnx16qi, vnx16qi, 16) \
T (vnx32qi, vnx32qi, 32) \
T (vnx64qi, vnx64qi, 64) \
T (vnx128qi, vnx128qi, 128) \
T (vnx2hi, vnx2hi, 2) \
T (vnx4hi, vnx4hi, 4) \
T (vnx8hi, vnx8hi, 8) \
T (vnx16hi, vnx16hi, 16) \
T (vnx32hi, vnx32hi, 32) \
T (vnx64hi, vnx64hi, 64) \
T (vnx2si, vnx2si, 2) \
T (vnx4si, vnx4si, 4) \
T (vnx8si, vnx8si, 8) \
T (vnx16si, vnx16si, 16) \
T (vnx32si, vnx32si, 32) \
T (vnx2di, vnx2di, 2) \
T (vnx4di, vnx4di, 4) \
T (vnx8di, vnx8di, 8) \
T (vnx16di, vnx16di, 16) \
T (vnx2sf, vnx2si, 2) \
T (vnx4sf, vnx4si, 4) \
T (vnx8sf, vnx8si, 8) \
T (vnx16sf, vnx16si, 16) \
T (vnx32sf, vnx32si, 32) \
T (vnx2df, vnx2di, 2) \
T (vnx4df, vnx4di, 4) \
T (vnx8df, vnx8di, 8) \
T (vnx16df, vnx16di, 16)
TEST_ALL (PERMUTE)
/* { dg-final { scan-assembler-times {vrgather\.vv\tv[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 31 } } */

View file

@ -0,0 +1,70 @@
#include <stdint.h>
typedef int8_t vnx2qi __attribute__ ((vector_size (2)));
typedef int8_t vnx4qi __attribute__ ((vector_size (4)));
typedef int8_t vnx8qi __attribute__ ((vector_size (8)));
typedef int8_t vnx16qi __attribute__ ((vector_size (16)));
typedef int8_t vnx32qi __attribute__ ((vector_size (32)));
typedef int8_t vnx64qi __attribute__ ((vector_size (64)));
typedef int8_t vnx128qi __attribute__ ((vector_size (128)));
typedef int16_t vnx2hi __attribute__ ((vector_size (4)));
typedef int16_t vnx4hi __attribute__ ((vector_size (8)));
typedef int16_t vnx8hi __attribute__ ((vector_size (16)));
typedef int16_t vnx16hi __attribute__ ((vector_size (32)));
typedef int16_t vnx32hi __attribute__ ((vector_size (64)));
typedef int16_t vnx64hi __attribute__ ((vector_size (128)));
typedef int32_t vnx2si __attribute__ ((vector_size (8)));
typedef int32_t vnx4si __attribute__ ((vector_size (16)));
typedef int32_t vnx8si __attribute__ ((vector_size (32)));
typedef int32_t vnx16si __attribute__ ((vector_size (64)));
typedef int32_t vnx32si __attribute__ ((vector_size (128)));
typedef int64_t vnx2di __attribute__ ((vector_size (16)));
typedef int64_t vnx4di __attribute__ ((vector_size (32)));
typedef int64_t vnx8di __attribute__ ((vector_size (64)));
typedef int64_t vnx16di __attribute__ ((vector_size (128)));
typedef float vnx2sf __attribute__ ((vector_size (8)));
typedef float vnx4sf __attribute__ ((vector_size (16)));
typedef float vnx8sf __attribute__ ((vector_size (32)));
typedef float vnx16sf __attribute__ ((vector_size (64)));
typedef float vnx32sf __attribute__ ((vector_size (128)));
typedef double vnx2df __attribute__ ((vector_size (16)));
typedef double vnx4df __attribute__ ((vector_size (32)));
typedef double vnx8df __attribute__ ((vector_size (64)));
typedef double vnx16df __attribute__ ((vector_size (128)));
#define INIT_PERMUTE(NUNITS, NUM1, NUM2, TYPE) \
TYPE v_##TYPE##_in1; \
TYPE v_##TYPE##_in2; \
TYPE v_##TYPE##_out = {0}; \
for (int i = 0; i < NUNITS; i++) \
{ \
v_##TYPE##_in1[i] = i * NUM1 + NUM2; \
v_##TYPE##_in2[i] = i * NUM1 - NUM2; \
}
#define CHECK_PERMUTE_SINGLE(NUNITS, VALUE, TYPE) \
for (int i = 0; i < NUNITS; i++) \
if (v_##TYPE##_out[i] != VALUE) \
__builtin_abort ();
#define CHECK_PERMUTE_REVERSE(NUNITS, TYPE) \
for (int i = 0; i < NUNITS; i++) \
if (v_##TYPE##_out[i] != v_##TYPE##_in1[NUNITS - 1 - i]) \
__builtin_abort ();
#define CHECK_PERMUTE_DOUBLE(NUNITS, TYPE) \
for (int i = 0; i < NUNITS; i++) \
{ \
int new_index = i * 2; \
if (new_index < NUNITS \
&& v_##TYPE##_out[i] != v_##TYPE##_in1[new_index]) \
__builtin_abort (); \
if (new_index >= NUNITS \
&& v_##TYPE##_out[i] != v_##TYPE##_in2[new_index % NUNITS]) \
__builtin_abort (); \
}

View file

@ -0,0 +1,104 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-1.c"
int __attribute__ ((optimize (0)))
main ()
{
INIT_PERMUTE(2, 3, 79, vnx2qi)
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
CHECK_PERMUTE_SINGLE(2, 3*1+79, vnx2qi)
INIT_PERMUTE(4, 2, -69, vnx4qi)
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
CHECK_PERMUTE_SINGLE(4, 2*1+-69, vnx4qi)
INIT_PERMUTE(8, 4, -33, vnx8qi)
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
CHECK_PERMUTE_SINGLE(8, 4*1+-33, vnx8qi)
INIT_PERMUTE(16, -3, 15, vnx16qi)
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
CHECK_PERMUTE_SINGLE(16, -3*1+15, vnx16qi)
INIT_PERMUTE(32, -1, 30, vnx32qi)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
CHECK_PERMUTE_SINGLE(32, -1*1+30, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
CHECK_PERMUTE_SINGLE(64, -1*1+66, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
CHECK_PERMUTE_SINGLE(128, -1*1+38, vnx128qi)
INIT_PERMUTE(2, 2, 30238, vnx2hi)
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
CHECK_PERMUTE_SINGLE(2, 2*1+30238, vnx2hi)
INIT_PERMUTE(4, -45, -2345, vnx4hi)
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
CHECK_PERMUTE_SINGLE(4, -45*1+-2345, vnx4hi)
INIT_PERMUTE(8, 98, -18415, vnx8hi)
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
CHECK_PERMUTE_SINGLE(8, 98*1+-18415, vnx8hi)
INIT_PERMUTE(16, 56, 3299, vnx16hi)
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
CHECK_PERMUTE_SINGLE(16, 56*1+3299, vnx16hi)
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
CHECK_PERMUTE_SINGLE(32, 15641*1+-9156, vnx32hi)
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
CHECK_PERMUTE_SINGLE(64, -25641*1+8093, vnx64hi)
INIT_PERMUTE(2, -428, -15651, vnx2si)
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
CHECK_PERMUTE_SINGLE(2, -428*1+-15651, vnx2si)
INIT_PERMUTE(4, 208, -55651, vnx4si)
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
CHECK_PERMUTE_SINGLE(4, 208*1+-55651, vnx4si)
INIT_PERMUTE(8, 808, 75651, vnx8si)
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
CHECK_PERMUTE_SINGLE(8, 808*1+75651, vnx8si)
INIT_PERMUTE(16, 816, -8941561, vnx16si)
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
CHECK_PERMUTE_SINGLE(16, 816*1+-8941561, vnx16si)
INIT_PERMUTE(32, -532, 98416, vnx32si)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
CHECK_PERMUTE_SINGLE(32, -532*1+98416, vnx32si)
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
CHECK_PERMUTE_SINGLE(2, -4161*1+9551616, vnx2di)
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
CHECK_PERMUTE_SINGLE(4, 7259*1+-15644961, vnx4di)
INIT_PERMUTE(8, 351, 9156651, vnx8di)
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
CHECK_PERMUTE_SINGLE(8, 351*1+9156651, vnx8di)
INIT_PERMUTE(16, 11, -816196231,vnx16di)
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
CHECK_PERMUTE_SINGLE(16, 11*1+-816196231, vnx16di)
INIT_PERMUTE(2, 4552, -89, vnx2sf)
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
CHECK_PERMUTE_SINGLE(2, (4552+-89), vnx2sf)
INIT_PERMUTE(4, 685, 7961, vnx4sf)
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
CHECK_PERMUTE_SINGLE(4, 685+7961, vnx4sf)
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
CHECK_PERMUTE_SINGLE(8, 3927*1+16513, vnx8sf)
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
CHECK_PERMUTE_SINGLE(16, -68*1+16156571, vnx16sf)
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
CHECK_PERMUTE_SINGLE(32, 9985*1+1561318, vnx32sf)
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
CHECK_PERMUTE_SINGLE(2, -1565.1561*1+-5641565.515, vnx2df)
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
CHECK_PERMUTE_SINGLE(4, -189.14897196*1+-15616547.5165574, vnx4df)
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
CHECK_PERMUTE_SINGLE(8, 651.158691561*1+-56163.1655411, vnx8df)
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
CHECK_PERMUTE_SINGLE(16, 58.91516377*1+251465.81561, vnx16df)
return 0;
}

View file

@ -0,0 +1,32 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-2.c"
int __attribute__ ((optimize (0)))
main ()
{
INIT_PERMUTE(32, -1, 30, vnx32qi)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
CHECK_PERMUTE_SINGLE(32, -1*31+30, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
CHECK_PERMUTE_SINGLE(64, -1*31+66, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
CHECK_PERMUTE_SINGLE(128, -1*31+38, vnx128qi)
INIT_PERMUTE(32, 156, -9156, vnx32hi)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
CHECK_PERMUTE_SINGLE(32, 156*31+-9156, vnx32hi)
INIT_PERMUTE(64, -251, 8093, vnx64hi)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
CHECK_PERMUTE_SINGLE(64, -251*31+8093, vnx64hi)
INIT_PERMUTE(32, -532, 98416, vnx32si)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
CHECK_PERMUTE_SINGLE(32, -532*31+98416, vnx32si)
INIT_PERMUTE(32, 995, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
CHECK_PERMUTE_SINGLE(32, 995*31+1561318, vnx32sf)
return 0;
}

View file

@ -0,0 +1,20 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-3.c"
int __attribute__ ((optimize (0)))
main ()
{
INIT_PERMUTE(64, -1, 66, vnx64qi)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
CHECK_PERMUTE_SINGLE(64, -1*55+66, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
CHECK_PERMUTE_SINGLE(128, -1*55+38, vnx128qi)
INIT_PERMUTE(64, -251, 8093, vnx64hi)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
CHECK_PERMUTE_SINGLE(64, -251*55+8093, vnx64hi)
return 0;
}

View file

@ -0,0 +1,104 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-4.c"
int __attribute__ ((optimize (0)))
main ()
{
INIT_PERMUTE(2, 3, 79, vnx2qi)
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
CHECK_PERMUTE_REVERSE(2, vnx2qi)
INIT_PERMUTE(4, 2, -69, vnx4qi)
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
CHECK_PERMUTE_REVERSE(4, vnx4qi)
INIT_PERMUTE(8, 4, -33, vnx8qi)
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
CHECK_PERMUTE_REVERSE(8, vnx8qi)
INIT_PERMUTE(16, -3, 15, vnx16qi)
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
CHECK_PERMUTE_REVERSE(16, vnx16qi)
INIT_PERMUTE(32, -1, 30, vnx32qi)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
CHECK_PERMUTE_REVERSE(32, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
CHECK_PERMUTE_REVERSE(64, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
CHECK_PERMUTE_REVERSE(128, vnx128qi)
INIT_PERMUTE(2, 2, 30238, vnx2hi)
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
CHECK_PERMUTE_REVERSE(2, vnx2hi)
INIT_PERMUTE(4, -45, -2345, vnx4hi)
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
CHECK_PERMUTE_REVERSE(4, vnx4hi)
INIT_PERMUTE(8, 98, -18415, vnx8hi)
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
CHECK_PERMUTE_REVERSE(8, vnx8hi)
INIT_PERMUTE(16, 56, 3299, vnx16hi)
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
CHECK_PERMUTE_REVERSE(16, vnx16hi)
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
CHECK_PERMUTE_REVERSE(32, vnx32hi)
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
CHECK_PERMUTE_REVERSE(64, vnx64hi)
INIT_PERMUTE(2, -428, -15651, vnx2si)
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
CHECK_PERMUTE_REVERSE(2, vnx2si)
INIT_PERMUTE(4, 208, -55651, vnx4si)
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
CHECK_PERMUTE_REVERSE(4, vnx4si)
INIT_PERMUTE(8, 808, 75651, vnx8si)
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
CHECK_PERMUTE_REVERSE(8, vnx8si)
INIT_PERMUTE(16, 816, -8941561, vnx16si)
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
CHECK_PERMUTE_REVERSE(16, vnx16si)
INIT_PERMUTE(32, -532, 98416, vnx32si)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
CHECK_PERMUTE_REVERSE(32, vnx32si)
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
CHECK_PERMUTE_REVERSE(2, vnx2di)
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
CHECK_PERMUTE_REVERSE(4, vnx4di)
INIT_PERMUTE(8, 351, 9156651, vnx8di)
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
CHECK_PERMUTE_REVERSE(8, vnx8di)
INIT_PERMUTE(16, 11, -816196231,vnx16di)
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
CHECK_PERMUTE_REVERSE(16, vnx16di)
INIT_PERMUTE(2, 4552, -89, vnx2sf)
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
CHECK_PERMUTE_REVERSE(2, vnx2sf)
INIT_PERMUTE(4, 685, 7961, vnx4sf)
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
CHECK_PERMUTE_REVERSE(4, vnx4sf)
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
CHECK_PERMUTE_REVERSE(8, vnx8sf)
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
CHECK_PERMUTE_REVERSE(16, vnx16sf)
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
CHECK_PERMUTE_REVERSE(32, vnx32sf)
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
CHECK_PERMUTE_REVERSE(2, vnx2df)
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
CHECK_PERMUTE_REVERSE(4, vnx4df)
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
CHECK_PERMUTE_REVERSE(8, vnx8df)
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
CHECK_PERMUTE_REVERSE(16, vnx16df)
return 0;
}

View file

@ -0,0 +1,137 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-5.c"
#define MASK_2(X, Y) (Y) - 1 - (X) + (Y), (Y) -2 - (X) + (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)
#define MASK_8(X, Y) MASK_4 (X, Y), MASK_4 (X + 4, Y)
#define MASK_16(X, Y) MASK_8 (X, Y), MASK_8 (X + 8, Y)
#define MASK_32(X, Y) MASK_16 (X, Y), MASK_16 (X + 16, Y)
#define MASK_64(X, Y) MASK_32 (X, Y), MASK_32 (X + 32, Y)
#define MASK_128(X, Y) MASK_64 (X, Y), MASK_64 (X + 64, Y)
#define INIT_MASK(TYPE, NUNTIS) \
TYPE TYPE##_mask = {MASK_##NUNTIS (0, NUNTIS)};
/* Runtime driver: feed the vectorized permute_* kernels (compiled from the
   included perm source) reversing selectors and verify the results.
   optimize(0) presumably keeps this driver itself scalar so only the
   kernels under test are vectorized — TODO confirm against the harness.
   INIT_PERMUTE's arguments look like (nunits, seed1, seed2, type); the
   exact meaning is defined in the included perm header — verify there.  */
int __attribute__ ((optimize (0)))
main ()
{
/* QImode (int8) element types, 2 through 128 elements.  */
INIT_PERMUTE(2, 3, 79, vnx2qi)
INIT_MASK (vnx2qi, 2)
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, vnx2qi_mask, &v_vnx2qi_out);
CHECK_PERMUTE_REVERSE(2, vnx2qi)
INIT_PERMUTE(4, 2, -69, vnx4qi)
INIT_MASK (vnx4qi, 4)
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, vnx4qi_mask, &v_vnx4qi_out);
CHECK_PERMUTE_REVERSE(4, vnx4qi)
INIT_PERMUTE(8, 4, -33, vnx8qi)
INIT_MASK (vnx8qi, 8)
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, vnx8qi_mask, &v_vnx8qi_out);
CHECK_PERMUTE_REVERSE(8, vnx8qi)
INIT_PERMUTE(16, -3, 15, vnx16qi)
INIT_MASK (vnx16qi, 16)
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, vnx16qi_mask, &v_vnx16qi_out);
CHECK_PERMUTE_REVERSE(16, vnx16qi)
INIT_PERMUTE(32, -1, 30, vnx32qi)
INIT_MASK (vnx32qi, 32)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, vnx32qi_mask, &v_vnx32qi_out);
CHECK_PERMUTE_REVERSE(32, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
INIT_MASK (vnx64qi, 64)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, vnx64qi_mask, &v_vnx64qi_out);
CHECK_PERMUTE_REVERSE(64, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
INIT_MASK (vnx128qi, 128)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, vnx128qi_mask, &v_vnx128qi_out);
CHECK_PERMUTE_REVERSE(128, vnx128qi)
/* HImode (int16) element types.  */
INIT_PERMUTE(2, 2, 30238, vnx2hi)
INIT_MASK (vnx2hi, 2)
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, vnx2hi_mask, &v_vnx2hi_out);
CHECK_PERMUTE_REVERSE(2, vnx2hi)
INIT_PERMUTE(4, -45, -2345, vnx4hi)
INIT_MASK (vnx4hi, 4)
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, vnx4hi_mask, &v_vnx4hi_out);
CHECK_PERMUTE_REVERSE(4, vnx4hi)
INIT_PERMUTE(8, 98, -18415, vnx8hi)
INIT_MASK (vnx8hi, 8)
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, vnx8hi_mask, &v_vnx8hi_out);
CHECK_PERMUTE_REVERSE(8, vnx8hi)
INIT_PERMUTE(16, 56, 3299, vnx16hi)
INIT_MASK (vnx16hi, 16)
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, vnx16hi_mask, &v_vnx16hi_out);
CHECK_PERMUTE_REVERSE(16, vnx16hi)
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
INIT_MASK (vnx32hi, 32)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, vnx32hi_mask, &v_vnx32hi_out);
CHECK_PERMUTE_REVERSE(32, vnx32hi)
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
INIT_MASK (vnx64hi, 64)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, vnx64hi_mask, &v_vnx64hi_out);
CHECK_PERMUTE_REVERSE(64, vnx64hi)
/* SImode (int32) element types.  These masks are reused below for the
   same-width SF float cases.  */
INIT_PERMUTE(2, -428, -15651, vnx2si)
INIT_MASK (vnx2si, 2)
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, vnx2si_mask, &v_vnx2si_out);
CHECK_PERMUTE_REVERSE(2, vnx2si)
INIT_PERMUTE(4, 208, -55651, vnx4si)
INIT_MASK (vnx4si, 4)
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, vnx4si_mask, &v_vnx4si_out);
CHECK_PERMUTE_REVERSE(4, vnx4si)
INIT_PERMUTE(8, 808, 75651, vnx8si)
INIT_MASK (vnx8si, 8)
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, vnx8si_mask, &v_vnx8si_out);
CHECK_PERMUTE_REVERSE(8, vnx8si)
INIT_PERMUTE(16, 816, -8941561, vnx16si)
INIT_MASK (vnx16si, 16)
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, vnx16si_mask, &v_vnx16si_out);
CHECK_PERMUTE_REVERSE(16, vnx16si)
INIT_PERMUTE(32, -532, 98416, vnx32si)
INIT_MASK (vnx32si, 32)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, vnx32si_mask, &v_vnx32si_out);
CHECK_PERMUTE_REVERSE(32, vnx32si)
/* DImode (int64) element types.  These masks are reused below for the
   same-width DF float cases.  */
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
INIT_MASK (vnx2di, 2)
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, vnx2di_mask, &v_vnx2di_out);
CHECK_PERMUTE_REVERSE(2, vnx2di)
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
INIT_MASK (vnx4di, 4)
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, vnx4di_mask, &v_vnx4di_out);
CHECK_PERMUTE_REVERSE(4, vnx4di)
INIT_PERMUTE(8, 351, 9156651, vnx8di)
INIT_MASK (vnx8di, 8)
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, vnx8di_mask, &v_vnx8di_out);
CHECK_PERMUTE_REVERSE(8, vnx8di)
INIT_PERMUTE(16, 11, -816196231,vnx16di)
INIT_MASK (vnx16di, 16)
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, vnx16di_mask, &v_vnx16di_out);
CHECK_PERMUTE_REVERSE(16, vnx16di)
/* SFmode (float) cases: no INIT_MASK here — they deliberately pass the
   integer vnx*si masks built above, presumably because the permute
   selector must be an integer vector; verify against the kernels'
   declarations in the included perm source.  */
INIT_PERMUTE(2, 4552, -89, vnx2sf)
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, vnx2si_mask, &v_vnx2sf_out);
CHECK_PERMUTE_REVERSE(2, vnx2sf)
INIT_PERMUTE(4, 685, 7961, vnx4sf)
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, vnx4si_mask, &v_vnx4sf_out);
CHECK_PERMUTE_REVERSE(4, vnx4sf)
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, vnx8si_mask, &v_vnx8sf_out);
CHECK_PERMUTE_REVERSE(8, vnx8sf)
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, vnx16si_mask, &v_vnx16sf_out);
CHECK_PERMUTE_REVERSE(16, vnx16sf)
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, vnx32si_mask, &v_vnx32sf_out);
CHECK_PERMUTE_REVERSE(32, vnx32sf)
/* DFmode (double) cases: likewise reuse the vnx*di integer masks.  */
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, vnx2di_mask, &v_vnx2df_out);
CHECK_PERMUTE_REVERSE(2, vnx2df)
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, vnx4di_mask, &v_vnx4df_out);
CHECK_PERMUTE_REVERSE(4, vnx4df)
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, vnx8di_mask, &v_vnx8df_out);
CHECK_PERMUTE_REVERSE(8, vnx8df)
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, vnx16di_mask, &v_vnx16df_out);
CHECK_PERMUTE_REVERSE(16, vnx16df)
return 0;
}

View file

@ -0,0 +1,104 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O3" } */
#include "perm-6.c"
/* Runtime driver: exercise the constant-selector permute_* kernels from
   the included perm-6.c (no mask argument — the selector is baked into
   the kernel) and check every output element against a single expected
   value computed from the same seeds passed to INIT_PERMUTE
   (e.g. seeds 3 and 79 give expected 3*1-79).  optimize(0) presumably
   keeps this driver itself scalar — TODO confirm against the harness.
   INIT_PERMUTE's arguments look like (nunits, seed1, seed2, type);
   verify against the included perm header.  */
int __attribute__ ((optimize (0)))
main ()
{
/* QImode (int8) element types, 2 through 128 elements.  */
INIT_PERMUTE(2, 3, 79, vnx2qi)
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, &v_vnx2qi_out);
CHECK_PERMUTE_SINGLE(2, 3*1-79, vnx2qi)
INIT_PERMUTE(4, 2, -69, vnx4qi)
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, &v_vnx4qi_out);
CHECK_PERMUTE_SINGLE(4, 2*1-(-69), vnx4qi)
INIT_PERMUTE(8, 4, -33, vnx8qi)
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, &v_vnx8qi_out);
CHECK_PERMUTE_SINGLE(8, 4*1-(-33), vnx8qi)
INIT_PERMUTE(16, -3, 15, vnx16qi)
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, &v_vnx16qi_out);
CHECK_PERMUTE_SINGLE(16, -3*1-15, vnx16qi)
INIT_PERMUTE(32, -1, 30, vnx32qi)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, &v_vnx32qi_out);
CHECK_PERMUTE_SINGLE(32, -1*1-30, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, &v_vnx64qi_out);
CHECK_PERMUTE_SINGLE(64, -1*1-66, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, &v_vnx128qi_out);
CHECK_PERMUTE_SINGLE(128, -1*1-38, vnx128qi)
/* HImode (int16) element types.  */
INIT_PERMUTE(2, 2, 30238, vnx2hi)
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, &v_vnx2hi_out);
CHECK_PERMUTE_SINGLE(2, 2*1-30238, vnx2hi)
INIT_PERMUTE(4, -45, -2345, vnx4hi)
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, &v_vnx4hi_out);
CHECK_PERMUTE_SINGLE(4, -45*1-(-2345), vnx4hi)
INIT_PERMUTE(8, 98, -18415, vnx8hi)
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, &v_vnx8hi_out);
CHECK_PERMUTE_SINGLE(8, 98*1-(-18415), vnx8hi)
INIT_PERMUTE(16, 56, 3299, vnx16hi)
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, &v_vnx16hi_out);
CHECK_PERMUTE_SINGLE(16, 56*1-3299, vnx16hi)
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, &v_vnx32hi_out);
CHECK_PERMUTE_SINGLE(32, 15641*1-(-9156), vnx32hi)
INIT_PERMUTE(64, -2564, 8093, vnx64hi)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, &v_vnx64hi_out);
CHECK_PERMUTE_SINGLE(64, -2564*1-8093, vnx64hi)
/* SImode (int32) element types.  */
INIT_PERMUTE(2, -428, -15651, vnx2si)
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, &v_vnx2si_out);
CHECK_PERMUTE_SINGLE(2, -428*1-(-15651), vnx2si)
INIT_PERMUTE(4, 208, -55651, vnx4si)
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, &v_vnx4si_out);
CHECK_PERMUTE_SINGLE(4, 208*1-(-55651), vnx4si)
INIT_PERMUTE(8, 808, 75651, vnx8si)
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, &v_vnx8si_out);
CHECK_PERMUTE_SINGLE(8, 808*1-75651, vnx8si)
INIT_PERMUTE(16, 816, -8941561, vnx16si)
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, &v_vnx16si_out);
CHECK_PERMUTE_SINGLE(16, 816*1-(-8941561), vnx16si)
INIT_PERMUTE(32, -532, 98416, vnx32si)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, &v_vnx32si_out);
CHECK_PERMUTE_SINGLE(32, -532*1-98416, vnx32si)
/* DImode (int64) element types.  */
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, &v_vnx2di_out);
CHECK_PERMUTE_SINGLE(2, -4161*1-9551616, vnx2di)
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, &v_vnx4di_out);
CHECK_PERMUTE_SINGLE(4, 7259*1-(-15644961), vnx4di)
INIT_PERMUTE(8, 351, 9156651, vnx8di)
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, &v_vnx8di_out);
CHECK_PERMUTE_SINGLE(8, 351*1-9156651, vnx8di)
INIT_PERMUTE(16, 11, -816196231,vnx16di)
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, &v_vnx16di_out);
CHECK_PERMUTE_SINGLE(16, 11*1-(-816196231), vnx16di)
/* SFmode (float) element types.  */
INIT_PERMUTE(2, 4552, -89, vnx2sf)
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, &v_vnx2sf_out);
CHECK_PERMUTE_SINGLE(2, (4552-(-89)), vnx2sf)
INIT_PERMUTE(4, 685, 7961, vnx4sf)
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, &v_vnx4sf_out);
CHECK_PERMUTE_SINGLE(4, 685-7961, vnx4sf)
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, &v_vnx8sf_out);
CHECK_PERMUTE_SINGLE(8, 3927*1-16513, vnx8sf)
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, &v_vnx16sf_out);
CHECK_PERMUTE_SINGLE(16, -68*1-16156571, vnx16sf)
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, &v_vnx32sf_out);
CHECK_PERMUTE_SINGLE(32, 9985*1-1561318, vnx32sf)
/* DFmode (double) element types.  */
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, &v_vnx2df_out);
CHECK_PERMUTE_SINGLE(2, -1565.1561*1-(-5641565.515), vnx2df)
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, &v_vnx4df_out);
CHECK_PERMUTE_SINGLE(4, -189.14897196*1-(-15616547.5165574), vnx4df)
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, &v_vnx8df_out);
CHECK_PERMUTE_SINGLE(8, 651.158691561*1-(-56163.1655411), vnx8df)
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, &v_vnx16df_out);
CHECK_PERMUTE_SINGLE(16, 58.91516377*1-251465.81561, vnx16df)
return 0;
}

View file

@ -0,0 +1,135 @@
/* { dg-do run { target { riscv_vector } } } */
/* { dg-options "--param riscv-autovec-preference=fixed-vlmax -O0" } */
#include "perm-7.c"
/* Build a permute selector picking the even-indexed elements of the two
   concatenated input vectors: {0, 2, 4, ..., 2*N - 2} for an N-element
   type.  X is the first selected index.  Arguments are parenthesized so
   the macros stay safe for arbitrary expressions.  */
#define MASK_2(X) (X), (X) + 2
#define MASK_4(X) MASK_2 (X), MASK_2 ((X) + 4)
#define MASK_8(X) MASK_4 (X), MASK_4 ((X) + 8)
#define MASK_16(X) MASK_8 (X), MASK_8 ((X) + 16)
#define MASK_32(X) MASK_16 (X), MASK_16 ((X) + 32)
#define MASK_64(X) MASK_32 (X), MASK_32 ((X) + 64)
#define MASK_128(X) MASK_64 (X), MASK_64 ((X) + 128)
/* Declare TYPE##_mask with NUNITS even-index selector elements.
   (Parameter renamed from the misspelled NUNTIS to NUNITS.)  */
#define INIT_MASK(TYPE, NUNITS) TYPE TYPE##_mask = {MASK_##NUNITS (0)};
/* Runtime driver: feed the permute_* kernels from the included perm-7.c
   an even-index selector over both operands and verify the results via
   CHECK_PERMUTE_DOUBLE.  optimize(0) presumably keeps this driver itself
   scalar — TODO confirm against the harness.  INIT_PERMUTE's arguments
   look like (nunits, seed1, seed2, type); verify against the included
   perm header.  */
int __attribute__ ((optimize (0))) main ()
{
/* QImode (int8) element types, 2 through 128 elements.  */
INIT_PERMUTE(2, 3, 79, vnx2qi)
INIT_MASK (vnx2qi, 2)
permute_vnx2qi (v_vnx2qi_in1, v_vnx2qi_in2, vnx2qi_mask, &v_vnx2qi_out);
CHECK_PERMUTE_DOUBLE(2, vnx2qi)
INIT_PERMUTE(4, 2, -69, vnx4qi)
INIT_MASK (vnx4qi, 4)
permute_vnx4qi (v_vnx4qi_in1, v_vnx4qi_in2, vnx4qi_mask, &v_vnx4qi_out);
CHECK_PERMUTE_DOUBLE(4, vnx4qi)
INIT_PERMUTE(8, 4, -33, vnx8qi)
INIT_MASK (vnx8qi, 8)
permute_vnx8qi (v_vnx8qi_in1, v_vnx8qi_in2, vnx8qi_mask, &v_vnx8qi_out);
CHECK_PERMUTE_DOUBLE(8, vnx8qi)
INIT_PERMUTE(16, -3, 15, vnx16qi)
INIT_MASK (vnx16qi, 16)
permute_vnx16qi (v_vnx16qi_in1, v_vnx16qi_in2, vnx16qi_mask, &v_vnx16qi_out);
CHECK_PERMUTE_DOUBLE(16, vnx16qi)
INIT_PERMUTE(32, -1, 30, vnx32qi)
INIT_MASK (vnx32qi, 32)
permute_vnx32qi (v_vnx32qi_in1, v_vnx32qi_in2, vnx32qi_mask, &v_vnx32qi_out);
CHECK_PERMUTE_DOUBLE(32, vnx32qi)
INIT_PERMUTE(64, -1, 66, vnx64qi)
INIT_MASK (vnx64qi, 64)
permute_vnx64qi (v_vnx64qi_in1, v_vnx64qi_in2, vnx64qi_mask, &v_vnx64qi_out);
CHECK_PERMUTE_DOUBLE(64, vnx64qi)
INIT_PERMUTE(128, -1, 38, vnx128qi)
INIT_MASK (vnx128qi, 128)
permute_vnx128qi (v_vnx128qi_in1, v_vnx128qi_in2, vnx128qi_mask, &v_vnx128qi_out);
CHECK_PERMUTE_DOUBLE(128, vnx128qi)
/* HImode (int16) element types.  */
INIT_PERMUTE(2, 2, 30238, vnx2hi)
INIT_MASK (vnx2hi, 2)
permute_vnx2hi (v_vnx2hi_in1, v_vnx2hi_in2, vnx2hi_mask, &v_vnx2hi_out);
CHECK_PERMUTE_DOUBLE(2, vnx2hi)
INIT_PERMUTE(4, -45, -2345, vnx4hi)
INIT_MASK (vnx4hi, 4)
permute_vnx4hi (v_vnx4hi_in1, v_vnx4hi_in2, vnx4hi_mask, &v_vnx4hi_out);
CHECK_PERMUTE_DOUBLE(4, vnx4hi)
INIT_PERMUTE(8, 98, -18415, vnx8hi)
INIT_MASK (vnx8hi, 8)
permute_vnx8hi (v_vnx8hi_in1, v_vnx8hi_in2, vnx8hi_mask, &v_vnx8hi_out);
CHECK_PERMUTE_DOUBLE(8, vnx8hi)
INIT_PERMUTE(16, 56, 3299, vnx16hi)
INIT_MASK (vnx16hi, 16)
permute_vnx16hi (v_vnx16hi_in1, v_vnx16hi_in2, vnx16hi_mask, &v_vnx16hi_out);
CHECK_PERMUTE_DOUBLE(16, vnx16hi)
INIT_PERMUTE(32, 15641, -9156, vnx32hi)
INIT_MASK (vnx32hi, 32)
permute_vnx32hi (v_vnx32hi_in1, v_vnx32hi_in2, vnx32hi_mask, &v_vnx32hi_out);
CHECK_PERMUTE_DOUBLE(32, vnx32hi)
INIT_PERMUTE(64, -25641, 8093, vnx64hi)
INIT_MASK (vnx64hi, 64)
permute_vnx64hi (v_vnx64hi_in1, v_vnx64hi_in2, vnx64hi_mask, &v_vnx64hi_out);
CHECK_PERMUTE_DOUBLE(64, vnx64hi)
/* SImode (int32) element types.  These masks are reused below for the
   same-width SF float cases.  */
INIT_PERMUTE(2, -428, -15651, vnx2si)
INIT_MASK (vnx2si, 2)
permute_vnx2si (v_vnx2si_in1, v_vnx2si_in2, vnx2si_mask, &v_vnx2si_out);
CHECK_PERMUTE_DOUBLE(2, vnx2si)
INIT_PERMUTE(4, 208, -55651, vnx4si)
INIT_MASK (vnx4si, 4)
permute_vnx4si (v_vnx4si_in1, v_vnx4si_in2, vnx4si_mask, &v_vnx4si_out);
CHECK_PERMUTE_DOUBLE(4, vnx4si)
INIT_PERMUTE(8, 808, 75651, vnx8si)
INIT_MASK (vnx8si, 8)
permute_vnx8si (v_vnx8si_in1, v_vnx8si_in2, vnx8si_mask, &v_vnx8si_out);
CHECK_PERMUTE_DOUBLE(8, vnx8si)
INIT_PERMUTE(16, 816, -8941561, vnx16si)
INIT_MASK (vnx16si, 16)
permute_vnx16si (v_vnx16si_in1, v_vnx16si_in2, vnx16si_mask, &v_vnx16si_out);
CHECK_PERMUTE_DOUBLE(16, vnx16si)
INIT_PERMUTE(32, -532, 98416, vnx32si)
INIT_MASK (vnx32si, 32)
permute_vnx32si (v_vnx32si_in1, v_vnx32si_in2, vnx32si_mask, &v_vnx32si_out);
CHECK_PERMUTE_DOUBLE(32, vnx32si)
/* DImode (int64) element types.  These masks are reused below for the
   same-width DF float cases.  */
INIT_PERMUTE(2, -4161, 9551616, vnx2di)
INIT_MASK (vnx2di, 2)
permute_vnx2di (v_vnx2di_in1, v_vnx2di_in2, vnx2di_mask, &v_vnx2di_out);
CHECK_PERMUTE_DOUBLE(2, vnx2di)
INIT_PERMUTE(4, 7259, -15644961, vnx4di)
INIT_MASK (vnx4di, 4)
permute_vnx4di (v_vnx4di_in1, v_vnx4di_in2, vnx4di_mask, &v_vnx4di_out);
CHECK_PERMUTE_DOUBLE(4, vnx4di)
INIT_PERMUTE(8, 351, 9156651, vnx8di)
INIT_MASK (vnx8di, 8)
permute_vnx8di (v_vnx8di_in1, v_vnx8di_in2, vnx8di_mask, &v_vnx8di_out);
CHECK_PERMUTE_DOUBLE(8, vnx8di)
INIT_PERMUTE(16, 11, -816196231,vnx16di)
INIT_MASK (vnx16di, 16)
permute_vnx16di (v_vnx16di_in1, v_vnx16di_in2, vnx16di_mask, &v_vnx16di_out);
CHECK_PERMUTE_DOUBLE(16, vnx16di)
/* SFmode (float) cases: no INIT_MASK here — they deliberately pass the
   integer vnx*si masks built above, presumably because the permute
   selector must be an integer vector; verify against the kernels'
   declarations in the included perm source.  */
INIT_PERMUTE(2, 4552, -89, vnx2sf)
permute_vnx2sf (v_vnx2sf_in1, v_vnx2sf_in2, vnx2si_mask, &v_vnx2sf_out);
CHECK_PERMUTE_DOUBLE(2, vnx2sf)
INIT_PERMUTE(4, 685, 7961, vnx4sf)
permute_vnx4sf (v_vnx4sf_in1, v_vnx4sf_in2, vnx4si_mask, &v_vnx4sf_out);
CHECK_PERMUTE_DOUBLE(4, vnx4sf)
INIT_PERMUTE(8, 3927, 16513, vnx8sf)
permute_vnx8sf (v_vnx8sf_in1, v_vnx8sf_in2, vnx8si_mask, &v_vnx8sf_out);
CHECK_PERMUTE_DOUBLE(8, vnx8sf)
INIT_PERMUTE(16, -68, 16156571, vnx16sf)
permute_vnx16sf (v_vnx16sf_in1, v_vnx16sf_in2, vnx16si_mask, &v_vnx16sf_out);
CHECK_PERMUTE_DOUBLE(16, vnx16sf)
INIT_PERMUTE(32, 9985, 1561318, vnx32sf)
permute_vnx32sf (v_vnx32sf_in1, v_vnx32sf_in2, vnx32si_mask, &v_vnx32sf_out);
CHECK_PERMUTE_DOUBLE(32, vnx32sf)
/* DFmode (double) cases: likewise reuse the vnx*di integer masks.  */
INIT_PERMUTE(2, -1565.1561, -5641565.515, vnx2df)
permute_vnx2df (v_vnx2df_in1, v_vnx2df_in2, vnx2di_mask, &v_vnx2df_out);
CHECK_PERMUTE_DOUBLE(2, vnx2df)
INIT_PERMUTE(4, -189.14897196, -15616547.5165574, vnx4df)
permute_vnx4df (v_vnx4df_in1, v_vnx4df_in2, vnx4di_mask, &v_vnx4df_out);
CHECK_PERMUTE_DOUBLE(4, vnx4df)
INIT_PERMUTE(8, 651.158691561, -56163.1655411, vnx8df)
permute_vnx8df (v_vnx8df_in1, v_vnx8df_in2, vnx8di_mask, &v_vnx8df_out);
CHECK_PERMUTE_DOUBLE(8, vnx8df)
INIT_PERMUTE(16, 58.91516377, 251465.81561, vnx16df)
permute_vnx16df (v_vnx16df_in1, v_vnx16df_in2, vnx16di_mask, &v_vnx16df_out);
CHECK_PERMUTE_DOUBLE(16, vnx16df)
return 0;
}