2016-11-29 Tamar Christina <tamar.christina@arm.com>

* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(AARCH64_ONLY, CHECK_CRYPTO): New macros.
	(Poly64x1_t, Poly64x2_t): Added types.
	* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
	(vmov_n_p64, vmovq_n_p64): Added.
	(vld2_lane_p64, vld2q_lane_p64): Likewise.
	(vld3_lane_p64, vld3q_lane_p64): Likewise.
	(vld4_lane_p64, vld4q_lane_p64): Likewise.
	(vst2_lane_p64, vst2q_lane_p64): Likewise.
	(vst3_lane_p64, vst3q_lane_p64): Likewise.
	(vst4_lane_p64, vst4q_lane_p64): Likewise.
	(vget_lane_p64, vgetq_lane_p64): Likewise.
	(vget_high_p64): Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
	Added AArch64 flags.
	(vreint_vector, vreint_vector_res): Moved to header.
	* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
	Added AArch64 flags.
	(vreint_vector, vreint_vector_res): Moved to header.

From-SVN: r242962
Tamar Christina 2016-11-29 14:53:46 +00:00 committed by Tamar Christina
parent 6323c98156
commit 753a952341
5 changed files with 439 additions and 12 deletions

gcc/testsuite/ChangeLog

@@ -1,3 +1,25 @@
2016-11-29 Tamar Christina <tamar.christina@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
(AARCH64_ONLY, CHECK_CRYPTO): New macros.
(Poly64x1_t, Poly64x2_t): Added types.
* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
(vmov_n_p64, vmovq_n_p64): Added.
(vld2_lane_p64, vld2q_lane_p64): Likewise.
(vld3_lane_p64, vld3q_lane_p64): Likewise.
(vld4_lane_p64, vld4q_lane_p64): Likewise.
(vst2_lane_p64, vst2q_lane_p64): Likewise.
(vst3_lane_p64, vst3q_lane_p64): Likewise.
(vst4_lane_p64, vst4q_lane_p64): Likewise.
(vget_lane_p64, vgetq_lane_p64): Likewise.
(vget_high_p64): Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c:
Added AArch64 flags.
(vreint_vector, vreint_vector_res): Moved to header.
* gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c:
Added AArch64 flags.
(vreint_vector, vreint_vector_res): Moved to header.
2016-11-29 Janus Weil <janus@gcc.gnu.org>
PR fortran/58175

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h

@@ -32,6 +32,13 @@ extern size_t strlen(const char *);
VECT_VAR(expected, int, 16, 4) -> expected_int16x4
VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4
*/
/* Some instructions don't exist on ARM.
Use this macro to guard against them. */
#ifdef __aarch64__
#define AARCH64_ONLY(X) X
#else
#define AARCH64_ONLY(X)
#endif
#define xSTR(X) #X
#define STR(X) xSTR(X)
@@ -92,6 +99,13 @@ extern size_t strlen(const char *);
fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \
}
#if defined (__ARM_FEATURE_CRYPTO)
#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
#else
#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
#endif
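CHECK_CRYPTO, like AARCH64_ONLY above, exists so that poly64 checks compile away entirely when the target lacks crypto support. A hypothetical, heavily simplified stand-in (not part of the patch; the real macro forwards all of CHECK's arguments to CHECK) shows the shape of the guard:

#include <stdio.h>

#if defined (__ARM_FEATURE_CRYPTO)
#define CHECK_CRYPTO_DEMO(MSG) printf ("checked %s\n", MSG)   /* crypto available: run the check */
#else
#define CHECK_CRYPTO_DEMO(MSG)                                /* no crypto: the check vanishes */
#endif

int main (void)
{
  CHECK_CRYPTO_DEMO ("poly64x1 result");
  return 0;
}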
/* Floating-point variant. */
#define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
{ \
@@ -184,6 +198,9 @@ extern ARRAY(expected, uint, 32, 2);
extern ARRAY(expected, uint, 64, 1);
extern ARRAY(expected, poly, 8, 8);
extern ARRAY(expected, poly, 16, 4);
#if defined (__ARM_FEATURE_CRYPTO)
extern ARRAY(expected, poly, 64, 1);
#endif
extern ARRAY(expected, hfloat, 16, 4);
extern ARRAY(expected, hfloat, 32, 2);
extern ARRAY(expected, hfloat, 64, 1);
@@ -197,6 +214,9 @@ extern ARRAY(expected, uint, 32, 4);
extern ARRAY(expected, uint, 64, 2);
extern ARRAY(expected, poly, 8, 16);
extern ARRAY(expected, poly, 16, 8);
#if defined (__ARM_FEATURE_CRYPTO)
extern ARRAY(expected, poly, 64, 2);
#endif
extern ARRAY(expected, hfloat, 16, 8);
extern ARRAY(expected, hfloat, 32, 4);
extern ARRAY(expected, hfloat, 64, 2);
@@ -213,6 +233,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \
CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \
\
CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \
@@ -225,6 +246,7 @@ extern ARRAY(expected, hfloat, 64, 2);
CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \
CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \
CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \
} \
@@ -398,6 +420,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 1);
CLEAN(result, poly, 8, 8);
CLEAN(result, poly, 16, 4);
#if defined (__ARM_FEATURE_CRYPTO)
CLEAN(result, poly, 64, 1);
#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 4);
#endif
@@ -413,6 +438,9 @@ static void clean_results (void)
CLEAN(result, uint, 64, 2);
CLEAN(result, poly, 8, 16);
CLEAN(result, poly, 16, 8);
#if defined (__ARM_FEATURE_CRYPTO)
CLEAN(result, poly, 64, 2);
#endif
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
CLEAN(result, float, 16, 8);
#endif
@@ -438,6 +466,13 @@ static void clean_results (void)
#define DECL_VARIABLE(VAR, T1, W, N) \
VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N)
#if defined (__ARM_FEATURE_CRYPTO)
#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \
DECL_VARIABLE(VAR, T1, W, N)
#else
#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N)
#endif
/* Declare only 64 bits signed variants. */
#define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
@@ -473,6 +508,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 16, 4); \
DECL_VARIABLE(VAR, float, 32, 2)
#else
@@ -481,6 +517,7 @@ static void clean_results (void)
DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 8); \
DECL_VARIABLE(VAR, poly, 16, 4); \
DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \
DECL_VARIABLE(VAR, float, 32, 2)
#endif
@@ -491,6 +528,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 16, 8); \
DECL_VARIABLE(VAR, float, 32, 4)
#else
@@ -499,6 +537,7 @@ static void clean_results (void)
DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \
DECL_VARIABLE(VAR, poly, 8, 16); \
DECL_VARIABLE(VAR, poly, 16, 8); \
DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \
DECL_VARIABLE(VAR, float, 32, 4)
#endif
/* Declare all variants. */
@@ -531,6 +570,13 @@ static void clean_results (void)
/* Helpers to call macros with 1 constant and 5 variable
arguments. */
#if defined (__ARM_FEATURE_CRYPTO)
#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \
MACRO(VAR1, VAR2, T1, T2, T3, W, N)
#else
#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N)
#endif
#define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \
MACRO(VAR, , int, s, 8, 8); \
MACRO(VAR, , int, s, 16, 4); \
@@ -601,13 +647,15 @@ static void clean_results (void)
TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, , poly, p, 8, 8); \
MACRO(VAR1, VAR2, , poly, p, 16, 4)
MACRO(VAR1, VAR2, , poly, p, 16, 4); \
MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1)
#define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \
MACRO(VAR1, VAR2, q, poly, p, 8, 16); \
MACRO(VAR1, VAR2, q, poly, p, 16, 8)
MACRO(VAR1, VAR2, q, poly, p, 16, 8); \
MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2)
#define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \
TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c

@@ -1,8 +1,9 @@
/* This file contains tests for all the *p64 intrinsics, except for
the vreinterpret intrinsics, which have their own testcases. */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -38,6 +39,17 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
/* Expected results: vmov_n. */
VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff0 };
VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
0xfffffffffffffff1 };
VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -45,6 +57,9 @@ VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
/* Expected results: vget_low. */
VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
/* Expected results: vget_high. */
VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 };
/* Expected results: vld1. */
VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -109,6 +124,39 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
0x3333333333333333 };
/* Expected results: vldX_lane. */
VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2,
0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 };
VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa };
VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa };
/* Expected results: vget_lane. */
VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
int main (void)
{
int i;
@@ -341,6 +389,26 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, "");
/* vget_high_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VGET_HIGH"
#define TEST_VGET_HIGH(T1, T2, W, N, N2) \
VECT_VAR(vget_high_vector64, T1, W, N) = \
vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \
vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N))
DECL_VARIABLE(vget_high_vector64, poly, 64, 1);
DECL_VARIABLE(vget_high_vector128, poly, 64, 2);
CLEAN(result, poly, 64, 1);
VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2);
TEST_VGET_HIGH(poly, p, 64, 1, 2);
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, "");
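Stripped of the test harness, the VGET_HIGH block above amounts to the following minimal standalone sketch (not part of the patch; assumes an AArch64 compiler invoked with -march=armv8-a+crypto, with input values mirroring the reference buffer: lane 0 = 0x...f0, lane 1 = 0x...f1):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>

int main (void)
{
  /* Build the 128-bit input { 0x...f0, 0x...f1 } from two 64-bit halves.  */
  poly64x2_t in = vcombine_p64 (vcreate_p64 (0xfffffffffffffff0ULL),
                                vcreate_p64 (0xfffffffffffffff1ULL));
  /* vget_high_p64 keeps only the upper lane, as vget_high_expected states.  */
  poly64x1_t hi = vget_high_p64 (in);
  assert ((uint64_t) vget_lane_p64 (hi, 0) == 0xfffffffffffffff1ULL);
  return 0;
}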
/* vld1_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLD1/VLD1Q"
@@ -645,7 +713,7 @@ int main (void)
VECT_VAR(vst1_lane_vector, T1, W, N) = \
vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \
vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \
VECT_VAR(vst1_lane_vector, T1, W, N), L)
VECT_VAR(vst1_lane_vector, T1, W, N), L);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 1);
DECL_VARIABLE(vst1_lane_vector, poly, 64, 2);
@@ -659,5 +727,298 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
#ifdef __aarch64__
/* vmov_n_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VMOV/VMOVQ"
#define TEST_VMOV(Q, T1, T2, W, N) \
VECT_VAR(vmov_n_vector, T1, W, N) = \
vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N))
DECL_VARIABLE(vmov_n_vector, poly, 64, 1);
DECL_VARIABLE(vmov_n_vector, poly, 64, 2);
/* Try to read different places from the input buffer. */
for (i=0; i< 3; i++) {
CLEAN(result, poly, 64, 1);
CLEAN(result, poly, 64, 2);
TEST_VMOV(, poly, p, 64, 1);
TEST_VMOV(q, poly, p, 64, 2);
switch (i) {
case 0:
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, "");
break;
case 1:
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, "");
break;
case 2:
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, "");
break;
default:
abort();
}
}
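Each iteration of the loop above broadcasts one element of buffer_dup and compares against vmov_n_expected0..2. Reduced to a single standalone check, one iteration looks roughly like this (not part of the patch; -march=armv8-a+crypto assumed):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>

int main (void)
{
  /* vmovq_n_p64 replicates the scalar into both lanes (cf. vmov_n_expected0).  */
  poly64x2_t v = vmovq_n_p64 ((poly64_t) 0xfffffffffffffff0ULL);
  assert ((uint64_t) vgetq_lane_p64 (v, 0) == 0xfffffffffffffff0ULL);
  assert ((uint64_t) vgetq_lane_p64 (v, 1) == 0xfffffffffffffff0ULL);
  return 0;
}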
/* vget_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VGET_LANE/VGETQ_LANE"
#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
fprintf(stderr, \
"ERROR in %s (%s line %d in result '%s') at type %s " \
"got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
TEST_MSG, __FILE__, __LINE__, \
STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
STR(VECT_NAME(T1, W, N)), \
VECT_VAR(vget_lane_vector, T1, W, N), \
VECT_VAR(vget_lane_expected, T1, W, N)); \
abort (); \
}
/* Initialize input values. */
DECL_VARIABLE(vector, poly, 64, 1);
DECL_VARIABLE(vector, poly, 64, 2);
VLOAD(vector, buffer, , poly, p, 64, 1);
VLOAD(vector, buffer, q, poly, p, 64, 2);
VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
TEST_VGET_LANE( , poly, p, 64, 1, 0);
TEST_VGET_LANE(q, poly, p, 64, 2, 0);
/* vldx_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"
VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2);
VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3);
VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4);
/* In this case, input variables are arrays of vectors. */
#define DECL_VLD_STX_LANE(T1, W, N, X) \
VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
/* We need to use a temporary result buffer (result_bis), because
the one used for other tests is not large enough. A subset of the
result data is moved from result_bis to result, and it is this
subset which is used to check the actual behavior. The next
macro enables to move another chunk of data from result_bis to
result. */
/* We also use another extra input buffer (buffer_src), which we
fill with 0xAA, and which is used to load a vector from which we
read a given lane. */
#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \
memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
sizeof(VECT_VAR(buffer_src, T1, W, N))); \
\
VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
\
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
/* Use dedicated init buffer, of size X. */ \
vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
L); \
vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
VECT_ARRAY_VAR(vector, T1, W, N, X)); \
memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
sizeof(VECT_VAR(result, T1, W, N)))
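In isolation, one iteration of TEST_VLDX_LANE for X=2 on poly64x2 does the following (a minimal sketch, not part of the patch; the local names src, init and out stand in for buffer_src, buffer_vld2_lane and result_bis_2, and -march=armv8-a+crypto is assumed):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main (void)
{
  poly64_t src[4];                                          /* buffer_src: filled with 0xAA */
  poly64_t init[2] = { (poly64_t) 0xfffffffffffffff0ULL,    /* buffer_vld2_lane */
                       (poly64_t) 0xfffffffffffffff1ULL };
  poly64_t out[4];                                          /* result_bis_2 */

  memset (src, 0xAA, sizeof (src));
  poly64x2x2_t regs = vld2q_p64 (src);                      /* two registers full of 0xAA */
  regs = vld2q_lane_p64 (init, regs, 0);                    /* overwrite lane 0 of each register */
  vst2q_p64 (out, regs);                                    /* interleaved store of both registers */

  /* Chunk 0 matches expected_vld_st2_0: { ...f0, ...f1 }.  */
  assert ((uint64_t) out[0] == 0xfffffffffffffff0ULL);
  assert ((uint64_t) out[1] == 0xfffffffffffffff1ULL);
  return 0;
}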
/* Overwrite "result" with the contents of "result_bis"[Y]. */
#undef TEST_EXTRA_CHUNK
#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \
memcpy(VECT_VAR(result, T1, W, N), \
&(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \
sizeof(VECT_VAR(result, T1, W, N)));
/* Add some padding to try to catch out of bound accesses. */
#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42}
#define DUMMY_ARRAY(V, T, W, N, L) \
VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \
ARRAY1(V##_pad,T,W,N)
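The ARRAY1/DUMMY_ARRAY pair relies on a one-element sentinel declared right after the real buffer; a sketch of the idea follows (hypothetical names, and note that C gives no layout-adjacency guarantee, so this is only a best-effort trap):

#include <stdio.h>
#include <stdint.h>

static uint64_t buffer_demo[4];               /* the buffer a vstX_lane writes to */
static uint64_t buffer_demo_pad[1] = { 42 };  /* sentinel, as in ARRAY1 */

int main (void)
{
  /* ... run the store under test on buffer_demo here ... */
  if (buffer_demo_pad[0] != 42)
    printf ("out-of-bounds write clobbered the sentinel\n");
  return 0;
}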
#define DECL_ALL_VLD_STX_LANE(X) \
DECL_VLD_STX_LANE(poly, 64, 1, X); \
DECL_VLD_STX_LANE(poly, 64, 2, X);
#define TEST_ALL_VLDX_LANE(X) \
TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \
TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0);
#define TEST_ALL_EXTRA_CHUNKS(X,Y) \
TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \
TEST_EXTRA_CHUNK(poly, 64, 2, X, Y)
#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \
CHECK(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \
CHECK(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);
/* Declare the temporary buffers / variables. */
DECL_ALL_VLD_STX_LANE(2);
DECL_ALL_VLD_STX_LANE(3);
DECL_ALL_VLD_STX_LANE(4);
DUMMY_ARRAY(buffer_src, poly, 64, 1, 4);
DUMMY_ARRAY(buffer_src, poly, 64, 2, 4);
/* Check vld2_lane/vld2q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD2_LANE/VLD2Q_LANE"
TEST_ALL_VLDX_LANE(2);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(2, 1);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1");
/* Check vld3_lane/vld3q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD3_LANE/VLD3Q_LANE"
TEST_ALL_VLDX_LANE(3);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(3, 1);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1");
TEST_ALL_EXTRA_CHUNKS(3, 2);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2");
/* Check vld4_lane/vld4q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VLD4_LANE/VLD4Q_LANE"
TEST_ALL_VLDX_LANE(4);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0");
TEST_ALL_EXTRA_CHUNKS(4, 1);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1");
TEST_ALL_EXTRA_CHUNKS(4, 2);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2");
TEST_ALL_EXTRA_CHUNKS(4, 3);
CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3");
/* In this case, input variables are arrays of vectors. */
#define DECL_VSTX_LANE(T1, W, N, X) \
VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \
VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \
VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N]
/* We need to use a temporary result buffer (result_bis), because
the one used for other tests is not large enough. A subset of the
result data is moved from result_bis to result, and it is this
subset which is used to check the actual behavior. The next
macro enables to move another chunk of data from result_bis to
result. */
/* We also use another extra input buffer (buffer_src), which we
fill with 0xAA, and which is used to load a vector from which we
read a given lane. */
#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \
memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \
sizeof(VECT_VAR(buffer_src, T1, W, N))); \
memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \
sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \
\
VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \
vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \
\
VECT_ARRAY_VAR(vector, T1, W, N, X) = \
/* Use dedicated init buffer, of size X. */ \
vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \
VECT_ARRAY_VAR(vector_src, T1, W, N, X), \
L); \
vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \
VECT_ARRAY_VAR(vector, T1, W, N, X), \
L); \
memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \
sizeof(VECT_VAR(result, T1, W, N)));
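TEST_VSTX_LANE differs from the load-lane case only in its final step: instead of storing whole registers, it stores a single lane of each register with vstX_lane and checks those X values. A matching standalone sketch for X=2 (not part of the patch; same assumptions and hypothetical names as the load-lane sketch above):

#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main (void)
{
  poly64_t src[4];
  poly64_t init[2] = { (poly64_t) 0xfffffffffffffff0ULL,
                       (poly64_t) 0xfffffffffffffff1ULL };
  poly64_t out[4] = { 0 };

  memset (src, 0xAA, sizeof (src));
  poly64x2x2_t regs = vld2q_p64 (src);
  regs = vld2q_lane_p64 (init, regs, 0);
  vst2q_lane_p64 (out, regs, 0);          /* store lane 0 of each register: 2 elements */

  assert ((uint64_t) out[0] == 0xfffffffffffffff0ULL);
  assert ((uint64_t) out[1] == 0xfffffffffffffff1ULL);
  assert ((uint64_t) out[2] == 0);        /* rest of the buffer untouched */
  return 0;
}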
#define TEST_ALL_VSTX_LANE(X) \
TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \
TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0);
/* Check vst2_lane/vst2q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST2_LANE/VST2Q_LANE"
TEST_ALL_VSTX_LANE(2);
#define CMT " (chunk 0)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT);
TEST_ALL_EXTRA_CHUNKS(2, 1);
#undef CMT
#define CMT " chunk 1"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT);
/* Check vst3_lane/vst3q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST3_LANE/VST3Q_LANE"
TEST_ALL_VSTX_LANE(3);
#undef CMT
#define CMT " (chunk 0)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT);
TEST_ALL_EXTRA_CHUNKS(3, 1);
#undef CMT
#define CMT " (chunk 1)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT);
TEST_ALL_EXTRA_CHUNKS(3, 2);
#undef CMT
#define CMT " (chunk 2)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT);
/* Check vst4_lane/vst4q_lane. */
clean_results ();
#undef TEST_MSG
#define TEST_MSG "VST4_LANE/VST4Q_LANE"
TEST_ALL_VSTX_LANE(4);
#undef CMT
#define CMT " (chunk 0)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT);
TEST_ALL_EXTRA_CHUNKS(4, 1);
#undef CMT
#define CMT " (chunk 1)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT);
TEST_ALL_EXTRA_CHUNKS(4, 2);
#undef CMT
#define CMT " (chunk 2)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT);
TEST_ALL_EXTRA_CHUNKS(4, 3);
#undef CMT
#define CMT " (chunk 3)"
CHECK(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT);
#endif /* __aarch64__. */
return 0;
}

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c

@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p128 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -78,9 +79,7 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff,
int main (void)
{
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector);
DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res);
DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();

gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c

@@ -1,7 +1,8 @@
/* This file contains tests for the vreinterpret *p64 intrinsics. */
/* { dg-require-effective-target arm_crypto_ok } */
/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */
/* { dg-add-options arm_crypto } */
/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
#include <arm_neon.h>
#include "arm-neon-ref.h"
@@ -121,11 +122,7 @@ int main (void)
CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, "");
DECL_VARIABLE_ALL_VARIANTS(vreint_vector);
DECL_VARIABLE(vreint_vector, poly, 64, 1);
DECL_VARIABLE(vreint_vector, poly, 64, 2);
DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res);
DECL_VARIABLE(vreint_vector_res, poly, 64, 1);
DECL_VARIABLE(vreint_vector_res, poly, 64, 2);
clean_results ();