[AArch64] Add NEON intrinsics vqrdmlah and vqrdmlsh.

gcc/
	* gcc/config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
	(vqrdmlahq_s16, vqrdmlahq_s32): New.
	(vqrdmlsh_s16, vqrdmlsh_s32): New.
	(vqrdmlshq_s16, vqrdmlshq_s32): New.

        gcc/testsuite
	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
	support code for vqrdml{as}h tests.
	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.

From-SVN: r230971
This commit is contained in:
Matthew Wahab 2015-11-26 15:13:02 +00:00 committed by Matthew Wahab
parent a1d5d08d25
commit 0c6110a126
6 changed files with 323 additions and 0 deletions

View file

@ -1,3 +1,10 @@
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc/config/aarch64/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New.
(vqrdmlahq_s16, vqrdmlahq_s32): New.
(vqrdmlsh_s16, vqrdmlsh_s32): New.
(vqrdmlshq_s16, vqrdmlshq_s32): New.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add

View file

@ -11213,6 +11213,59 @@ vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c);
}
/* ARMv8.1 instrinsics. */
#pragma GCC push_options
#pragma GCC target ("arch=armv8.1-a")
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c);
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
{
return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c);
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
{
return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c);
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
{
return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c);
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
{
return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c);
}
#pragma GCC pop_options
#pragma GCC push_options
#pragma GCC target ("+nothing+crypto")
/* vaes */

View file

@ -1,3 +1,10 @@
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh.inc: New file,
support code for vqrdml{as}h tests.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah.c: New.
* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh.c: New.
2015-11-26 Matthew Wahab <matthew.wahab@arm.com>
* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.

View file

@ -0,0 +1,138 @@
#define FNNAME1(NAME) exec_ ## NAME
#define FNNAME(NAME) FNNAME1 (NAME)
void FNNAME (INSN) (void)
{
/* vector_res = vqrdmlah (vector, vector2, vector3, vector4),
then store the result. */
#define TEST_VQRDMLAH2(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \
VECT_VAR (vector_res, T1, W, N) = \
INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N), \
VECT_VAR (vector2, T1, W, N), \
VECT_VAR (vector3, T1, W, N)); \
vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \
VECT_VAR (vector_res, T1, W, N)); \
CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, \
EXPECTED_CUMULATIVE_SAT, CMT)
/* Two auxliary macros are necessary to expand INSN. */
#define TEST_VQRDMLAH1(INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
TEST_VQRDMLAH2 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
#define TEST_VQRDMLAH(Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT) \
TEST_VQRDMLAH1 (INSN, Q, T1, T2, W, N, EXPECTED_CUMULATIVE_SAT, CMT)
DECL_VARIABLE (vector, int, 16, 4);
DECL_VARIABLE (vector, int, 32, 2);
DECL_VARIABLE (vector, int, 16, 8);
DECL_VARIABLE (vector, int, 32, 4);
DECL_VARIABLE (vector_res, int, 16, 4);
DECL_VARIABLE (vector_res, int, 32, 2);
DECL_VARIABLE (vector_res, int, 16, 8);
DECL_VARIABLE (vector_res, int, 32, 4);
DECL_VARIABLE (vector2, int, 16, 4);
DECL_VARIABLE (vector2, int, 32, 2);
DECL_VARIABLE (vector2, int, 16, 8);
DECL_VARIABLE (vector2, int, 32, 4);
DECL_VARIABLE (vector3, int, 16, 4);
DECL_VARIABLE (vector3, int, 32, 2);
DECL_VARIABLE (vector3, int, 16, 8);
DECL_VARIABLE (vector3, int, 32, 4);
clean_results ();
VLOAD (vector, buffer, , int, s, 16, 4);
VLOAD (vector, buffer, , int, s, 32, 2);
VLOAD (vector, buffer, q, int, s, 16, 8);
VLOAD (vector, buffer, q, int, s, 32, 4);
/* Initialize vector2. */
VDUP (vector2, , int, s, 16, 4, 0x5555);
VDUP (vector2, , int, s, 32, 2, 0xBB);
VDUP (vector2, q, int, s, 16, 8, 0xBB);
VDUP (vector2, q, int, s, 32, 4, 0x22);
/* Initialize vector3. */
VDUP (vector3, , int, s, 16, 4, 0x5555);
VDUP (vector3, , int, s, 32, 2, 0xBB);
VDUP (vector3, q, int, s, 16, 8, 0x33);
VDUP (vector3, q, int, s, 32, 4, 0x22);
#define CMT ""
TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat, CMT);
TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat, CMT);
TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat, CMT);
TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat, CMT);
CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT);
CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT);
CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
/* Now use input values such that the multiplication causes
saturation. */
#define TEST_MSG_MUL " (check mul cumulative saturation)"
VDUP (vector, , int, s, 16, 4, 0x8000);
VDUP (vector, , int, s, 32, 2, 0x80000000);
VDUP (vector, q, int, s, 16, 8, 0x8000);
VDUP (vector, q, int, s, 32, 4, 0x80000000);
VDUP (vector2, , int, s, 16, 4, 0x8000);
VDUP (vector2, , int, s, 32, 2, 0x80000000);
VDUP (vector2, q, int, s, 16, 8, 0x8000);
VDUP (vector2, q, int, s, 32, 4, 0x80000000);
VDUP (vector3, , int, s, 16, 4, 0x8000);
VDUP (vector3, , int, s, 32, 2, 0x80000000);
VDUP (vector3, q, int, s, 16, 8, 0x8000);
VDUP (vector3, q, int, s, 32, 4, 0x80000000);
TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_mul, TEST_MSG_MUL);
TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_mul, TEST_MSG_MUL);
TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_mul, TEST_MSG_MUL);
CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL);
CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL);
CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL);
CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL);
/* Use input values where rounding produces a result equal to the
saturation value, but does not set the saturation flag. */
#define TEST_MSG_ROUND " (check rounding)"
VDUP (vector, , int, s, 16, 4, 0x8000);
VDUP (vector, , int, s, 32, 2, 0x80000000);
VDUP (vector, q, int, s, 16, 8, 0x8000);
VDUP (vector, q, int, s, 32, 4, 0x80000000);
VDUP (vector2, , int, s, 16, 4, 0x8001);
VDUP (vector2, , int, s, 32, 2, 0x80000001);
VDUP (vector2, q, int, s, 16, 8, 0x8001);
VDUP (vector2, q, int, s, 32, 4, 0x80000001);
VDUP (vector3, , int, s, 16, 4, 0x8001);
VDUP (vector3, , int, s, 32, 2, 0x80000001);
VDUP (vector3, q, int, s, 16, 8, 0x8001);
VDUP (vector3, q, int, s, 32, 4, 0x80000001);
TEST_VQRDMLAH ( , int, s, 16, 4, expected_cumulative_sat_round, \
TEST_MSG_ROUND);
TEST_VQRDMLAH ( , int, s, 32, 2, expected_cumulative_sat_round, \
TEST_MSG_ROUND);
TEST_VQRDMLAH (q, int, s, 16, 8, expected_cumulative_sat_round, \
TEST_MSG_ROUND);
TEST_VQRDMLAH (q, int, s, 32, 4, expected_cumulative_sat_round, \
TEST_MSG_ROUND);
CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND);
CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND);
CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND);
CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND);
}
int
main (void)
{
FNNAME (INSN) ();
return 0;
}

View file

@ -0,0 +1,57 @@
/* { dg-require-effective-target arm_v8_1a_neon_hw } */
/* { dg-add-options arm_v8_1a_neon } */
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected values of cumulative_saturation flag. */
int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
/* Expected results. */
VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 };
VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
/* Expected values of cumulative_saturation flag when multiplication
saturates. */
int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0;
int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0;
int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0;
int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0;
/* Expected results when multiplication saturates. */
VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 };
VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
/* Expected values of cumulative_saturation flag when rounding
should not cause saturation. */
int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0;
int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0;
int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0;
int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0;
/* Expected results when rounding should not cause saturation. */
VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe,
0xfffe, 0xfffe };
VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe };
VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0xfffe, 0xfffe,
0xfffe, 0xfffe,
0xfffe, 0xfffe,
0xfffe, 0xfffe };
VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe,
0xfffffffe, 0xfffffffe };
#define INSN vqrdmlah
#define TEST_MSG "VQRDMLAH"
#include "vqrdmlXh.inc"

View file

@ -0,0 +1,61 @@
/* { dg-require-effective-target arm_v8_1a_neon_hw } */
/* { dg-add-options arm_v8_1a_neon } */
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected values of cumulative_saturation flag. */
int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0;
int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0;
int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0;
int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0;
/* Expected results. */
VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 };
VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
/* Expected values of cumulative_saturation flag when multiplication
saturates. */
int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1;
int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1;
int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1;
int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1;
/* Expected results when multiplication saturates. */
VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000,
0x8000, 0x8000 };
VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 };
VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000,
0x8000, 0x8000,
0x8000, 0x8000,
0x8000, 0x8000 };
VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000,
0x80000000, 0x80000000 };
/* Expected values of cumulative_saturation flag when rounding
should not cause saturation. */
int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1;
int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1;
int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1;
int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1;
/* Expected results when rounding should not cause saturation. */
VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000,
0x8000, 0x8000 };
VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 };
VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000,
0x8000, 0x8000,
0x8000, 0x8000,
0x8000, 0x8000 };
VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000,
0x80000000, 0x80000000 };
#define INSN vqrdmlsh
#define TEST_MSG "VQRDMLSH"
#include "vqrdmlXh.inc"