aarch64: Use intrinsics for upper saturating shift right
The use of vqshrn_high_n_s32 was triggering an unneeded register move, because sqshrn2 is destructive but was declared as inline assembly in arm_neon.h. This patch implements sqshrn2 and uqshrn2 as actual intrinsics which do not trigger the unnecessary move, along with new tests to cover them. gcc/ChangeLog 2020-11-06 David Candler <david.candler@arm.com> * config/aarch64/aarch64-builtins.c (TYPES_SHIFT2IMM): Add define. (TYPES_SHIFT2IMM_UUSS): Add define. (TYPES_USHIFT2IMM): Add define. * config/aarch64/aarch64-simd.md (aarch64_<sur>q<r>shr<u>n2_n<mode>): Add new insn for upper saturating shift right. * config/aarch64/aarch64-simd-builtins.def: Add intrinsics. * config/aarch64/arm_neon.h: (vqrshrn_high_n_s16): Expand using intrinsic rather than inline asm. (vqrshrn_high_n_s32): Likewise. (vqrshrn_high_n_s64): Likewise. (vqrshrn_high_n_u16): Likewise. (vqrshrn_high_n_u32): Likewise. (vqrshrn_high_n_u64): Likewise. (vqrshrun_high_n_s16): Likewise. (vqrshrun_high_n_s32): Likewise. (vqrshrun_high_n_s64): Likewise. (vqshrn_high_n_s16): Likewise. (vqshrn_high_n_s32): Likewise. (vqshrn_high_n_s64): Likewise. (vqshrn_high_n_u16): Likewise. (vqshrn_high_n_u32): Likewise. (vqshrn_high_n_u64): Likewise. (vqshrun_high_n_s16): Likewise. (vqshrun_high_n_s32): Likewise. (vqshrun_high_n_s64): Likewise. gcc/testsuite/ChangeLog 2020-11-06 David Candler <david.candler@arm.com> * gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: New testcase. * gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise. * gcc.target/aarch64/narrow_high-intrinsics.c: Update expected assembler for sqshrun2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2 and uqrshrn2.
This commit is contained in:
parent
4b5f564a5d
commit
05f1883cfd
9 changed files with 854 additions and 258 deletions
|
@ -266,6 +266,11 @@ static enum aarch64_type_qualifiers
|
|||
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
|
||||
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
|
||||
#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
|
||||
static enum aarch64_type_qualifiers
|
||||
aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
= { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
|
||||
#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
|
||||
|
||||
static enum aarch64_type_qualifiers
|
||||
aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
|
@ -277,6 +282,7 @@ aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
|||
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
|
||||
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
|
||||
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
|
||||
#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
|
||||
|
||||
static enum aarch64_type_qualifiers
|
||||
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
|
||||
|
|
|
@ -285,6 +285,13 @@
|
|||
BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
|
||||
BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
|
||||
BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
|
||||
/* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>. */
|
||||
BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
|
||||
BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
|
||||
BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, NONE)
|
||||
BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, NONE)
|
||||
BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, NONE)
|
||||
BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, NONE)
|
||||
/* Implemented by aarch64_<sur>s<lr>i_n<mode>. */
|
||||
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, ALL)
|
||||
BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0, ALL)
|
||||
|
|
|
@ -4720,6 +4720,17 @@
|
|||
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
|
||||
(match_operand:VQN 2 "register_operand" "w")
|
||||
(match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
|
||||
VQSHRN_N))]
|
||||
"TARGET_SIMD"
|
||||
"<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
|
||||
[(set_attr "type" "neon_sat_shift_imm_narrow_q")]
|
||||
)
|
||||
|
||||
|
||||
;; cm(eq|ge|gt|lt|le)
|
||||
;; Note, we have constraints for Dz and Z as different expanders
|
||||
|
|
|
@ -9993,275 +9993,131 @@ vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
|
|||
return __result;
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_s16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int16x8_t b_ = (b); \
|
||||
int8x8_t a_ = (a); \
|
||||
int8x16_t result = vcombine_s8 \
|
||||
(a_, vcreate_s8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrn2_nv8hi (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_s32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int32x4_t b_ = (b); \
|
||||
int16x4_t a_ = (a); \
|
||||
int16x8_t result = vcombine_s16 \
|
||||
(a_, vcreate_s16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrn2_nv4si (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_s64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int64x2_t b_ = (b); \
|
||||
int32x2_t a_ = (a); \
|
||||
int32x4_t result = vcombine_s32 \
|
||||
(a_, vcreate_s32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrn2_nv2di (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_u16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint16x8_t b_ = (b); \
|
||||
uint8x8_t a_ = (a); \
|
||||
uint8x16_t result = vcombine_u8 \
|
||||
(a_, vcreate_u8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqrshrn2_nv8hi_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_u32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint32x4_t b_ = (b); \
|
||||
uint16x4_t a_ = (a); \
|
||||
uint16x8_t result = vcombine_u16 \
|
||||
(a_, vcreate_u16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqrshrn2_nv4si_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrn_high_n_u64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint64x2_t b_ = (b); \
|
||||
uint32x2_t a_ = (a); \
|
||||
uint32x4_t result = vcombine_u32 \
|
||||
(a_, vcreate_u32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqrshrn2_nv2di_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrun_high_n_s16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int16x8_t b_ = (b); \
|
||||
uint8x8_t a_ = (a); \
|
||||
uint8x16_t result = vcombine_u8 \
|
||||
(a_, vcreate_u8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrun2_nv8hi_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrun_high_n_s32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int32x4_t b_ = (b); \
|
||||
uint16x4_t a_ = (a); \
|
||||
uint16x8_t result = vcombine_u16 \
|
||||
(a_, vcreate_u16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrun_high_n_s32 (uint16x4_t __a, int32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrun2_nv4si_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqrshrun_high_n_s64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int64x2_t b_ = (b); \
|
||||
uint32x2_t a_ = (a); \
|
||||
uint32x4_t result = vcombine_u32 \
|
||||
(a_, vcreate_u32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqrshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqrshrun2_nv2di_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_s16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int16x8_t b_ = (b); \
|
||||
int8x8_t a_ = (a); \
|
||||
int8x16_t result = vcombine_s8 \
|
||||
(a_, vcreate_s8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrn2_nv8hi (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_s32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int32x4_t b_ = (b); \
|
||||
int16x4_t a_ = (a); \
|
||||
int16x8_t result = vcombine_s16 \
|
||||
(a_, vcreate_s16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrn2_nv4si (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_s64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int64x2_t b_ = (b); \
|
||||
int32x2_t a_ = (a); \
|
||||
int32x4_t result = vcombine_s32 \
|
||||
(a_, vcreate_s32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline int32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrn2_nv2di (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_u16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint16x8_t b_ = (b); \
|
||||
uint8x8_t a_ = (a); \
|
||||
uint8x16_t result = vcombine_u8 \
|
||||
(a_, vcreate_u8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqshrn2_nv8hi_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_u32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint32x4_t b_ = (b); \
|
||||
uint16x4_t a_ = (a); \
|
||||
uint16x8_t result = vcombine_u16 \
|
||||
(a_, vcreate_u16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqshrn2_nv4si_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrn_high_n_u64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
uint64x2_t b_ = (b); \
|
||||
uint32x2_t a_ = (a); \
|
||||
uint32x4_t result = vcombine_u32 \
|
||||
(a_, vcreate_u32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_uqshrn2_nv2di_uuus (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrun_high_n_s16(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int16x8_t b_ = (b); \
|
||||
uint8x8_t a_ = (a); \
|
||||
uint8x16_t result = vcombine_u8 \
|
||||
(a_, vcreate_u8 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint8x16_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrun_high_n_s16 (uint8x8_t __a, int16x8_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrun2_nv8hi_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrun_high_n_s32(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int32x4_t b_ = (b); \
|
||||
uint16x4_t a_ = (a); \
|
||||
uint16x8_t result = vcombine_u16 \
|
||||
(a_, vcreate_u16 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrun_high_n_s32 (uint16x4_t __a, int32x4_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrun2_nv4si_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vqshrun_high_n_s64(a, b, c) \
|
||||
__extension__ \
|
||||
({ \
|
||||
int64x2_t b_ = (b); \
|
||||
uint32x2_t a_ = (a); \
|
||||
uint32x4_t result = vcombine_u32 \
|
||||
(a_, vcreate_u32 \
|
||||
(__AARCH64_UINT64_C (0x0))); \
|
||||
__asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \
|
||||
: "+w"(result) \
|
||||
: "w"(b_), "i"(c) \
|
||||
: /* No clobbers */); \
|
||||
result; \
|
||||
})
|
||||
__extension__ extern __inline uint32x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vqshrun_high_n_s64 (uint32x2_t __a, int64x2_t __b, const int __c)
|
||||
{
|
||||
return __builtin_aarch64_sqshrun2_nv2di_uuss (__a, __b, __c);
|
||||
}
|
||||
|
||||
#define vrshrn_high_n_s16(a, b, c) \
|
||||
__extension__ \
|
||||
|
|
|
@ -0,0 +1,192 @@
|
|||
#include <arm_neon.h>
|
||||
#include "arm-neon-ref.h"
|
||||
#include "compute-ref-data.h"
|
||||
|
||||
/* Expected results. */
|
||||
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7,
|
||||
0xf8, 0xf9, 0xf9, 0xfa,
|
||||
0xfa, 0xfb, 0xfb, 0xfc };
|
||||
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
|
||||
0xfff8, 0xfff9, 0xfff9, 0xfffa };
|
||||
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
|
||||
0xfffffffc, 0xfffffffc };
|
||||
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results with shift by 3. */
|
||||
VECT_VAR_DECL(expected_sh3,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f };
|
||||
VECT_VAR_DECL(expected_sh3,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff };
|
||||
VECT_VAR_DECL(expected_sh3,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0x7fffffff, 0x7fffffff };
|
||||
VECT_VAR_DECL(expected_sh3,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_sh3,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_sh3,uint,32,4) [] = { 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results with shift by max amount. */
|
||||
VECT_VAR_DECL(expected_shmax,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f };
|
||||
VECT_VAR_DECL(expected_shmax,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff };
|
||||
VECT_VAR_DECL(expected_shmax,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0x7fffffff, 0x7fffffff };
|
||||
VECT_VAR_DECL(expected_shmax,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_shmax,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_shmax,uint,32,4) [] = { 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
#define INSN vqrshrn_high_n
|
||||
#define TEST_MSG "VQRSHRN_HIGH_N"
|
||||
|
||||
#define FNNAME1(NAME) void exec_ ## NAME (void)
|
||||
#define FNNAME(NAME) FNNAME1(NAME)
|
||||
|
||||
FNNAME (INSN)
|
||||
{
|
||||
/* Basic test: y=vqrshrn_high_n(x,v), then store the result. */
|
||||
#define TEST_VQRSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
VECT_VAR(vector_res, T1, W2, N2) = \
|
||||
INSN##_##T2##W(VECT_VAR(vector1, T1, W2, N), \
|
||||
VECT_VAR(vector2, T1, W, N), V); \
|
||||
vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), \
|
||||
VECT_VAR(vector_res, T1, W2, N2)); \
|
||||
|
||||
/* Two auxliary macros are necessary to expand INSN */
|
||||
#define TEST_VQRSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQRSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
#define TEST_VQRSHRN_HIGH_N(T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQRSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
|
||||
DECL_VARIABLE(vector1, int, 8, 8);
|
||||
DECL_VARIABLE(vector1, int, 16, 4);
|
||||
DECL_VARIABLE(vector1, int, 32, 2);
|
||||
DECL_VARIABLE(vector1, uint, 8, 8);
|
||||
DECL_VARIABLE(vector1, uint, 16, 4);
|
||||
DECL_VARIABLE(vector1, uint, 32, 2);
|
||||
|
||||
/* vector is twice as large as vector_res. */
|
||||
DECL_VARIABLE(vector2, int, 16, 8);
|
||||
DECL_VARIABLE(vector2, int, 32, 4);
|
||||
DECL_VARIABLE(vector2, int, 64, 2);
|
||||
DECL_VARIABLE(vector2, uint, 16, 8);
|
||||
DECL_VARIABLE(vector2, uint, 32, 4);
|
||||
DECL_VARIABLE(vector2, uint, 64, 2);
|
||||
|
||||
DECL_VARIABLE(vector_res, int, 8, 16);
|
||||
DECL_VARIABLE(vector_res, int, 16, 8);
|
||||
DECL_VARIABLE(vector_res, int, 32, 4);
|
||||
DECL_VARIABLE(vector_res, uint, 8, 16);
|
||||
DECL_VARIABLE(vector_res, uint, 16, 8);
|
||||
DECL_VARIABLE(vector_res, uint, 32, 4);
|
||||
|
||||
clean_results ();
|
||||
|
||||
VLOAD(vector1, buffer, , int, s, 8, 8);
|
||||
VLOAD(vector1, buffer, , int, s, 16, 4);
|
||||
VLOAD(vector1, buffer, , int, s, 32, 2);
|
||||
VLOAD(vector1, buffer, , uint, u, 8, 8);
|
||||
VLOAD(vector1, buffer, , uint, u, 16, 4);
|
||||
VLOAD(vector1, buffer, , uint, u, 32, 2);
|
||||
|
||||
VLOAD(vector2, buffer, q, int, s, 16, 8);
|
||||
VLOAD(vector2, buffer, q, int, s, 32, 4);
|
||||
VLOAD(vector2, buffer, q, int, s, 64, 2);
|
||||
VLOAD(vector2, buffer, q, uint, u, 16, 8);
|
||||
VLOAD(vector2, buffer, q, uint, u, 32, 4);
|
||||
VLOAD(vector2, buffer, q, uint, u, 64, 2);
|
||||
|
||||
/* Choose shift amount arbitrarily. */
|
||||
#define CMT ""
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 1);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 1);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 2);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 2);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
|
||||
|
||||
|
||||
/* Another set of tests, shifting max value by 3. */
|
||||
VDUP(vector1, , int, s, 8, 8, 0x7F);
|
||||
VDUP(vector1, , int, s, 16, 4, 0x7FFF);
|
||||
VDUP(vector1, , int, s, 32, 2, 0x7FFFFFFFLL);
|
||||
VDUP(vector1, , uint, u, 8, 8, 0xFF);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0xFFFF);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFFFULL);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
|
||||
VDUP(vector2, q, uint, u, 16, 8, 0xFFFF);
|
||||
VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF);
|
||||
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
#undef CMT
|
||||
#define CMT " (check saturation: shift by 3)"
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 3);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 3);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 3);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 3);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_sh3, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_sh3, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_sh3, CMT);
|
||||
|
||||
|
||||
/* Shift by max amount. */
|
||||
#undef CMT
|
||||
#define CMT " (check saturation: shift by max)"
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 16, 8, 8, 16, 8);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 32, 16, 4, 8, 16);
|
||||
TEST_VQRSHRN_HIGH_N(int, s, 64, 32, 2, 4, 32);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 8);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 16);
|
||||
TEST_VQRSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 32);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_shmax, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_shmax, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_shmax, CMT);
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
exec_vqrshrn_high_n ();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,194 @@
|
|||
#include <arm_neon.h>
|
||||
#include "arm-neon-ref.h"
|
||||
#include "compute-ref-data.h"
|
||||
|
||||
/* Expected results with negative input. */
|
||||
VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
|
||||
0xfe, 0xfe, 0xfe, 0xfe,
|
||||
0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0xfffffffc, 0xfffffffc, 0x0, 0x0 };
|
||||
|
||||
/* Expected results with max input value shifted by 1. */
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results with max input value shifted by max amount. */
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80 };
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x8000, 0x8000, 0x8000, 0x8000 };
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0x80000000, 0x80000000 };
|
||||
|
||||
/* Expected results with min input value shifted by max amount. */
|
||||
VECT_VAR_DECL(expected_min_shmax,uint,8,16) [] = { 0x80, 0x80, 0x80, 0x80,
|
||||
0x80, 0x80, 0x80, 0x80,
|
||||
0x0, 0x0, 0x0, 0x0,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_min_shmax,uint,16,8) [] = { 0x8000, 0x8000, 0x8000, 0x8000,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_min_shmax,uint,32,4) [] = { 0x80000000, 0x80000000,
|
||||
0x0, 0x0 };
|
||||
|
||||
/* Expected results with inputs in usual range. */
|
||||
VECT_VAR_DECL(expected,uint,8,16) [] = { 0x12, 0x12, 0x12, 0x12,
|
||||
0x12, 0x12, 0x12, 0x12,
|
||||
0x49, 0x49, 0x49, 0x49,
|
||||
0x49, 0x49, 0x49, 0x49 };
|
||||
VECT_VAR_DECL(expected,uint,16,8) [] = { 0x4321, 0x4321, 0x4321, 0x4321,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xdeadbeef, 0xdeadbeef,
|
||||
0xdeadbf, 0xdeadbf };
|
||||
|
||||
#define INSN vqrshrun_high_n
|
||||
#define TEST_MSG "VQRSHRUN_HIGH_N"
|
||||
|
||||
#define FNNAME1(NAME) void exec_ ## NAME (void)
|
||||
#define FNNAME(NAME) FNNAME1(NAME)
|
||||
|
||||
FNNAME (INSN)
|
||||
{
|
||||
/* Basic test: y=vqrshrun_high_n(x,v), then store the result. */
|
||||
#define TEST_VQRSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
VECT_VAR(vector_res, uint, W2, N2) = \
|
||||
INSN##_##T2##W(VECT_VAR(vector1, uint, W2, N), \
|
||||
VECT_VAR(vector2, T1, W, N), V); \
|
||||
vst1q_u##W2(VECT_VAR(result, uint, W2, N2), \
|
||||
VECT_VAR(vector_res, uint, W2, N2)); \
|
||||
|
||||
/* Two auxliary macros are necessary to expand INSN */
|
||||
#define TEST_VQRSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQRSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
#define TEST_VQRSHRUN_HIGH_N(T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQRSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
|
||||
DECL_VARIABLE(vector1, uint, 8, 8);
|
||||
DECL_VARIABLE(vector1, uint, 16, 4);
|
||||
DECL_VARIABLE(vector1, uint, 32, 2);
|
||||
|
||||
/* vector is twice as large as vector_res. */
|
||||
DECL_VARIABLE(vector2, int, 16, 8);
|
||||
DECL_VARIABLE(vector2, int, 32, 4);
|
||||
DECL_VARIABLE(vector2, int, 64, 2);
|
||||
|
||||
DECL_VARIABLE(vector_res, uint, 8, 16);
|
||||
DECL_VARIABLE(vector_res, uint, 16, 8);
|
||||
DECL_VARIABLE(vector_res, uint, 32, 4);
|
||||
|
||||
clean_results ();
|
||||
|
||||
/* Fill input vector with negative values, to check saturation on
|
||||
limits. */
|
||||
VDUP(vector1, , uint, u, 8, 8, -2);
|
||||
VDUP(vector1, , uint, u, 16, 4, -3);
|
||||
VDUP(vector1, , uint, u, 32, 2, -4);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, -2);
|
||||
VDUP(vector2, q, int, s, 32, 4, -3);
|
||||
VDUP(vector2, q, int, s, 64, 2, -4);
|
||||
|
||||
/* Choose shift amount arbitrarily. */
|
||||
#define CMT " (negative input)"
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 3);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 4);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 2);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
|
||||
|
||||
|
||||
/* Fill input vector with max value, to check saturation on
|
||||
limits. */
|
||||
VDUP(vector1, , uint, u, 8, 8, 0x7F);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0x7FFF);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0x7FFFFFFFLL);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
|
||||
|
||||
/* shift by 1. */
|
||||
#undef CMT
|
||||
#define CMT " (check cumulative saturation: shift by 1)"
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 1);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 1);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 1);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT);
|
||||
|
||||
|
||||
/* shift by max. */
|
||||
#undef CMT
|
||||
#define CMT " (check cumulative saturation: shift by max, positive input)"
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 8);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 16);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 32);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shmax, CMT);
|
||||
|
||||
|
||||
/* Fill input vector with min value, to check saturation on limits. */
|
||||
VDUP(vector1, , uint, u, 8, 8, 0x80);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0x8000);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0x80000000LL);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x8000);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x80000000);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0x8000000000000000LL);
|
||||
|
||||
/* shift by max */
|
||||
#undef CMT
|
||||
#define CMT " (check cumulative saturation: shift by max, negative input)"
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 8);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 16);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 32);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_min_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_min_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_min_shmax, CMT);
|
||||
|
||||
|
||||
/* Fill input vector with positive values, to check normal case. */
|
||||
VDUP(vector1, , uint, u, 8, 8, 0x12);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0x4321);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0xDEADBEEF);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x1234);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x87654321);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0xDEADBEEF);
|
||||
|
||||
/* shift arbitrary amount. */
|
||||
#undef CMT
|
||||
#define CMT ""
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 6);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 7);
|
||||
TEST_VQRSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 8);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
exec_vqrshrun_high_n ();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
#include <arm_neon.h>
|
||||
#include "arm-neon-ref.h"
|
||||
#include "compute-ref-data.h"
|
||||
|
||||
/* Expected results. */
|
||||
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7,
|
||||
0xf8, 0xf8, 0xf9, 0xf9,
|
||||
0xfa, 0xfa, 0xfb, 0xfb };
|
||||
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
|
||||
0xfff8, 0xfff8, 0xfff9, 0xfff9 };
|
||||
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
|
||||
0xfffffffc, 0xfffffffc };
|
||||
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
|
||||
0xf4, 0xf5, 0xf6, 0xf7,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results with max input value shifted by 3. */
|
||||
VECT_VAR_DECL(expected_max_sh3,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f };
|
||||
VECT_VAR_DECL(expected_max_sh3,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff };
|
||||
VECT_VAR_DECL(expected_max_sh3,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0x7fffffff, 0x7fffffff };
|
||||
VECT_VAR_DECL(expected_max_sh3,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_max_sh3,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_max_sh3,uint,32,4) [] = { 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results with max input value shifted by type size. */
|
||||
VECT_VAR_DECL(expected_max_shmax,int,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f };
|
||||
VECT_VAR_DECL(expected_max_shmax,int,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0x7fff, 0x7fff, 0x7fff, 0x7fff };
|
||||
VECT_VAR_DECL(expected_max_shmax,int,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0x7fffffff, 0x7fffffff };
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,8,16) [] = { 0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_max_shmax,uint,32,4) [] = { 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
#define INSN vqshrn_high_n
|
||||
#define TEST_MSG "VQSHRN_HIGH_N"
|
||||
|
||||
#define FNNAME1(NAME) void exec_ ## NAME (void)
|
||||
#define FNNAME(NAME) FNNAME1(NAME)
|
||||
|
||||
FNNAME (INSN)
|
||||
{
|
||||
/* Basic test: y=vqshrn_high_n(x1,x2,v), then store the result. */
|
||||
#define TEST_VQSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
VECT_VAR(vector_res, T1, W2, N2) = \
|
||||
INSN##_##T2##W(VECT_VAR(vector1, T1, W2, N), \
|
||||
VECT_VAR(vector2, T1, W, N), V); \
|
||||
vst1q_##T2##W2(VECT_VAR(result, T1, W2, N2), \
|
||||
VECT_VAR(vector_res, T1, W2, N2));
|
||||
|
||||
/* Two auxliary macros are necessary to expand INSN */
|
||||
#define TEST_VQSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQSHRN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
#define TEST_VQSHRN_HIGH_N(T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQSHRN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
|
||||
DECL_VARIABLE(vector1, int, 8, 8);
|
||||
DECL_VARIABLE(vector1, int, 16, 4);
|
||||
DECL_VARIABLE(vector1, int, 32, 2);
|
||||
DECL_VARIABLE(vector1, uint, 8, 8);
|
||||
DECL_VARIABLE(vector1, uint, 16, 4);
|
||||
DECL_VARIABLE(vector1, uint, 32, 2);
|
||||
|
||||
/* vector is twice as large as vector_res. */
|
||||
DECL_VARIABLE(vector2, int, 16, 8);
|
||||
DECL_VARIABLE(vector2, int, 32, 4);
|
||||
DECL_VARIABLE(vector2, int, 64, 2);
|
||||
DECL_VARIABLE(vector2, uint, 16, 8);
|
||||
DECL_VARIABLE(vector2, uint, 32, 4);
|
||||
DECL_VARIABLE(vector2, uint, 64, 2);
|
||||
|
||||
DECL_VARIABLE(vector_res, int, 8, 16);
|
||||
DECL_VARIABLE(vector_res, int, 16, 8);
|
||||
DECL_VARIABLE(vector_res, int, 32, 4);
|
||||
DECL_VARIABLE(vector_res, uint, 8, 16);
|
||||
DECL_VARIABLE(vector_res, uint, 16, 8);
|
||||
DECL_VARIABLE(vector_res, uint, 32, 4);
|
||||
|
||||
clean_results ();
|
||||
|
||||
VLOAD(vector1, buffer, , int, s, 8, 8);
|
||||
VLOAD(vector1, buffer, , int, s, 16, 4);
|
||||
VLOAD(vector1, buffer, , int, s, 32, 2);
|
||||
VLOAD(vector1, buffer, , uint, u, 8, 8);
|
||||
VLOAD(vector1, buffer, , uint, u, 16, 4);
|
||||
VLOAD(vector1, buffer, , uint, u, 32, 2);
|
||||
|
||||
VLOAD(vector2, buffer, q, int, s, 16, 8);
|
||||
VLOAD(vector2, buffer, q, int, s, 32, 4);
|
||||
VLOAD(vector2, buffer, q, int, s, 64, 2);
|
||||
VLOAD(vector2, buffer, q, uint, u, 16, 8);
|
||||
VLOAD(vector2, buffer, q, uint, u, 32, 4);
|
||||
VLOAD(vector2, buffer, q, uint, u, 64, 2);
|
||||
|
||||
/* Choose shift amount arbitrarily. */
|
||||
#define CMT ""
|
||||
TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 1);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 1);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 2);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 2);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
|
||||
|
||||
/* Use max possible value as input. */
|
||||
VDUP(vector1, , int, s, 8, 8, 0x7F);
|
||||
VDUP(vector1, , int, s, 16, 4, 0x7FFF);
|
||||
VDUP(vector1, , int, s, 32, 2, 0x7FFFFFFFLL);
|
||||
VDUP(vector1, , uint, u, 8, 8, 0xFF);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0xFFFF);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0xFFFFFFFFULL);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
|
||||
VDUP(vector2, q, uint, u, 16, 8, 0xFFFF);
|
||||
VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFFF);
|
||||
VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFFFFFFFFFFFULL);
|
||||
|
||||
#undef CMT
|
||||
#define CMT " (check saturation: shift by 3)"
|
||||
TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 3);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 3);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 3);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 3);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 3);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 3);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_sh3, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_sh3, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh3, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh3, CMT);
|
||||
|
||||
|
||||
#undef CMT
|
||||
#define CMT " (check saturation: shift by max)"
|
||||
TEST_VQSHRN_HIGH_N(int, s, 16, 8, 8, 16, 8);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 32, 16, 4, 8, 16);
|
||||
TEST_VQSHRN_HIGH_N(int, s, 64, 32, 2, 4, 32);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 16, 8, 8, 16, 8);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 32, 16, 4, 8, 16);
|
||||
TEST_VQSHRN_HIGH_N(uint, u, 64, 32, 2, 4, 32);
|
||||
|
||||
CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shmax, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shmax, CMT);
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
exec_vqshrn_high_n ();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,140 @@
|
|||
#include <arm_neon.h>
|
||||
#include "arm-neon-ref.h"
|
||||
#include "compute-ref-data.h"
|
||||
|
||||
/* Expected results with negative input. */
|
||||
VECT_VAR_DECL(expected_neg,uint,8,16) [] = { 0xfe, 0xfe, 0xfe, 0xfe,
|
||||
0xfe, 0xfe, 0xfe, 0xfe,
|
||||
0x0, 0x0, 0x0, 0x0,
|
||||
0x0,0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_neg,uint,16,8) [] = { 0xfffd, 0xfffd, 0xfffd, 0xfffd,
|
||||
0x0, 0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected_neg,uint,32,4) [] = { 0xfffffffc, 0xfffffffc,
|
||||
0x0, 0x0 };
|
||||
|
||||
/* Expected results with max input value shifted by 1. */
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,8,16) [] = { 0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0x7f, 0x7f, 0x7f, 0x7f,
|
||||
0xff, 0xff, 0xff, 0xff,
|
||||
0xff, 0xff, 0xff, 0xff };
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,16,8) [] = { 0x7fff, 0x7fff, 0x7fff, 0x7fff,
|
||||
0xffff, 0xffff, 0xffff, 0xffff };
|
||||
VECT_VAR_DECL(expected_max_sh1,uint,32,4) [] = { 0x7fffffff, 0x7fffffff,
|
||||
0xffffffff, 0xffffffff };
|
||||
|
||||
/* Expected results. */
|
||||
VECT_VAR_DECL(expected,uint,8,16) [] = { 0x12, 0x12, 0x12, 0x12,
|
||||
0x12, 0x12, 0x12, 0x12,
|
||||
0x48, 0x48, 0x48, 0x48,
|
||||
0x48, 0x48, 0x48, 0x48 };
|
||||
VECT_VAR_DECL(expected,uint,16,8) [] = { 0x4321, 0x4321, 0x4321, 0x4321,
|
||||
0x0, 0x0, 0x0, 0x0 };
|
||||
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xdeadbeef, 0xdeadbeef,
|
||||
0xdeadbe, 0xdeadbe };
|
||||
|
||||
#define INSN vqshrun_high_n
|
||||
#define TEST_MSG "VQSHRUN_HIGH_N"
|
||||
|
||||
#define FNNAME1(NAME) void exec_ ## NAME (void)
|
||||
#define FNNAME(NAME) FNNAME1(NAME)
|
||||
|
||||
FNNAME (INSN)
|
||||
{
|
||||
/* Basic test: y=vqshrun_high_n(x,v), then store the result. */
|
||||
#define TEST_VQSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
VECT_VAR(vector_res, uint, W2, N2) = \
|
||||
INSN##_##T2##W(VECT_VAR(vector1,uint, W2, N), \
|
||||
VECT_VAR(vector2, T1, W, N), V); \
|
||||
vst1q_u##W2(VECT_VAR(result, uint, W2, N2), \
|
||||
VECT_VAR(vector_res, uint, W2, N2)); \
|
||||
|
||||
/* Two auxliary macros are necessary to expand INSN */
|
||||
#define TEST_VQSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQSHRUN_HIGH_N2(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
#define TEST_VQSHRUN_HIGH_N(T1, T2, W, W2, N, N2, V) \
|
||||
TEST_VQSHRUN_HIGH_N1(INSN, T1, T2, W, W2, N, N2, V)
|
||||
|
||||
|
||||
DECL_VARIABLE(vector1, uint, 8, 8);
|
||||
DECL_VARIABLE(vector1, uint, 16, 4);
|
||||
DECL_VARIABLE(vector1, uint, 32, 2);
|
||||
|
||||
/* vector is twice as large as vector_res. */
|
||||
DECL_VARIABLE(vector2, int, 16, 8);
|
||||
DECL_VARIABLE(vector2, int, 32, 4);
|
||||
DECL_VARIABLE(vector2, int, 64, 2);
|
||||
|
||||
DECL_VARIABLE(vector_res, uint, 8, 16);
|
||||
DECL_VARIABLE(vector_res, uint, 16, 8);
|
||||
DECL_VARIABLE(vector_res, uint, 32, 4);
|
||||
|
||||
clean_results ();
|
||||
|
||||
/* Fill input vector with negative values, to check saturation on
|
||||
limits. */
|
||||
VDUP(vector1, , uint, u, 8, 8, -2);
|
||||
VDUP(vector1, , uint, u, 16, 4, -3);
|
||||
VDUP(vector1, , uint, u, 32, 2, -4);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, -2);
|
||||
VDUP(vector2, q, int, s, 32, 4, -3);
|
||||
VDUP(vector2, q, int, s, 64, 2, -4);
|
||||
|
||||
/* Choose shift amount arbitrarily. */
|
||||
#define CMT " (negative input)"
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 3);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 4);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 2);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_neg, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_neg, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_neg, CMT);
|
||||
|
||||
|
||||
/* Fill input vector with max value, to check saturation on
|
||||
limits. */
|
||||
VDUP(vector1, , uint, u, 8, 8, 0x7F);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0x7FFF);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0x7FFFFFFFLL);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x7FFF);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x7FFFFFFF);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0x7FFFFFFFFFFFFFFFLL);
|
||||
|
||||
#undef CMT
|
||||
#define CMT " (check cumulative saturation)"
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 1);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 1);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 1);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_sh1, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_sh1, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_sh1, CMT);
|
||||
|
||||
|
||||
/* Fill input vector with positive values, to check normal case. */
|
||||
VDUP(vector1, , uint, u, 8, 8, 0x12);
|
||||
VDUP(vector1, , uint, u, 16, 4, 0x4321);
|
||||
VDUP(vector1, , uint, u, 32, 2, 0xDEADBEEF);
|
||||
|
||||
VDUP(vector2, q, int, s, 16, 8, 0x1234);
|
||||
VDUP(vector2, q, int, s, 32, 4, 0x87654321);
|
||||
VDUP(vector2, q, int, s, 64, 2, 0xDEADBEEF);
|
||||
|
||||
#undef CMT
|
||||
#define CMT ""
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 16, 8, 8, 16, 6);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 32, 16, 4, 8, 7);
|
||||
TEST_VQSHRUN_HIGH_N(int, s, 64, 32, 2, 4, 8);
|
||||
|
||||
CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, CMT);
|
||||
CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT);
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
exec_vqshrun_high_n ();
|
||||
return 0;
|
||||
}
|
|
@ -113,12 +113,12 @@ ONE (vmovn_high, uint32x4_t, uint32x2_t, uint64x2_t, u64)
|
|||
/* { dg-final { scan-assembler-times "raddhn2\\tv" 6} } */
|
||||
/* { dg-final { scan-assembler-times "\\trshrn2 v" 6} } */
|
||||
/* { dg-final { scan-assembler-times "\\tshrn2 v" 6} } */
|
||||
/* { dg-final { scan-assembler-times "sqshrun2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqrshrun2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqshrn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "uqshrn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqrshrn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "uqrshrn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqshrun2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqrshrun2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqshrn2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "uqshrn2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqrshrn2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "uqrshrn2\\tv" 3} } */
|
||||
/* { dg-final { scan-assembler-times "uqxtn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqxtn2 v" 3} } */
|
||||
/* { dg-final { scan-assembler-times "sqxtun2 v" 3} } */
|
||||
|
|
Loading…
Add table
Reference in a new issue