[Patch][GCC][AArch64] - Lower store and load neon builtins to gimple
2021-10-20  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Jirui Wu  <jirui.wu@arm.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c (aarch64_general_gimple_fold_builtin):
	Lower vld1 and vst1 variants of the neon builtins.
	* config/aarch64/aarch64-protos.h (aarch64_general_gimple_fold_builtin):
	Add gsi parameter.
	* config/aarch64/aarch64.c (aarch64_general_gimple_fold_builtin):
	Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/fmla_intrinsic_1.c: Prevent over-optimization.
	* gcc.target/aarch64/fmls_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/fmul_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/mla_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/mls_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/mul_intrinsic_1.c: Likewise.
	* gcc.target/aarch64/simd/vmul_elem_1.c: Likewise.
	* gcc.target/aarch64/vclz.c: Likewise.
	* gcc.target/aarch64/vneg_s.c: Likewise.
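To make the effect of the patch concrete, here is a sketch of what the fold does for a vld1q_s32 call. The GIMPLE dump lines in the comment are illustrative only; the exact dump text may differ:

#include <arm_neon.h>

int32x4_t
foo (int32_t *p)
{
  /* Before this patch the intrinsic reaches the middle end as an opaque
     target builtin call:
       _1 = __builtin_aarch64_ld1v4si (p);
     After it (on little-endian) the call is folded to an ordinary vector
     load, which alias analysis and redundancy elimination understand:
       _1 = MEM <int32x4_t> [(int32_t *)p];  */
  return vld1q_s32 (p);
}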
This commit is contained in: parent 914045dff1, commit ad44c6a56c.
12 changed files with 369 additions and 250 deletions.
gcc/config/aarch64/aarch64-builtins.c

@@ -46,6 +46,7 @@
 #include "emit-rtl.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "gimple-fold.h"
 
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
@@ -2399,11 +2400,65 @@ aarch64_general_fold_builtin (unsigned int fcode, tree type,
   return NULL_TREE;
 }
 
+enum aarch64_simd_type
+get_mem_type_for_load_store (unsigned int fcode)
+{
+  switch (fcode)
+  {
+    VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
+    VAR1 (STORE1, st1, 0, STORE, v8qi)
+      return Int8x8_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
+    VAR1 (STORE1, st1, 0, STORE, v16qi)
+      return Int8x16_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
+    VAR1 (STORE1, st1, 0, STORE, v4hi)
+      return Int16x4_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
+    VAR1 (STORE1, st1, 0, STORE, v8hi)
+      return Int16x8_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v2si)
+    VAR1 (STORE1, st1, 0, STORE, v2si)
+      return Int32x2_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v4si)
+    VAR1 (STORE1, st1, 0, STORE, v4si)
+      return Int32x4_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v2di)
+    VAR1 (STORE1, st1, 0, STORE, v2di)
+      return Int64x2_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
+    VAR1 (STORE1, st1, 0, STORE, v4hf)
+      return Float16x4_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
+    VAR1 (STORE1, st1, 0, STORE, v8hf)
+      return Float16x8_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
+    VAR1 (STORE1, st1, 0, STORE, v4bf)
+      return Bfloat16x4_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
+    VAR1 (STORE1, st1, 0, STORE, v8bf)
+      return Bfloat16x8_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
+    VAR1 (STORE1, st1, 0, STORE, v2sf)
+      return Float32x2_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
+    VAR1 (STORE1, st1, 0, STORE, v4sf)
+      return Float32x4_t;
+    VAR1 (LOAD1, ld1, 0, LOAD, v2df)
+    VAR1 (STORE1, st1, 0, STORE, v2df)
+      return Float64x2_t;
+    default:
+      gcc_unreachable ();
+      break;
+  }
+}
+
 /* Try to fold STMT, given that it's a call to the built-in function with
    subcode FCODE.  Return the new statement on success and null on
    failure.  */
 gimple *
-aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
+aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
+				     gimple_stmt_iterator *gsi)
 {
   gimple *new_stmt = NULL;
   unsigned nargs = gimple_call_num_args (stmt);
@@ -2421,6 +2476,52 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
 					       1, args[0]);
 	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
 	break;
+
+      /* Lower store and load neon builtins to gimple.  */
+      BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
+	if (!BYTES_BIG_ENDIAN)
+	  {
+	    enum aarch64_simd_type mem_type
+	      = get_mem_type_for_load_store (fcode);
+	    aarch64_simd_type_info simd_type
+	      = aarch64_simd_types[mem_type];
+	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree zero = build_zero_cst (elt_ptr_type);
+	    gimple_seq stmts = NULL;
+	    tree base = gimple_convert (&stmts, elt_ptr_type,
+					args[0]);
+	    if (stmts)
+	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    new_stmt
+	      = gimple_build_assign (gimple_get_lhs (stmt),
+				     fold_build2 (MEM_REF,
+						  simd_type.itype,
+						  base, zero));
+	  }
+	break;
+
+      BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
+	if (!BYTES_BIG_ENDIAN)
+	  {
+	    enum aarch64_simd_type mem_type
+	      = get_mem_type_for_load_store (fcode);
+	    aarch64_simd_type_info simd_type
+	      = aarch64_simd_types[mem_type];
+	    tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+	    tree zero = build_zero_cst (elt_ptr_type);
+	    gimple_seq stmts = NULL;
+	    tree base = gimple_convert (&stmts, elt_ptr_type,
+					args[0]);
+	    if (stmts)
+	      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    new_stmt
+	      = gimple_build_assign (fold_build2 (MEM_REF,
+						  simd_type.itype,
+						  base,
+						  zero), args[1]);
+	  }
+	break;
+
       BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
       BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
 	new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
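One detail of the hunks above is worth spelling out: the MEM_REF is built with the vector type (simd_type.itype) but with a pointer to the element type, which is how the GIMPLE reference records that only element alignment is guaranteed. A sketch of the store side, with illustrative dump lines in the comment:

#include <arm_neon.h>

void
bar (float *p, float32x4_t v)
{
  /* __builtin_aarch64_st1v4sf (p, v) becomes, roughly:
       MEM <float32x4_t> [(float *)p] = v_2(D);
     The "(float *)" in the reference keeps the access correct even when P
     is not 16-byte aligned, since vst1q_f32 only requires element
     alignment.  */
  vst1q_f32 (p, v);
}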
gcc/config/aarch64/aarch64-protos.h

@@ -962,7 +962,8 @@ void aarch64_override_options_internal (struct gcc_options *);
 const char *aarch64_general_mangle_builtin_type (const_tree);
 void aarch64_general_init_builtins (void);
 tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *);
-gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *);
+gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
+					     gimple_stmt_iterator *);
 rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
 tree aarch64_general_builtin_decl (unsigned, bool);
 tree aarch64_general_builtin_rsqrt (unsigned int);
gcc/config/aarch64/aarch64.c

@@ -14156,7 +14156,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   switch (code & AARCH64_BUILTIN_CLASS)
     {
     case AARCH64_BUILTIN_GENERAL:
-      new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt);
+      new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt, gsi);
       break;
 
     case AARCH64_BUILTIN_SVE:
gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c

@@ -11,6 +11,7 @@ extern void abort (void);
 
 #define TEST_VMLA(q1, q2, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vfma##q1##_lane##q2##_f##size (float##size##_t * res,	\
 				    const float##size##_t *in1,	\
 				    const float##size##_t *in2)	\
@@ -104,12 +105,12 @@ main (int argc, char **argv)
    vfmaq_laneq_f32.  */
 /* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 2 } } */
 
-/* vfma_lane_f64.  */
-/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 } } */
+/* vfma_lane_f64.
+   vfma_laneq_f64.  */
+/* { dg-final { scan-assembler-times "fmadd\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
 
 /* vfmaq_lane_f64.
-   vfma_laneq_f64.
    vfmaq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
 
gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c

@@ -11,6 +11,7 @@ extern void abort (void);
 
 #define TEST_VMLS(q1, q2, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vfms##q1##_lane##q2##_f##size (float##size##_t * res,	\
 				    const float##size##_t *in1,	\
 				    const float##size##_t *in2)	\
@@ -105,12 +106,12 @@ main (int argc, char **argv)
    vfmsq_laneq_f32.  */
 /* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 2 } } */
 
-/* vfms_lane_f64.  */
-/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 1 } } */
+/* vfms_lane_f64.
+   vfms_laneq_f64.  */
+/* { dg-final { scan-assembler-times "fmsub\\td\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+\, d\[0-9\]+" 2 } } */
 
 /* vfmsq_lane_f64.
-   vfms_laneq_f64.
    vfmsq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
 
gcc/testsuite/gcc.target/aarch64/fmul_intrinsic_1.c

@@ -9,6 +9,7 @@ extern double fabs (double);
 
 #define TEST_VMUL(q1, q2, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vmul##q1##_lane##q2##_f##size (float##size##_t * res,	\
 				    const float##size##_t *in1,	\
 				    const float##size##_t *in2)	\
@@ -104,12 +105,12 @@ main (int argc, char **argv)
    vmulq_laneq_f32.  */
 /* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[\[0-9\]+\\\]" 2 } } */
 
-/* vmul_lane_f64.  */
-/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 1 } } */
+/* vmul_lane_f64.
+   vmul_laneq_f64.  */
+/* { dg-final { scan-assembler-times "fmul\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
 
-/* vmul_laneq_f64.
-   vmulq_lane_f64.
+/* vmulq_lane_f64.
    vmulq_laneq_f64.  */
-/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 3 } } */
+/* { dg-final { scan-assembler-times "fmul\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[\[0-9\]+\\\]" 2 } } */
 
gcc/testsuite/gcc.target/aarch64/mla_intrinsic_1.c

@@ -11,6 +11,7 @@ extern void abort (void);
 
 #define TEST_VMLA(q, su, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vmlaq_lane##q##_##su##size (MAP##su (size, ) * res,	\
 				 const MAP##su(size, ) *in1,	\
 				 const MAP##su(size, ) *in2)	\
gcc/testsuite/gcc.target/aarch64/mls_intrinsic_1.c

@@ -11,6 +11,7 @@ extern void abort (void);
 
 #define TEST_VMLS(q, su, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vmlsq_lane##q##_##su##size (MAP##su (size, ) * res,	\
 				 const MAP##su(size, ) *in1,	\
 				 const MAP##su(size, ) *in2)	\
gcc/testsuite/gcc.target/aarch64/mul_intrinsic_1.c

@@ -11,6 +11,7 @@ extern void abort (void);
 
 #define TEST_VMUL(q, su, size, in1_lanes, in2_lanes)		\
 static void							\
+__attribute__((noipa,noinline))					\
 test_vmulq_lane##q##_##su##size (MAP##su (size, ) * res,	\
 				 const MAP##su(size, ) *in1,	\
 				 const MAP##su(size, ) *in2)	\
gcc/testsuite/gcc.target/aarch64/simd/vmul_elem_1.c

@@ -146,12 +146,14 @@ check_v2sf (float32_t elemA, float32_t elemB)
 
   vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_1[indx])
      abort ();
 
   vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_2[indx])
      abort ();
 
@@ -169,24 +171,28 @@ check_v4sf (float32_t elemA, float32_t elemB, float32_t elemC, float32_t elemD)
 
   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_1[indx])
      abort ();
 
   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_2[indx])
      abort ();
 
   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_3[indx])
      abort ();
 
   vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_4[indx])
      abort ();
 
@@ -204,12 +210,14 @@ check_v2df (float64_t elemdC, float64_t elemdD)
 
   vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_1[indx])
      abort ();
 
   vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_2[indx])
      abort ();
 
@@ -227,12 +235,14 @@ check_v2si (int32_t elemsA, int32_t elemsB)
 
   vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (vecs32x2_res[indx] != expecteds2_1[indx])
      abort ();
 
   vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (vecs32x2_res[indx] != expecteds2_2[indx])
      abort ();
 
@@ -248,12 +258,14 @@ check_v2si_unsigned (uint32_t elemusA, uint32_t elemusB)
 
   vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (vecus32x2_res[indx] != expectedus2_1[indx])
      abort ();
 
   vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 2; indx++)
     if (vecus32x2_res[indx] != expectedus2_2[indx])
      abort ();
 
@@ -271,24 +283,28 @@ check_v4si (int32_t elemsA, int32_t elemsB, int32_t elemsC, int32_t elemsD)
 
   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecs32x4_res[indx] != expecteds4_1[indx])
      abort ();
 
   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecs32x4_res[indx] != expecteds4_2[indx])
      abort ();
 
   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecs32x4_res[indx] != expecteds4_3[indx])
      abort ();
 
   vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecs32x4_res[indx] != expecteds4_4[indx])
      abort ();
 
@@ -305,24 +321,28 @@ check_v4si_unsigned (uint32_t elemusA, uint32_t elemusB, uint32_t elemusC,
 
   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecus32x4_res[indx] != expectedus4_1[indx])
      abort ();
 
   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecus32x4_res[indx] != expectedus4_2[indx])
      abort ();
 
   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecus32x4_res[indx] != expectedus4_3[indx])
      abort ();
 
   vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecus32x4_res[indx] != expectedus4_4[indx])
      abort ();
 
@@ -341,24 +361,28 @@ check_v4hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD)
 
   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vech16x4_res[indx] != expectedh4_1[indx])
      abort ();
 
   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vech16x4_res[indx] != expectedh4_2[indx])
      abort ();
 
   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vech16x4_res[indx] != expectedh4_3[indx])
      abort ();
 
   vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vech16x4_res[indx] != expectedh4_4[indx])
      abort ();
 
@@ -375,24 +399,28 @@ check_v4hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
 
   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecuh16x4_res[indx] != expecteduh4_1[indx])
      abort ();
 
   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecuh16x4_res[indx] != expecteduh4_2[indx])
      abort ();
 
   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecuh16x4_res[indx] != expecteduh4_3[indx])
      abort ();
 
   vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 4; indx++)
     if (vecuh16x4_res[indx] != expecteduh4_4[indx])
      abort ();
 
@@ -411,48 +439,56 @@ check_v8hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD,
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_1[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_2[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_3[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_4[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhE));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_5[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhF));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_6[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhG));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_7[indx])
      abort ();
 
   vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhH));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vech16x8_res[indx] != expectedh8_8[indx])
      abort ();
 
@@ -470,48 +506,56 @@ check_v8hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC,
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhA));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_1[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhB));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_2[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhC));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_3[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhD));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_4[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhE));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_5[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhF));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_6[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhG));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_7[indx])
      abort ();
 
   vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhH));
 
+  asm volatile ("" : : : "memory");
   for (indx = 0; indx < 8; indx++)
     if (vecuh16x8_res[indx] != expecteduh8_8[indx])
      abort ();
 
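The hunks above repeatedly insert an empty asm with a "memory" clobber between each store and its checking loop. That idiom is a compiler barrier: now that the vst1 calls are visible GIMPLE stores, the optimizers could otherwise forward the stored value straight into the following loads and the test would no longer exercise the store. A minimal standalone illustration (names are hypothetical, not from the testsuite):

#include <arm_neon.h>
#include <stdlib.h>

void
check (float32x2_t v, const float *expected)
{
  float res[2];

  vst1_f32 (res, v);

  /* Compiler barrier: tells GCC that memory may have changed, so the
     loads below must really read RES back instead of being forwarded
     from the vector store above.  */
  asm volatile ("" : : : "memory");

  for (int i = 0; i < 2; i++)
    if (res[i] != expected[i])
      abort ();
}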
gcc/testsuite/gcc.target/aarch64/vclz.c

@@ -66,22 +66,62 @@ extern void abort (void);
 #define CLZ_INST(reg_len, data_len, is_signed) \
   CONCAT1 (vclz, POSTFIX (reg_len, data_len, is_signed))
 
-#define RUN_TEST(test_set, answ_set, reg_len, data_len, is_signed, n)	\
-  INHIB_OPTIMIZATION;							\
-  a = LOAD_INST (reg_len, data_len, is_signed) (test_set);		\
-  b = LOAD_INST (reg_len, data_len, is_signed) (answ_set);		\
-  a = CLZ_INST (reg_len, data_len, is_signed) (a);			\
-  for (i = 0; i < n; i++)						\
-    if (a [i] != b [i])							\
-      return 1;
+#define BUILD_TEST(type, size, lanes)				\
+int __attribute__((noipa,noinline))				\
+run_test##type##size##x##lanes (int##size##_t* test_set,	\
+				int##size##_t* answ_set,	\
+				int reg_len, int data_len,	\
+				int n)				\
+{								\
+  int i;							\
+  INHIB_OPTIMIZATION;						\
+  int##size##x##lanes##_t a = vld1##type##size (test_set);	\
+  int##size##x##lanes##_t b = vld1##type##size (answ_set);	\
+  a = vclz##type##size (a);					\
+  for (i = 0; i < n; i++){					\
+    if (a [i] != b [i])						\
+      return 1;							\
+  }								\
+  return 0;							\
+}
+
+/* unsigned inputs  */
+#define U_BUILD_TEST(type, size, lanes)				\
+int __attribute__((noipa,noinline))				\
+run_test##type##size##x##lanes (uint##size##_t* test_set,	\
+				uint##size##_t* answ_set,	\
+				int reg_len, int data_len,	\
+				int n)				\
+{								\
+  int i;							\
+  INHIB_OPTIMIZATION;						\
+  uint##size##x##lanes##_t a = vld1##type##size (test_set);	\
+  uint##size##x##lanes##_t b = vld1##type##size (answ_set);	\
+  a = vclz##type##size (a);					\
+  for (i = 0; i < n; i++){					\
+    if (a [i] != b [i])						\
+      return 1;							\
+  }								\
+  return 0;							\
+}
+
+BUILD_TEST (_s, 8, 8)
+BUILD_TEST (_s, 16, 4)
+BUILD_TEST (_s, 32, 2)
+BUILD_TEST (q_s, 8, 16)
+BUILD_TEST (q_s, 16, 8)
+BUILD_TEST (q_s, 32, 4)
+
+U_BUILD_TEST (_u, 8, 8)
+U_BUILD_TEST (_u, 16, 4)
+U_BUILD_TEST (_u, 32, 2)
+U_BUILD_TEST (q_u, 8, 16)
+U_BUILD_TEST (q_u, 16, 8)
+U_BUILD_TEST (q_u, 32, 4)
 
 int __attribute__ ((noinline))
 test_vclz_s8 ()
 {
-  int i;
-  int8x8_t a;
-  int8x8_t b;
-
   int8_t test_set0[8] = {
     TEST0, TEST1, TEST2, TEST3,
     TEST4, TEST5, TEST6, TEST7
@@ -98,22 +138,18 @@ test_vclz_s8 ()
     0, 0, 0, 0,
     0, 0, 0, 0
   };
-  RUN_TEST (test_set0, answ_set0, 64, 8, 1, 8);
-  RUN_TEST (test_set1, answ_set1, 64, 8, 1, 1);
+  int o1 = run_test_s8x8 (test_set0, answ_set0, 64, 8, 8);
+  int o2 = run_test_s8x8 (test_set1, answ_set1, 64, 8, 1);
 
-  return 0;
+  return o1||o2;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
-/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 4 } } */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 2 } } */
 
 int __attribute__ ((noinline))
 test_vclz_s16 ()
 {
-  int i;
-  int16x4_t a;
-  int16x4_t b;
-
   int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   int16_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
   int16_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
@@ -126,25 +162,21 @@ test_vclz_s16 ()
   int16_t answ_set3[4] = { 4, 3, 2, 1 };
   int16_t answ_set4[4] = { 0, 0, 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 64, 16, 1, 4);
-  RUN_TEST (test_set1, answ_set1, 64, 16, 1, 4);
-  RUN_TEST (test_set2, answ_set2, 64, 16, 1, 4);
-  RUN_TEST (test_set3, answ_set3, 64, 16, 1, 4);
-  RUN_TEST (test_set4, answ_set4, 64, 16, 1, 1);
+  int o1 = run_test_s16x4 (test_set0, answ_set0, 64, 16, 4);
+  int o2 = run_test_s16x4 (test_set1, answ_set1, 64, 16, 4);
+  int o3 = run_test_s16x4 (test_set2, answ_set2, 64, 16, 4);
+  int o4 = run_test_s16x4 (test_set3, answ_set3, 64, 16, 4);
+  int o5 = run_test_s16x4 (test_set4, answ_set4, 64, 16, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
-/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 10} } */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 2} } */
 
 int __attribute__ ((noinline))
 test_vclz_s32 ()
 {
-  int i;
-  int32x2_t a;
-  int32x2_t b;
-
   int32_t test_set0[2] = { TEST0, TEST1 };
   int32_t test_set1[2] = { TEST2, TEST3 };
   int32_t test_set2[2] = { TEST4, TEST5 };
@@ -181,37 +213,34 @@ test_vclz_s32 ()
   int32_t answ_set15[2] = { 2, 1 };
   int32_t answ_set16[2] = { 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 64, 32, 1, 2);
-  RUN_TEST (test_set1, answ_set1, 64, 32, 1, 2);
-  RUN_TEST (test_set2, answ_set2, 64, 32, 1, 2);
-  RUN_TEST (test_set3, answ_set3, 64, 32, 1, 2);
-  RUN_TEST (test_set4, answ_set4, 64, 32, 1, 2);
-  RUN_TEST (test_set5, answ_set5, 64, 32, 1, 2);
-  RUN_TEST (test_set6, answ_set6, 64, 32, 1, 2);
-  RUN_TEST (test_set7, answ_set7, 64, 32, 1, 2);
-  RUN_TEST (test_set8, answ_set8, 64, 32, 1, 2);
-  RUN_TEST (test_set9, answ_set9, 64, 32, 1, 2);
-  RUN_TEST (test_set10, answ_set10, 64, 32, 1, 2);
-  RUN_TEST (test_set11, answ_set11, 64, 32, 1, 2);
-  RUN_TEST (test_set12, answ_set12, 64, 32, 1, 2);
-  RUN_TEST (test_set13, answ_set13, 64, 32, 1, 2);
-  RUN_TEST (test_set14, answ_set14, 64, 32, 1, 2);
-  RUN_TEST (test_set15, answ_set15, 64, 32, 1, 2);
-  RUN_TEST (test_set16, answ_set16, 64, 32, 1, 1);
+  int o1 = run_test_s32x2 (test_set0, answ_set0, 64, 32, 2);
+  int o2 = run_test_s32x2 (test_set1, answ_set1, 64, 32, 2);
+  int o3 = run_test_s32x2 (test_set2, answ_set2, 64, 32, 2);
+  int o4 = run_test_s32x2 (test_set3, answ_set3, 64, 32, 2);
+  int o5 = run_test_s32x2 (test_set4, answ_set4, 64, 32, 2);
+  int o6 = run_test_s32x2 (test_set5, answ_set5, 64, 32, 2);
+  int o7 = run_test_s32x2 (test_set6, answ_set6, 64, 32, 2);
+  int o8 = run_test_s32x2 (test_set7, answ_set7, 64, 32, 2);
+  int o9 = run_test_s32x2 (test_set8, answ_set8, 64, 32, 2);
+  int o10 = run_test_s32x2 (test_set9, answ_set9, 64, 32, 2);
+  int o11 = run_test_s32x2 (test_set10, answ_set10, 64, 32, 2);
+  int o12 = run_test_s32x2 (test_set11, answ_set11, 64, 32, 2);
+  int o13 = run_test_s32x2 (test_set12, answ_set12, 64, 32, 2);
+  int o14 = run_test_s32x2 (test_set13, answ_set13, 64, 32, 2);
+  int o15 = run_test_s32x2 (test_set14, answ_set14, 64, 32, 2);
+  int o16 = run_test_s32x2 (test_set15, answ_set15, 64, 32, 2);
+  int o17 = run_test_s32x2 (test_set16, answ_set16, 64, 32, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5||o6||o7||o8||o9||o10||o11||o12||o13||o14
+	 ||o15||o16||o17;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
-/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 34 } } */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 2 } } */
 
 int __attribute__ ((noinline))
 test_vclzq_s8 ()
 {
-  int i;
-  int8x16_t a;
-  int8x16_t b;
-
   int8_t test_set0[16] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7,
     TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8, TEST8
@@ -219,8 +248,8 @@ test_vclzq_s8 ()
   int8_t answ_set0[16] = {
     8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0
   };
-  RUN_TEST (test_set0, answ_set0, 128, 8, 1, 9);
-  return 0;
+  int o1 = run_testq_s8x16 (test_set0, answ_set0, 128, 8, 9);
+  return o1;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
@@ -229,10 +258,6 @@ test_vclzq_s8 ()
 int __attribute__ ((noinline))
 test_vclzq_s16 ()
 {
-  int i;
-  int16x8_t a;
-  int16x8_t b;
-
   int16_t test_set0[8] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7
   };
@@ -252,23 +277,19 @@ test_vclzq_s16 ()
   int16_t answ_set2[8] = {
     0, 0, 0, 0, 0, 0, 0, 0
   };
-  RUN_TEST (test_set0, answ_set0, 128, 16, 1, 8);
-  RUN_TEST (test_set1, answ_set1, 128, 16, 1, 8);
-  RUN_TEST (test_set2, answ_set2, 128, 16, 1, 1);
+  int o1 = run_testq_s16x8 (test_set0, answ_set0, 128, 16, 8);
+  int o2 = run_testq_s16x8 (test_set1, answ_set1, 128, 16, 8);
+  int o3 = run_testq_s16x8 (test_set2, answ_set2, 128, 16, 1);
 
-  return 0;
+  return o1||o2||o3;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
-/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 6 } } */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 2 } } */
 
 int __attribute__ ((noinline))
 test_vclzq_s32 ()
 {
-  int i;
-  int32x4_t a;
-  int32x4_t b;
-
   int32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   int32_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
   int32_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
@@ -289,27 +310,23 @@ test_vclzq_s32 ()
   int32_t answ_set7[4] = { 4, 3, 2, 1 };
   int32_t answ_set8[4] = { 0, 0, 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 128, 32, 1, 4);
-  RUN_TEST (test_set1, answ_set1, 128, 32, 1, 4);
-  RUN_TEST (test_set2, answ_set2, 128, 32, 1, 4);
-  RUN_TEST (test_set3, answ_set3, 128, 32, 1, 4);
-  RUN_TEST (test_set4, answ_set4, 128, 32, 1, 1);
+  int o1 = run_testq_s32x4 (test_set0, answ_set0, 128, 32, 4);
+  int o2 = run_testq_s32x4 (test_set1, answ_set1, 128, 32, 4);
+  int o3 = run_testq_s32x4 (test_set2, answ_set2, 128, 32, 4);
+  int o4 = run_testq_s32x4 (test_set3, answ_set3, 128, 32, 4);
+  int o5 = run_testq_s32x4 (test_set4, answ_set4, 128, 32, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5;
 }
 
 /* Double scan-assembler-times to take account of unsigned functions.  */
-/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 10 } } */
+/* { dg-final { scan-assembler-times "clz\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 2 } } */
 
 /* Unsigned versions.  */
 
 int __attribute__ ((noinline))
 test_vclz_u8 ()
 {
-  int i;
-  uint8x8_t a;
-  uint8x8_t b;
-
   uint8_t test_set0[8] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, TEST6, TEST7
   };
@@ -323,10 +340,10 @@ test_vclz_u8 ()
     0, 0, 0, 0, 0, 0, 0, 0
   };
 
-  RUN_TEST (test_set0, answ_set0, 64, 8, 0, 8);
-  RUN_TEST (test_set1, answ_set1, 64, 8, 0, 1);
+  int o1 = run_test_u8x8 (test_set0, answ_set0, 64, 8, 8);
+  int o2 = run_test_u8x8 (test_set1, answ_set1, 64, 8, 1);
 
-  return 0;
+  return o1||o2;
 }
 
 /* ASM scan near test for signed version.  */
@@ -334,10 +351,6 @@ test_vclz_u8 ()
 int __attribute__ ((noinline))
 test_vclz_u16 ()
 {
-  int i;
-  uint16x4_t a;
-  uint16x4_t b;
-
   uint16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   uint16_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
   uint16_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
@@ -350,13 +363,13 @@ test_vclz_u16 ()
   uint16_t answ_set3[4] = { 4, 3, 2, 1 };
   uint16_t answ_set4[4] = { 0, 0, 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 64, 16, 0, 4);
-  RUN_TEST (test_set1, answ_set1, 64, 16, 0, 4);
-  RUN_TEST (test_set2, answ_set2, 64, 16, 0, 4);
-  RUN_TEST (test_set3, answ_set3, 64, 16, 0, 4);
-  RUN_TEST (test_set4, answ_set4, 64, 16, 0, 1);
+  int o1 = run_test_u16x4 (test_set0, answ_set0, 64, 16, 4);
+  int o2 = run_test_u16x4 (test_set1, answ_set1, 64, 16, 4);
+  int o3 = run_test_u16x4 (test_set2, answ_set2, 64, 16, 4);
+  int o4 = run_test_u16x4 (test_set3, answ_set3, 64, 16, 4);
+  int o5 = run_test_u16x4 (test_set4, answ_set4, 64, 16, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5;
 }
 
 /* ASM scan near test for signed version.  */
@@ -364,10 +377,6 @@ test_vclz_u16 ()
 int __attribute__ ((noinline))
 test_vclz_u32 ()
 {
-  int i;
-  uint32x2_t a;
-  uint32x2_t b;
-
   uint32_t test_set0[2] = { TEST0, TEST1 };
   uint32_t test_set1[2] = { TEST2, TEST3 };
   uint32_t test_set2[2] = { TEST4, TEST5 };
@@ -404,25 +413,26 @@ test_vclz_u32 ()
   uint32_t answ_set15[2] = { 2, 1 };
   uint32_t answ_set16[2] = { 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 64, 32, 0, 2);
-  RUN_TEST (test_set1, answ_set1, 64, 32, 0, 2);
-  RUN_TEST (test_set2, answ_set2, 64, 32, 0, 2);
-  RUN_TEST (test_set3, answ_set3, 64, 32, 0, 2);
-  RUN_TEST (test_set4, answ_set4, 64, 32, 0, 2);
-  RUN_TEST (test_set5, answ_set5, 64, 32, 0, 2);
-  RUN_TEST (test_set6, answ_set6, 64, 32, 0, 2);
-  RUN_TEST (test_set7, answ_set7, 64, 32, 0, 2);
-  RUN_TEST (test_set8, answ_set8, 64, 32, 0, 2);
-  RUN_TEST (test_set9, answ_set9, 64, 32, 0, 2);
-  RUN_TEST (test_set10, answ_set10, 64, 32, 0, 2);
-  RUN_TEST (test_set11, answ_set11, 64, 32, 0, 2);
-  RUN_TEST (test_set12, answ_set12, 64, 32, 0, 2);
-  RUN_TEST (test_set13, answ_set13, 64, 32, 0, 2);
-  RUN_TEST (test_set14, answ_set14, 64, 32, 0, 2);
-  RUN_TEST (test_set15, answ_set15, 64, 32, 0, 2);
-  RUN_TEST (test_set16, answ_set16, 64, 32, 0, 1);
+  int o1 = run_test_u32x2 (test_set0, answ_set0, 64, 32, 2);
+  int o2 = run_test_u32x2 (test_set1, answ_set1, 64, 32, 2);
+  int o3 = run_test_u32x2 (test_set2, answ_set2, 64, 32, 2);
+  int o4 = run_test_u32x2 (test_set3, answ_set3, 64, 32, 2);
+  int o5 = run_test_u32x2 (test_set4, answ_set4, 64, 32, 2);
+  int o6 = run_test_u32x2 (test_set5, answ_set5, 64, 32, 2);
+  int o7 = run_test_u32x2 (test_set6, answ_set6, 64, 32, 2);
+  int o8 = run_test_u32x2 (test_set7, answ_set7, 64, 32, 2);
+  int o9 = run_test_u32x2 (test_set8, answ_set8, 64, 32, 2);
+  int o10 = run_test_u32x2 (test_set9, answ_set9, 64, 32, 2);
+  int o11 = run_test_u32x2 (test_set10, answ_set10, 64, 32, 2);
+  int o12 = run_test_u32x2 (test_set11, answ_set11, 64, 32, 2);
+  int o13 = run_test_u32x2 (test_set12, answ_set12, 64, 32, 2);
+  int o14 = run_test_u32x2 (test_set13, answ_set13, 64, 32, 2);
+  int o15 = run_test_u32x2 (test_set14, answ_set14, 64, 32, 2);
+  int o16 = run_test_u32x2 (test_set15, answ_set15, 64, 32, 2);
+  int o17 = run_test_u32x2 (test_set16, answ_set16, 64, 32, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5||o6||o7||o8||o9||o10||o11||o12||o13||o14
+	 ||o15||o16||o17;
 }
 
 /* ASM scan near test for signed version.  */
@@ -441,9 +451,9 @@ test_vclzq_u8 ()
   uint8_t answ_set0[16] = {
     8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0
   };
-  RUN_TEST (test_set0, answ_set0, 128, 8, 0, 9);
+  int o1 = run_testq_u8x16 (test_set0, answ_set0, 128, 8, 9);
 
-  return 0;
+  return o1;
 }
 
 /* ASM scan near test for signed version.  */
@@ -476,11 +486,11 @@ test_vclzq_u16 ()
     0, 0, 0, 0, 0, 0, 0, 0
   };
 
-  RUN_TEST (test_set0, answ_set0, 128, 16, 0, 8);
-  RUN_TEST (test_set1, answ_set1, 128, 16, 0, 8);
-  RUN_TEST (test_set2, answ_set2, 128, 16, 0, 1);
+  int o1 = run_testq_u16x8 (test_set0, answ_set0, 128, 16, 8);
+  int o2 = run_testq_u16x8 (test_set1, answ_set1, 128, 16, 8);
+  int o3 = run_testq_u16x8 (test_set2, answ_set2, 128, 16, 1);
 
-  return 0;
+  return o1||o2||o3;
 }
 
 /* ASM scan near test for signed version.  */
@@ -488,10 +498,6 @@ test_vclzq_u16 ()
 int __attribute__ ((noinline))
 test_vclzq_u32 ()
 {
-  int i;
-  uint32x4_t a;
-  uint32x4_t b;
-
   uint32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   uint32_t test_set1[4] = { TEST4, TEST5, TEST6, TEST7 };
   uint32_t test_set2[4] = { TEST8, TEST9, TEST10, TEST11 };
@@ -512,13 +518,13 @@ test_vclzq_u32 ()
   uint32_t answ_set7[4] = { 4, 3, 2, 1 };
   uint32_t answ_set8[4] = { 0, 0, 0, 0 };
 
-  RUN_TEST (test_set0, answ_set0, 128, 32, 0, 4);
-  RUN_TEST (test_set1, answ_set1, 128, 32, 0, 4);
-  RUN_TEST (test_set2, answ_set2, 128, 32, 0, 4);
-  RUN_TEST (test_set3, answ_set3, 128, 32, 0, 4);
-  RUN_TEST (test_set4, answ_set4, 128, 32, 0, 1);
+  int o1 = run_testq_u32x4 (test_set0, answ_set0, 128, 32, 4);
+  int o2 = run_testq_u32x4 (test_set1, answ_set1, 128, 32, 4);
+  int o3 = run_testq_u32x4 (test_set2, answ_set2, 128, 32, 4);
+  int o4 = run_testq_u32x4 (test_set3, answ_set3, 128, 32, 4);
+  int o5 = run_testq_u32x4 (test_set4, answ_set4, 128, 32, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5;
 }
 
 /* ASM scan near test for signed version.  */
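The rewritten vclz.c above (and vneg_s.c below) moves each check into its own function marked __attribute__((noipa,noinline)). With the builtins now folded to plain GIMPLE loads and stores, noinline alone is likely not enough: interprocedural passes can still propagate the constant test vectors into the function body and delete the very instructions the scan-assembler-times directives count. noipa makes the function a black box to IPA, keeping one load/clz/compare sequence per instantiation. A small illustration of the idea (hypothetical names, not from the testsuite):

/* With only noinline, GCC may still const-propagate X into f and fold the
   computation away; noipa disables all interprocedural analysis for f.  */
int __attribute__((noipa, noinline))
f (int *p)
{
  return __builtin_clz ((unsigned) *p);
}

int
g (void)
{
  int x = 16;
  return f (&x);   /* Must materialize X in memory and really call F.  */
}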
gcc/testsuite/gcc.target/aarch64/vneg_s.c

@@ -31,49 +31,24 @@
 
 extern void abort (void);
 
-#define CONCAT(a, b) a##b
-#define CONCAT1(a, b) CONCAT (a, b)
-#define REG_INFEX64 _
-#define REG_INFEX128 q_
-#define REG_INFEX(reg_len) REG_INFEX##reg_len
-#define POSTFIX(reg_len, data_len) \
-  CONCAT1 (REG_INFEX (reg_len), s##data_len)
-#define DATA_TYPE_32 float
-#define DATA_TYPE_64 double
-#define DATA_TYPE(data_len) DATA_TYPE_##data_len
-
-#define FORCE_SIMD_INST64_8(data)
-#define FORCE_SIMD_INST64_16(data)
-#define FORCE_SIMD_INST64_32(data)
-#define FORCE_SIMD_INST64_64(data) force_simd (data)
-#define FORCE_SIMD_INST128_8(data)
-#define FORCE_SIMD_INST128_16(data)
-#define FORCE_SIMD_INST128_32(data)
-#define FORCE_SIMD_INST128_64(data)
-
-#define FORCE_SIMD_INST(reg_len, data_len, data) \
-  CONCAT1 (FORCE_SIMD_INST, reg_len##_##data_len) (data)
-#define LOAD_INST(reg_len, data_len) \
-  CONCAT1 (vld1, POSTFIX (reg_len, data_len))
-#define NEG_INST(reg_len, data_len) \
-  CONCAT1 (vneg, POSTFIX (reg_len, data_len))
-
-#define RUN_TEST(test_set, answ_set, reg_len, data_len, n, a, b)	\
-  {									\
-    int i;								\
-    INHIB_OPTIMIZATION;							\
-    (a) = LOAD_INST (reg_len, data_len) (test_set);			\
-    (b) = LOAD_INST (reg_len, data_len) (answ_set);			\
-    FORCE_SIMD_INST (reg_len, data_len, a)				\
-    a = NEG_INST (reg_len, data_len) (a);				\
-    FORCE_SIMD_INST (reg_len, data_len, a)				\
-    for (i = 0; i < n; i++)						\
-      {									\
-	INHIB_OPTIMIZATION;						\
-	if (a[i] != b[i])						\
-	  return 1;							\
-      }									\
-  }
+#define BUILD_TEST(type, size, lanes)				\
+int __attribute__((noipa,noinline))				\
+run_test##type##size##x##lanes (int##size##_t* test_set,	\
+				int##size##_t* answ_set,	\
+				int reg_len, int data_len, int n) \
+{								\
+  int i;							\
+  int##size##x##lanes##_t a = vld1##type##size (test_set);	\
+  int##size##x##lanes##_t b = vld1##type##size (answ_set);	\
+  a = vneg##type##size (a);					\
+  for (i = 0; i < n; i++)					\
+    {								\
+      INHIB_OPTIMIZATION;					\
+      if (a[i] != b[i])						\
+	return 1;						\
+    }								\
+  return 0;							\
+}								\
 
 #define RUN_TEST_SCALAR(test_val, answ_val, a, b)	\
   {							\
@@ -87,12 +62,19 @@ extern void abort (void);
     force_simd (res);					\
   }
 
+BUILD_TEST (_s, 8, 8)
+BUILD_TEST (_s, 16, 4)
+BUILD_TEST (_s, 32, 2)
+BUILD_TEST (_s, 64, 1)
+
+BUILD_TEST (q_s, 8, 16)
+BUILD_TEST (q_s, 16, 8)
+BUILD_TEST (q_s, 32, 4)
+BUILD_TEST (q_s, 64, 2)
+
 int __attribute__ ((noinline))
 test_vneg_s8 ()
 {
-  int8x8_t a;
-  int8x8_t b;
-
   int8_t test_set0[8] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN
   };
@@ -100,9 +82,9 @@ test_vneg_s8 ()
     ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SCHAR_MIN + 1, SCHAR_MIN
   };
 
-  RUN_TEST (test_set0, answ_set0, 64, 8, 8, a, b);
+  int o1 = run_test_s8x8 (test_set0, answ_set0, 64, 8, 8);
 
-  return 0;
+  return o1;
 }
 
 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */
@@ -110,29 +92,23 @@ test_vneg_s8 ()
 int __attribute__ ((noinline))
 test_vneg_s16 ()
 {
-  int16x4_t a;
-  int16x4_t b;
-
   int16_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   int16_t test_set1[4] = { TEST4, TEST5, SHRT_MAX, SHRT_MIN };
 
   int16_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
   int16_t answ_set1[4] = { ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 64, 16, 4, a, b);
-  RUN_TEST (test_set1, answ_set1, 64, 16, 4, a, b);
+  int o1 = run_test_s16x4 (test_set0, answ_set0, 64, 16, 4);
+  int o2 = run_test_s16x4 (test_set1, answ_set1, 64, 16, 4);
 
-  return 0;
+  return o1||o2;
 }
 
-/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 2 } } */
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */
 
 int __attribute__ ((noinline))
 test_vneg_s32 ()
 {
-  int32x2_t a;
-  int32x2_t b;
-
   int32_t test_set0[2] = { TEST0, TEST1 };
   int32_t test_set1[2] = { TEST2, TEST3 };
   int32_t test_set2[2] = { TEST4, TEST5 };
@@ -143,22 +119,19 @@ test_vneg_s32 ()
   int32_t answ_set2[2] = { ANSW4, ANSW5 };
   int32_t answ_set3[2] = { INT_MIN + 1, INT_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 64, 32, 2, a, b);
-  RUN_TEST (test_set1, answ_set1, 64, 32, 2, a, b);
-  RUN_TEST (test_set2, answ_set2, 64, 32, 2, a, b);
-  RUN_TEST (test_set3, answ_set3, 64, 32, 2, a, b);
+  int o1 = run_test_s32x2 (test_set0, answ_set0, 64, 32, 2);
+  int o2 = run_test_s32x2 (test_set1, answ_set1, 64, 32, 2);
+  int o3 = run_test_s32x2 (test_set2, answ_set2, 64, 32, 2);
+  int o4 = run_test_s32x2 (test_set3, answ_set3, 64, 32, 2);
 
-  return 0;
+  return o1||o2||o3||o4;
 }
 
-/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 4 } } */
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */
 
 int __attribute__ ((noinline))
 test_vneg_s64 ()
 {
-  int64x1_t a;
-  int64x1_t b;
-
   int64_t test_set0[1] = { TEST0 };
   int64_t test_set1[1] = { TEST1 };
   int64_t test_set2[1] = { TEST2 };
@@ -177,16 +150,16 @@ test_vneg_s64 ()
   int64_t answ_set6[1] = { LLONG_MIN + 1 };
   int64_t answ_set7[1] = { LLONG_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 64, 64, 1, a, b);
-  RUN_TEST (test_set1, answ_set1, 64, 64, 1, a, b);
-  RUN_TEST (test_set2, answ_set2, 64, 64, 1, a, b);
-  RUN_TEST (test_set3, answ_set3, 64, 64, 1, a, b);
-  RUN_TEST (test_set4, answ_set4, 64, 64, 1, a, b);
-  RUN_TEST (test_set5, answ_set5, 64, 64, 1, a, b);
-  RUN_TEST (test_set6, answ_set6, 64, 64, 1, a, b);
-  RUN_TEST (test_set7, answ_set7, 64, 64, 1, a, b);
+  int o1 = run_test_s64x1 (test_set0, answ_set0, 64, 64, 1);
+  int o2 = run_test_s64x1 (test_set1, answ_set1, 64, 64, 1);
+  int o3 = run_test_s64x1 (test_set2, answ_set2, 64, 64, 1);
+  int o4 = run_test_s64x1 (test_set3, answ_set3, 64, 64, 1);
+  int o5 = run_test_s64x1 (test_set4, answ_set4, 64, 64, 1);
+  int o6 = run_test_s64x1 (test_set5, answ_set5, 64, 64, 1);
+  int o7 = run_test_s64x1 (test_set6, answ_set6, 64, 64, 1);
+  int o8 = run_test_s64x1 (test_set7, answ_set7, 64, 64, 1);
 
-  return 0;
+  return o1||o2||o3||o4||o5||o6||o7||o8;
 }
 
 int __attribute__ ((noinline))
@@ -206,14 +179,11 @@ test_vnegd_s64 ()
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 16 } } */
+/* { dg-final { scan-assembler-times "neg\\td\[0-9\]+, d\[0-9\]+" 8 } } */
 
 int __attribute__ ((noinline))
 test_vnegq_s8 ()
 {
-  int8x16_t a;
-  int8x16_t b;
-
   int8_t test_set0[16] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SCHAR_MAX, SCHAR_MIN,
     4, 8, 15, 16, 23, 42, -1, -2
@@ -224,9 +194,9 @@ test_vnegq_s8 ()
     -4, -8, -15, -16, -23, -42, 1, 2
   };
 
-  RUN_TEST (test_set0, answ_set0, 128, 8, 8, a, b);
+  int o1 = run_testq_s8x16 (test_set0, answ_set0, 128, 8, 8);
 
-  return 0;
+  return o1;
 }
 
 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
@@ -234,9 +204,6 @@ test_vnegq_s8 ()
 int __attribute__ ((noinline))
 test_vnegq_s16 ()
 {
-  int16x8_t a;
-  int16x8_t b;
-
   int16_t test_set0[8] = {
     TEST0, TEST1, TEST2, TEST3, TEST4, TEST5, SHRT_MAX, SHRT_MIN
   };
@@ -244,9 +211,9 @@ test_vnegq_s16 ()
     ANSW0, ANSW1, ANSW2, ANSW3, ANSW4, ANSW5, SHRT_MIN + 1, SHRT_MIN
   };
 
-  RUN_TEST (test_set0, answ_set0, 128, 16, 8, a, b);
+  int o1 = run_testq_s16x8 (test_set0, answ_set0, 128, 16, 8);
 
-  return 0;
+  return o1;
 }
 
 /* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
@@ -254,29 +221,23 @@ test_vnegq_s16 ()
 int __attribute__ ((noinline))
 test_vnegq_s32 ()
 {
-  int32x4_t a;
-  int32x4_t b;
-
   int32_t test_set0[4] = { TEST0, TEST1, TEST2, TEST3 };
   int32_t test_set1[4] = { TEST4, TEST5, INT_MAX, INT_MIN };
 
   int32_t answ_set0[4] = { ANSW0, ANSW1, ANSW2, ANSW3 };
   int32_t answ_set1[4] = { ANSW4, ANSW5, INT_MIN + 1, INT_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 128, 32, 4, a, b);
-  RUN_TEST (test_set1, answ_set1, 128, 32, 4, a, b);
+  int o1 = run_testq_s32x4 (test_set0, answ_set0, 128, 32, 4);
+  int o2 = run_testq_s32x4 (test_set1, answ_set1, 128, 32, 4);
 
-  return 0;
+  return o1||o2;
 }
 
-/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 2 } } */
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
 
 int __attribute__ ((noinline))
 test_vnegq_s64 ()
 {
-  int64x2_t a;
-  int64x2_t b;
-
   int64_t test_set0[2] = { TEST0, TEST1 };
   int64_t test_set1[2] = { TEST2, TEST3 };
   int64_t test_set2[2] = { TEST4, TEST5 };
@@ -287,15 +248,15 @@ test_vnegq_s64 ()
   int64_t answ_set2[2] = { ANSW4, ANSW5 };
   int64_t answ_set3[2] = { LLONG_MIN + 1, LLONG_MIN };
 
-  RUN_TEST (test_set0, answ_set0, 128, 64, 2, a, b);
-  RUN_TEST (test_set1, answ_set1, 128, 64, 2, a, b);
-  RUN_TEST (test_set2, answ_set2, 128, 64, 2, a, b);
-  RUN_TEST (test_set3, answ_set3, 128, 64, 2, a, b);
+  int o1 = run_testq_s64x2 (test_set0, answ_set0, 128, 64, 2);
+  int o2 = run_testq_s64x2 (test_set1, answ_set1, 128, 64, 2);
+  int o3 = run_testq_s64x2 (test_set2, answ_set2, 128, 64, 2);
+  int o4 = run_testq_s64x2 (test_set3, answ_set3, 128, 64, 2);
 
-  return 0;
+  return o1||o2||o3||o4;
 }
 
-/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 4 } } */
+/* { dg-final { scan-assembler-times "neg\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
 
 int
 main (int argc, char **argv)