diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f25a2b7f520..126ad08300b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2014-04-23  Alex Velenko  <Alex.Velenko@arm.com>
+
+	* gcc.target/aarch64/vdup_lane_1.c: New testcase.
+	* gcc.target/aarch64/vdup_lane_2.c: New testcase.
+	* gcc.target/aarch64/vdup_n_1.c: New testcase.
+
 2014-04-23  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
 	* gcc.target/arm/rev16.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
new file mode 100644
index 00000000000..4582471c8aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
@@ -0,0 +1,430 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "--save-temps -O1" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_0 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 0);
+}
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_f32_1 (float32x2_t a)
+{
+  return vdup_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_f32 ()
+{
+  float32x2_t a;
+  float32x2_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[2];
+
+  a = vld1_f32 (c);
+  b = wrap_vdup_lane_f32_0 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_f32_1 (a);
+  vst1_f32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_0 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 0);
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_f32_1 (float32x2_t a)
+{
+  return vdupq_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_f32 ()
+{
+  float32x2_t a;
+  float32x4_t b;
+  int i;
+  float32_t c[2] = { 0.0 , 3.14 };
+  float32_t d[4];
+
+  a = vld1_f32 (c);
+  b = wrap_vdupq_lane_f32_0 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_f32_1 (a);
+  vst1q_f32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_0 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 0);
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_lane_s8_1 (int8x8_t a)
+{
+  return vdup_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s8 ()
+{
+  int8x8_t a;
+  int8x8_t b;
+  int i;
+  /* Only the first two cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[8];
+
+  a = vld1_s8 (c);
+  b = wrap_vdup_lane_s8_0 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s8_1 (a);
+  vst1_s8 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_0 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 0);
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_lane_s8_1 (int8x8_t a)
+{
+  return vdupq_lane_s8 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s8 ()
+{
+  int8x8_t a;
+  int8x16_t b;
+  int i;
+  /* Only the first two cases are interesting.  */
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+  int8_t d[16];
+
+  a = vld1_s8 (c);
+  b = wrap_vdupq_lane_s8_0 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s8_1 (a);
+  vst1q_s8 (d, b);
+  for (i = 0; i < 16; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_0 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 0);
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_lane_s16_1 (int16x4_t a)
+{
+  return vdup_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s16 ()
+{
+  int16x4_t a;
+  int16x4_t b;
+  int i;
+  /* Only the first two cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[4];
+
+  a = vld1_s16 (c);
+  b = wrap_vdup_lane_s16_0 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s16_1 (a);
+  vst1_s16 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_0 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 0);
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_lane_s16_1 (int16x4_t a)
+{
+  return vdupq_lane_s16 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s16 ()
+{
+  int16x4_t a;
+  int16x8_t b;
+  int i;
+  /* Only the first two cases are interesting.  */
+  int16_t c[4] = { 0, 1, 2, 3 };
+  int16_t d[8];
+
+  a = vld1_s16 (c);
+  b = wrap_vdupq_lane_s16_0 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s16_1 (a);
+  vst1q_s16 (d, b);
+  for (i = 0; i < 8; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_0 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 0);
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_lane_s32_1 (int32x2_t a)
+{
+  return vdup_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s32 ()
+{
+  int32x2_t a;
+  int32x2_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[2];
+
+  a = vld1_s32 (c);
+  b = wrap_vdup_lane_s32_0 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdup_lane_s32_1 (a);
+  vst1_s32 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_0 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 0);
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_lane_s32_1 (int32x2_t a)
+{
+  return vdupq_lane_s32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s32 ()
+{
+  int32x2_t a;
+  int32x4_t b;
+  int i;
+  int32_t c[2] = { 0, 1 };
+  int32_t d[4];
+
+  a = vld1_s32 (c);
+  b = wrap_vdupq_lane_s32_0 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  b = wrap_vdupq_lane_s32_1 (a);
+  vst1q_s32 (d, b);
+  for (i = 0; i < 4; i++)
+    if (c[1] != d[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_0 (int64x1_t a)
+{
+  return vdup_lane_s64 (a, 0);
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_lane_s64_1 (int64x1_t a)
+{
+  return vdup_lane_s64 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdup_lane_s64 ()
+{
+  int64x1_t a;
+  int64x1_t b;
+  int64_t c[1];
+  int64_t d[1];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_0 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdup_lane_s64_1 (a);
+  vst1_s64 (d, b);
+  if (c[0] != d[0])
+    return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_0 (int64x1_t a)
+{
+  return vdupq_lane_s64 (a, 0);
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_lane_s64_1 (int64x1_t a)
+{
+  return vdupq_lane_s64 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_lane_s64 ()
+{
+  int64x1_t a;
+  int64x2_t b;
+  int i;
+  int64_t c[1];
+  int64_t d[2];
+
+  c[0] = 0;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_0 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+
+  c[0] = 1;
+  a = vld1_s64 (c);
+  b = wrap_vdupq_lane_s64_1 (a);
+  vst1q_s64 (d, b);
+  for (i = 0; i < 2; i++)
+    if (c[0] != d[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+
+  if (test_vdup_lane_f32 ())
+    abort ();
+  if (test_vdup_lane_s8 ())
+    abort ();
+  if (test_vdup_lane_s16 ())
+    abort ();
+  if (test_vdup_lane_s32 ())
+    abort ();
+  if (test_vdup_lane_s64 ())
+    abort ();
+  if (test_vdupq_lane_f32 ())
+    abort ();
+  if (test_vdupq_lane_s8 ())
+    abort ();
+  if (test_vdupq_lane_s16 ())
+    abort ();
+  if (test_vdupq_lane_s32 ())
+    abort ();
+  if (test_vdupq_lane_s64 ())
+    abort ();
+
+  return 0;
+}
+
+/* Asm check for test_vdup_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdupq_lane_s16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
+
+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
new file mode 100644
index 00000000000..7c04e759a52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c
@@ -0,0 +1,343 @@
+/* Test vdup_lane intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+/* Force V1 into a SIMD ("w") register so that the scalar lane access is
+   emitted as a "dup", not as a move through a general-purpose register.  */
+#define force_simd(V1) asm volatile ("" \
+	 : "=w"(V1) \
+	 : "w"(V1) \
+	 : /* No clobbers */)
+
+extern void abort (void);
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
+{
+  return vdups_lane_f32 (a, 0);
+}
+
+float32_t __attribute__ ((noinline))
+wrap_vdups_lane_f32_1 (float32x2_t a)
+{
+  return vdups_lane_f32 (a, 1);
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_f32 ()
+{
+  float32x2_t a;
+  float32_t b;
+  float32_t c[2] = { 0.0, 1.0 };
+
+  a = vld1_f32 (c);
+  b = wrap_vdups_lane_f32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_f32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+float64_t __attribute__ ((noinline))
+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
+{
+  return vdupd_lane_f64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_f64 ()
+{
+  float64x1_t a;
+  float64_t b;
+  float64_t c[1] = { 0.0 };
+  a = vld1_f64 (c);
+  b = wrap_vdupd_lane_f64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int8_t __attribute__ ((noinline))
+wrap_vdupb_lane_s8_1 (int8x8_t a)
+{
+  int8_t result = vdupb_lane_s8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_s8 ()
+{
+  int8x8_t a;
+  int8_t b;
+  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_s8 (c);
+  b = wrap_vdupb_lane_s8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_s8_1 (a);
+  if (c[1] != b)
+    return 1;
+
+  return 0;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint8_t __attribute__ ((noinline))
+wrap_vdupb_lane_u8_1 (uint8x8_t a)
+{
+  uint8_t result = vdupb_lane_u8 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdupb_lane_u8 ()
+{
+  uint8x8_t a;
+  uint8_t b;
+  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+  a = vld1_u8 (c);
+  b = wrap_vdupb_lane_u8_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdupb_lane_u8_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int16_t __attribute__ ((noinline))
+wrap_vduph_lane_s16_1 (int16x4_t a)
+{
+  int16_t result = vduph_lane_s16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_s16 ()
+{
+  int16x4_t a;
+  int16_t b;
+  int16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_s16 (c);
+  b = wrap_vduph_lane_s16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_s16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint16_t __attribute__ ((noinline))
+wrap_vduph_lane_u16_1 (uint16x4_t a)
+{
+  uint16_t result = vduph_lane_u16 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vduph_lane_u16 ()
+{
+  uint16x4_t a;
+  uint16_t b;
+  uint16_t c[4] = { 0, 1, 2, 3 };
+
+  a = vld1_u16 (c);
+  b = wrap_vduph_lane_u16_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vduph_lane_u16_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+int32_t __attribute__ ((noinline))
+wrap_vdups_lane_s32_1 (int32x2_t a)
+{
+  int32_t result = vdups_lane_s32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_s32 ()
+{
+  int32x2_t a;
+  int32_t b;
+  int32_t c[2] = { 0, 1 };
+
+  a = vld1_s32 (c);
+  b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_s32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 0);
+  force_simd (result);
+  return result;
+}
+
+uint32_t __attribute__ ((noinline))
+wrap_vdups_lane_u32_1 (uint32x2_t a)
+{
+  uint32_t result = vdups_lane_u32 (a, 1);
+  force_simd (result);
+  return result;
+}
+
+int __attribute__ ((noinline))
+test_vdups_lane_u32 ()
+{
+  uint32x2_t a;
+  uint32_t b;
+  uint32_t c[2] = { 0, 1 };
+  a = vld1_u32 (c);
+  b = wrap_vdups_lane_u32_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  b = wrap_vdups_lane_u32_1 (a);
+  if (c[1] != b)
+    return 1;
+  return 0;
+}
+
+uint64_t __attribute__ ((noinline))
+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
+{
+  return vdupd_lane_u64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_u64 ()
+{
+  uint64x1_t a;
+  uint64_t b;
+  uint64_t c[1] = { 0 };
+
+  a = vld1_u64 (c);
+  b = wrap_vdupd_lane_u64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int64_t __attribute__ ((noinline))
+wrap_vdupd_lane_s64_0 (int64x1_t dummy, int64x1_t a)
+{
+  return vdupd_lane_s64 (a, 0);
+}
+
+int __attribute__ ((noinline))
+test_vdupd_lane_s64 ()
+{
+  int64x1_t a;
+  int64_t b;
+  int64_t c[1] = { 0 };
+
+  a = vld1_s64 (c);
+  b = wrap_vdupd_lane_s64_0 (a, a);
+  if (c[0] != b)
+    return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdups_lane_f32 ())
+    abort ();
+  if (test_vdupd_lane_f64 ())
+    abort ();
+  if (test_vdupb_lane_s8 ())
+    abort ();
+  if (test_vdupb_lane_u8 ())
+    abort ();
+  if (test_vduph_lane_s16 ())
+    abort ();
+  if (test_vduph_lane_u16 ())
+    abort ();
+  if (test_vdups_lane_s32 ())
+    abort ();
+  if (test_vdups_lane_u32 ())
+    abort ();
+  if (test_vdupd_lane_s64 ())
+    abort ();
+  if (test_vdupd_lane_u64 ())
+    abort ();
+  return 0;
+}
+
+/* Asm check for vdupb_lane_s8, vdupb_lane_u8.  */
+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
+
+/* Asm check for vduph_lane_s16, vduph_lane_u16.  */
+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
+
+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32.  */
+/* Can't generate "dup s, v[0]" for vdups_lane_s32 and vdups_lane_u32.  */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1 } } */
+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
+
+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64.  */
+/* Attempts to make the compiler generate vdupd are not practical.  */
+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } */
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
new file mode 100644
index 00000000000..a79910d68d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
@@ -0,0 +1,619 @@
+/* Test vdup_n intrinsics work correctly.  */
+/* { dg-do run } */
+/* { dg-options "-O1 --save-temps" } */
+
+#include <arm_neon.h>
+
+extern void abort (void);
+
+float32x2_t __attribute__ ((noinline))
+wrap_vdup_n_f32 (float32_t a)
+{
+  return vdup_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x2_t b;
+  float32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_f32 (a);
+  vst1_f32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_f32 (float32_t a)
+{
+  return vdupq_n_f32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f32 ()
+{
+  float32_t a = 1.0;
+  float32x4_t b;
+  float32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_f32 (a);
+  vst1q_f32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x1_t __attribute__ ((noinline))
+wrap_vdup_n_f64 (float64_t a)
+{
+  return vdup_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x1_t b;
+  float64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_f64 (a);
+  vst1_f64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+float64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_f64 (float64_t a)
+{
+  return vdupq_n_f64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_f64 ()
+{
+  float64_t a = 1.0;
+  float64x2_t b;
+  float64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_f64 (a);
+  vst1q_f64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x8_t __attribute__ ((noinline))
+wrap_vdup_n_p8 (poly8_t a)
+{
+  return vdup_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x8_t b;
+  poly8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_p8 (a);
+  vst1_p8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_p8 (poly8_t a)
+{
+  return vdupq_n_p8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p8 ()
+{
+  poly8_t a = 1;
+  poly8x16_t b;
+  poly8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_p8 (a);
+  vst1q_p8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x8_t __attribute__ ((noinline))
+wrap_vdup_n_s8 (int8_t a)
+{
+  return vdup_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s8 ()
+{
+  int8_t a = 1;
+  int8x8_t b;
+  int8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_s8 (a);
+  vst1_s8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_s8 (int8_t a)
+{
+  return vdupq_n_s8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s8 ()
+{
+  int8_t a = 1;
+  int8x16_t b;
+  int8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_s8 (a);
+  vst1q_s8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x8_t __attribute__ ((noinline))
+wrap_vdup_n_u8 (uint8_t a)
+{
+  return vdup_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x8_t b;
+  uint8_t c[8];
+  int i;
+
+  b = wrap_vdup_n_u8 (a);
+  vst1_u8 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint8x16_t __attribute__ ((noinline))
+wrap_vdupq_n_u8 (uint8_t a)
+{
+  return vdupq_n_u8 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u8 ()
+{
+  uint8_t a = 1;
+  uint8x16_t b;
+  uint8_t c[16];
+  int i;
+
+  b = wrap_vdupq_n_u8 (a);
+  vst1q_u8 (c, b);
+  for (i = 0; i < 16; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x4_t __attribute__ ((noinline))
+wrap_vdup_n_p16 (poly16_t a)
+{
+  return vdup_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x4_t b;
+  poly16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_p16 (a);
+  vst1_p16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+poly16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_p16 (poly16_t a)
+{
+  return vdupq_n_p16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_p16 ()
+{
+  poly16_t a = 1;
+  poly16x8_t b;
+  poly16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_p16 (a);
+  vst1q_p16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x4_t __attribute__ ((noinline))
+wrap_vdup_n_s16 (int16_t a)
+{
+  return vdup_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s16 ()
+{
+  int16_t a = 1;
+  int16x4_t b;
+  int16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_s16 (a);
+  vst1_s16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_s16 (int16_t a)
+{
+  return vdupq_n_s16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s16 ()
+{
+  int16_t a = 1;
+  int16x8_t b;
+  int16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_s16 (a);
+  vst1q_s16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x4_t __attribute__ ((noinline))
+wrap_vdup_n_u16 (uint16_t a)
+{
+  return vdup_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x4_t b;
+  uint16_t c[4];
+  int i;
+
+  b = wrap_vdup_n_u16 (a);
+  vst1_u16 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint16x8_t __attribute__ ((noinline))
+wrap_vdupq_n_u16 (uint16_t a)
+{
+  return vdupq_n_u16 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u16 ()
+{
+  uint16_t a = 1;
+  uint16x8_t b;
+  uint16_t c[8];
+  int i;
+
+  b = wrap_vdupq_n_u16 (a);
+  vst1q_u16 (c, b);
+  for (i = 0; i < 8; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x2_t __attribute__ ((noinline))
+wrap_vdup_n_s32 (int32_t a)
+{
+  return vdup_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s32 ()
+{
+  int32_t a = 1;
+  int32x2_t b;
+  int32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_s32 (a);
+  vst1_s32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_s32 (int32_t a)
+{
+  return vdupq_n_s32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s32 ()
+{
+  int32_t a = 1;
+  int32x4_t b;
+  int32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_s32 (a);
+  vst1q_s32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x2_t __attribute__ ((noinline))
+wrap_vdup_n_u32 (uint32_t a)
+{
+  return vdup_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x2_t b;
+  uint32_t c[2];
+  int i;
+
+  b = wrap_vdup_n_u32 (a);
+  vst1_u32 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint32x4_t __attribute__ ((noinline))
+wrap_vdupq_n_u32 (uint32_t a)
+{
+  return vdupq_n_u32 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u32 ()
+{
+  uint32_t a = 1;
+  uint32x4_t b;
+  uint32_t c[4];
+  int i;
+
+  b = wrap_vdupq_n_u32 (a);
+  vst1q_u32 (c, b);
+  for (i = 0; i < 4; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x1_t __attribute__ ((noinline))
+wrap_vdup_n_s64 (int64_t a)
+{
+  return vdup_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_s64 ()
+{
+  int64_t a = 1;
+  int64x1_t b;
+  int64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_s64 (a);
+  vst1_s64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_s64 (int64_t a)
+{
+  return vdupq_n_s64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_s64 ()
+{
+  int64_t a = 1;
+  int64x2_t b;
+  int64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_s64 (a);
+  vst1q_s64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x1_t __attribute__ ((noinline))
+wrap_vdup_n_u64 (uint64_t a)
+{
+  return vdup_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdup_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x1_t b;
+  uint64_t c[1];
+  int i;
+
+  b = wrap_vdup_n_u64 (a);
+  vst1_u64 (c, b);
+  for (i = 0; i < 1; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+uint64x2_t __attribute__ ((noinline))
+wrap_vdupq_n_u64 (uint64_t a)
+{
+  return vdupq_n_u64 (a);
+}
+
+int __attribute__ ((noinline))
+test_vdupq_n_u64 ()
+{
+  uint64_t a = 1;
+  uint64x2_t b;
+  uint64_t c[2];
+  int i;
+
+  b = wrap_vdupq_n_u64 (a);
+  vst1q_u64 (c, b);
+  for (i = 0; i < 2; i++)
+    if (a != c[i])
+      return 1;
+  return 0;
+}
+
+int
+main ()
+{
+  if (test_vdup_n_f32 ())
+    abort ();
+  if (test_vdup_n_f64 ())
+    abort ();
+  if (test_vdup_n_p8 ())
+    abort ();
+  if (test_vdup_n_u8 ())
+    abort ();
+  if (test_vdup_n_s8 ())
+    abort ();
+  if (test_vdup_n_p16 ())
+    abort ();
+  if (test_vdup_n_s16 ())
+    abort ();
+  if (test_vdup_n_u16 ())
+    abort ();
+  if (test_vdup_n_s32 ())
+    abort ();
+  if (test_vdup_n_u32 ())
+    abort ();
+  if (test_vdup_n_s64 ())
+    abort ();
+  if (test_vdup_n_u64 ())
+    abort ();
+  if (test_vdupq_n_f32 ())
+    abort ();
+  if (test_vdupq_n_f64 ())
+    abort ();
+  if (test_vdupq_n_p8 ())
+    abort ();
+  if (test_vdupq_n_u8 ())
+    abort ();
+  if (test_vdupq_n_s8 ())
+    abort ();
+  if (test_vdupq_n_p16 ())
+    abort ();
+  if (test_vdupq_n_s16 ())
+    abort ();
+  if (test_vdupq_n_u16 ())
+    abort ();
+  if (test_vdupq_n_s32 ())
+    abort ();
+  if (test_vdupq_n_u32 ())
+    abort ();
+  if (test_vdupq_n_s64 ())
+    abort ();
+  if (test_vdupq_n_u64 ())
+    abort ();
+  return 0;
+}
+
+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64.
+   Cannot force floating point value in general purpose register.  */
+
+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
+
+/* Asm check for test_vdup_n_s32, test_vdup_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */
+
+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */
+
+/* Asm checks for test_vdup_n_s64 and test_vdup_n_u64 are left out.
+   Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+"
+   are not practical.  */
+
+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64.  */
+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */
+
+/* { dg-final { cleanup-saved-temps } } */
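
For reference, the force_simd idiom used in vdup_lane_2.c boils down to the
minimal standalone sketch below (not part of the patch; the function name
lane_copy is invented for illustration).  The empty volatile asm ties its
operand to the "w" constraint, i.e. a SIMD/FP register, so the scalar lane
read cannot be satisfied through a general-purpose register; without it,
GCC at -O1 may emit the lane access as a GP-register move (smov/umov)
rather than the "dup b<n>, v<m>.b[1]" form the scan-assembler directives
expect:

#include <arm_neon.h>

/* Hypothetical example; compile with an aarch64 gcc at -O1 and inspect
   the assembly.  */
int8_t
lane_copy (int8x8_t a)
{
  /* Read lane 1 of A as a scalar.  */
  int8_t result = vdupb_lane_s8 (a, 1);
  /* Pin RESULT to a SIMD register; this is what force_simd expands to.  */
  asm volatile ("" : "=w" (result) : "w" (result));
  return result;
}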