aarch64: Add vcopy(q)__lane(q)_bf16 intrinsics
gcc/ChangeLog 2020-10-20 Andrea Corallo <andrea.corallo@arm.com> * config/aarch64/arm_neon.h (vcopy_lane_bf16, vcopyq_lane_bf16) (vcopyq_laneq_bf16, vcopy_laneq_bf16): New intrinsics. gcc/testsuite/ChangeLog 2020-10-20 Andrea Corallo <andrea.corallo@arm.com> * gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c: New test. * gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c: Likewise.
This commit is contained in:
parent
d4fd8638be
commit
8eb8dcac6e
10 changed files with 206 additions and 0 deletions
|
@ -35716,6 +35716,42 @@ vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
|
|||
return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopy_lane_bf16 (bfloat16x4_t __a, const int __lane1,
|
||||
bfloat16x4_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopyq_lane_bf16 (bfloat16x8_t __a, const int __lane1,
|
||||
bfloat16x4_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x4_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopy_laneq_bf16 (bfloat16x4_t __a, const int __lane1,
|
||||
bfloat16x8_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
__extension__ extern __inline bfloat16x8_t
|
||||
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
|
||||
vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
|
||||
bfloat16x8_t __b, const int __lane2)
|
||||
{
|
||||
return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
|
||||
__a, __lane1);
|
||||
}
|
||||
|
||||
#pragma GCC pop_options
|
||||
|
||||
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
/* { dg-do assemble { target { aarch64*-*-* } } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-additional-options "-march=armv8.2-a+bf16 -O3 --save-temps -std=gnu90" } */
|
||||
|
||||
#include "arm_neon.h"
|
||||
|
||||
bfloat16x4_t __attribute__((noinline,noclone))
|
||||
test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
|
||||
{
|
||||
return vcopy_lane_bf16 (a, 1, b, 2);
|
||||
}
|
||||
|
||||
bfloat16x8_t __attribute__((noinline,noclone))
|
||||
test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
|
||||
{
|
||||
return vcopyq_lane_bf16 (a, 1, b, 2);
|
||||
}
|
||||
|
||||
bfloat16x4_t __attribute__((noinline,noclone))
|
||||
test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
|
||||
{
|
||||
return vcopy_laneq_bf16 (a, 1, b, 2);
|
||||
}
|
||||
|
||||
bfloat16x8_t __attribute__((noinline,noclone))
|
||||
test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
|
||||
{
|
||||
return vcopyq_laneq_bf16 (a, 1, b, 2);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[2\\\]" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[0\\\]" 2 } } */
|
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x4_t
|
||||
test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
|
||||
{
|
||||
bfloat16x4_t res;
|
||||
res = vcopy_lane_bf16 (a, 0, b, 4);
|
||||
res = vcopy_lane_bf16 (a, 0, b, -1);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x4_t
|
||||
test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
|
||||
{
|
||||
bfloat16x4_t res;
|
||||
res = vcopy_lane_bf16 (a, -1, b, 2);
|
||||
res = vcopy_lane_bf16 (a, 4, b, 2);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x4_t
|
||||
test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
|
||||
{
|
||||
bfloat16x4_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vcopy_laneq_bf16 (a, -1, b, 2);
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vcopy_laneq_bf16 (a, 4, b, 2);
|
||||
return res;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x4_t
|
||||
test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
|
||||
{
|
||||
bfloat16x4_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopy_laneq_bf16 (a, 1, b, -1);
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopy_laneq_bf16 (a, 1, b, 8);
|
||||
return res;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x8_t
|
||||
test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
|
||||
{
|
||||
bfloat16x8_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_lane_bf16 (a, -1, b, 2);
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_lane_bf16 (a, 8, b, 2);
|
||||
return res;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x8_t
|
||||
test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
|
||||
{
|
||||
bfloat16x8_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_lane_bf16 (a, 2, b, -1);
|
||||
/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_lane_bf16 (a, 2, b, 4);
|
||||
return res;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x8_t
|
||||
test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
|
||||
{
|
||||
bfloat16x8_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_laneq_bf16 (a, -1, b, 2);
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_laneq_bf16 (a, 8, b, 2);
|
||||
return res;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
|
||||
/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
|
||||
/* { dg-add-options arm_v8_2a_bf16_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
bfloat16x8_t
|
||||
test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
|
||||
{
|
||||
bfloat16x8_t res;
|
||||
/* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_laneq_bf16 (a, 2, b, -1);
|
||||
/* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
|
||||
res = vcopyq_laneq_bf16 (a, 2, b, 8);
|
||||
return res;
|
||||
}
|
Loading…
Add table
Reference in a new issue