[PATCH AArch64 2/2] Replace temporary inline assembler for vget_high
* config/aarch64/arm_neon.h (__GET_HIGH): New macro.
(vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16, vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64, vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64): Remove temporary __asm__ and reimplement.

From-SVN: r214952
This commit is contained in:
parent
6fff10f9aa
commit
8684fa50fa
2 changed files with 87 additions and 132 deletions
|
@ -1,3 +1,11 @@
|
|||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* config/aarch64/arm_neon.h (__GET_HIGH): New macro.
|
||||
(vget_high_f32, vget_high_f64, vget_high_p8, vget_high_p16,
|
||||
vget_high_s8, vget_high_s16, vget_high_s32, vget_high_s64,
|
||||
vget_high_u8, vget_high_u16, vget_high_u32, vget_high_u64):
|
||||
Remove temporary __asm__ and reimplement.
|
||||
|
||||
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): Remove code
|
||||
|
|
|
@ -4328,6 +4328,85 @@ vget_low_u64 (uint64x2_t __a)
|
|||
|
||||
#undef __GET_LOW
|
||||
|
||||
/* Body shared by the vget_high_<type> functions that follow.  Reinterprets
   the argument __a as two 64-bit lanes, extracts lane 1, wraps it in a
   one-lane 64-bit vector and reinterprets that as the <type> result.
   The temporaries use reserved (double-underscore) names so that a user
   macro named `tmp' or `hi' cannot break this header.  The expansion ends
   in a `return', so it must form the whole body of a function whose
   parameter is called __a.  */
#define __GET_HIGH(__TYPE)					\
  uint64x2_t __tmp = vreinterpretq_u64_##__TYPE (__a);		\
  uint64x1_t __hi = vcreate_u64 (vgetq_lane_u64 (__tmp, 1));	\
  return vreinterpret_##__TYPE##_u64 (__hi);
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a float32x2_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_f32 (float32x4_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (f32);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a float64x1_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_f64 (float64x2_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (f64);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a poly8x8_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_p8 (poly8x16_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (p8);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a poly16x4_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_p16 (poly16x8_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (p16);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as an int8x8_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_s8 (int8x16_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (s8);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as an int16x4_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_s16 (int16x8_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (s16);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as an int32x2_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_s32 (int32x4_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (s32);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as an int64x1_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_s64 (int64x2_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (s64);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a uint8x8_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_u8 (uint8x16_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (u8);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a uint16x4_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_u16 (uint16x8_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (u16);
|
||||
}
|
||||
|
||||
/* Return lane 1 of __a, viewed as two 64-bit lanes, as a uint32x2_t.
   The whole body is generated by the __GET_HIGH macro.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_u32 (uint32x4_t __a)
|
||||
{
|
||||
/* Expands to reinterpret-to-u64 / take lane 1 / reinterpret-back.  */
__GET_HIGH (u32);
|
||||
}
|
||||
|
||||
#undef __GET_HIGH
|
||||
|
||||
/* Return lane 1 of __a as a one-lane uint64x1_t.  Written out directly
   instead of through __GET_HIGH (already #undef'd at this point); the
   argument is already a uint64x2_t, so no reinterpret step is involved.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_u64 (uint64x2_t __a)
|
||||
{
|
||||
/* Extract the scalar in lane 1, then wrap it in a 64-bit vector.  */
return vcreate_u64 (vgetq_lane_u64 (__a, 1));
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
|
||||
vcombine_s8 (int8x8_t __a, int8x8_t __b)
|
||||
{
|
||||
|
@ -5770,138 +5849,6 @@ vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c)
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   float32x2_t.  */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_f32 (float32x4_t a)
|
||||
{
|
||||
float32x2_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   float64x1_t.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_f64 (float64x2_t a)
|
||||
{
|
||||
float64x1_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   poly8x8_t.  */
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_p8 (poly8x16_t a)
|
||||
{
|
||||
poly8x8_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   poly16x4_t.  */
__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_p16 (poly16x8_t a)
|
||||
{
|
||||
poly16x4_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as an
   int8x8_t.  */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_s8 (int8x16_t a)
|
||||
{
|
||||
int8x8_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as an
   int16x4_t.  */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_s16 (int16x8_t a)
|
||||
{
|
||||
int16x4_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as an
   int32x2_t.  */
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_s32 (int32x4_t a)
|
||||
{
|
||||
int32x2_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as an
   int64x1_t.  */
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_s64 (int64x2_t a)
|
||||
{
|
||||
int64x1_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   uint8x8_t.  */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
|
||||
vget_high_u8 (uint8x16_t a)
|
||||
{
|
||||
uint8x8_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   uint16x4_t.  */
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
|
||||
vget_high_u16 (uint16x8_t a)
|
||||
{
|
||||
uint16x4_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   uint32x2_t.  */
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
|
||||
vget_high_u32 (uint32x4_t a)
|
||||
{
|
||||
uint32x2_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Inline-assembly version: return 64-bit element 1 of a as a
   uint64x1_t.  */
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
|
||||
vget_high_u64 (uint64x2_t a)
|
||||
{
|
||||
uint64x1_t result;
|
||||
/* INS copies element d[1] of operand 1 (a) into d[0] of operand 0
   (result); both operands use the "w" constraint.  */
__asm__ ("ins %0.d[0], %1.d[1]"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
|
||||
vhsub_s8 (int8x8_t a, int8x8_t b)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue