[AArch64] Refactor reduc_<su>plus patterns.
gcc/ * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin.c): Fold more modes for reduc_splus_. * config/aarch64/aarch64-simd-builtins.def (reduc_splus_): Add new modes. (reduc_uplus_): New. * config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove. (reduc_uplus_v4sf): Likewise. (reduc_splus_v4sf): Likewise. (aarch64_addv<mode>): Likewise. (reduc_uplus_<mode>): Likewise. (reduc_splus_<mode>): Likewise. (aarch64_addvv2di): Likewise. (reduc_uplus_v2di): Likewise. (reduc_splus_v2di): Likewise. (aarch64_addvv2si): Likewise. (reduc_uplus_v2si): Likewise. (reduc_splus_v2si): Likewise. (reduc_<sur>plus_<mode>): New. (reduc_<sur>plus_v2di): Likewise. (reduc_<sur>plus_v2si): Likewise. (reduc_<sur>plus_v4sf): Likewise. (aarch64_addpv4sf): Likewise. * config/aarch64/arm_neon.h (vaddv<q>_<s,u,f><8, 16, 32, 64): Rewrite using builtins. * config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV, add UNSPEC_SADDV, UNSPEC_UADDV. (SUADDV): New. (sur): Add UNSPEC_SADDV, UNSPEC_UADDV. gcc/testsuite/ * gcc.target/aarch64/vect-vaddv.c: New. From-SVN: r198500
This commit is contained in:
parent
6dce23a8ae
commit
36054fabf5
8 changed files with 313 additions and 264 deletions
|
@ -1,3 +1,34 @@
|
|||
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-builtins.c
|
||||
(aarch64_gimple_fold_builtin.c): Fold more modes for reduc_splus_.
|
||||
* config/aarch64/aarch64-simd-builtins.def
|
||||
(reduc_splus_): Add new modes.
|
||||
(reduc_uplus_): New.
|
||||
* config/aarch64/aarch64-simd.md (aarch64_addvv4sf): Remove.
|
||||
(reduc_uplus_v4sf): Likewise.
|
||||
(reduc_splus_v4sf): Likewise.
|
||||
(aarch64_addv<mode>): Likewise.
|
||||
(reduc_uplus_<mode>): Likewise.
|
||||
(reduc_splus_<mode>): Likewise.
|
||||
(aarch64_addvv2di): Likewise.
|
||||
(reduc_uplus_v2di): Likewise.
|
||||
(reduc_splus_v2di): Likewise.
|
||||
(aarch64_addvv2si): Likewise.
|
||||
(reduc_uplus_v2si): Likewise.
|
||||
(reduc_splus_v2si): Likewise.
|
||||
(reduc_<sur>plus_<mode>): New.
|
||||
(reduc_<sur>plus_v2di): Likewise.
|
||||
(reduc_<sur>plus_v2si): Likewise.
|
||||
(reduc_<sur>plus_v4sf): Likewise.
|
||||
(aarch64_addpv4sf): Likewise.
|
||||
* config/aarch64/arm_neon.h
|
||||
(vaddv<q>_<s,u,f><8, 16, 32, 64): Rewrite using builtins.
|
||||
* config/aarch64/iterators.md (unspec): Remove UNSPEC_ADDV,
|
||||
add UNSPEC_SADDV, UNSPEC_UADDV.
|
||||
(SUADDV): New.
|
||||
(sur): Add UNSPEC_SADDV, UNSPEC_UADDV.
|
||||
|
||||
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/arm_neon.h
|
||||
|
|
|
@ -1365,7 +1365,7 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
|
|||
|
||||
switch (fcode)
|
||||
{
|
||||
BUILTIN_VDQF (UNOP, addv, 0)
|
||||
BUILTIN_VALL (UNOP, reduc_splus_, 10)
|
||||
new_stmt = gimple_build_assign_with_ops (
|
||||
REDUC_PLUS_EXPR,
|
||||
gimple_call_lhs (stmt),
|
||||
|
|
|
@ -234,8 +234,9 @@
|
|||
BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
|
||||
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
|
||||
|
||||
/* Implemented by aarch64_addv<mode>. */
|
||||
BUILTIN_VDQF (UNOP, addv, 0)
|
||||
/* Implemented by reduc_<sur>plus_<mode>. */
|
||||
BUILTIN_VALL (UNOP, reduc_splus_, 10)
|
||||
BUILTIN_VDQ (UNOP, reduc_uplus_, 10)
|
||||
|
||||
/* Implemented by reduc_<maxmin_uns>_<mode>. */
|
||||
BUILTIN_VDQIF (UNOP, reduc_smax_, 10)
|
||||
|
|
|
@ -1438,7 +1438,47 @@
|
|||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
;; FP 'across lanes' add.
|
||||
;; 'across lanes' add.
|
||||
|
||||
(define_insn "reduc_<sur>plus_<mode>"
|
||||
[(set (match_operand:VDQV 0 "register_operand" "=w")
|
||||
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
|
||||
SUADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addv\\t%<Vetype>0, %1.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_addv")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "reduc_<sur>plus_v2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
|
||||
SUADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addp\\t%d0, %1.2d"
|
||||
[(set_attr "simd_type" "simd_addv")
|
||||
(set_attr "simd_mode" "V2DI")]
|
||||
)
|
||||
|
||||
(define_insn "reduc_<sur>plus_v2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
||||
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
||||
SUADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addp\\t%0.2s, %1.2s, %1.2s"
|
||||
[(set_attr "simd_type" "simd_addv")
|
||||
(set_attr "simd_mode" "V2SI")]
|
||||
)
|
||||
|
||||
(define_insn "reduc_<sur>plus_<mode>"
|
||||
[(set (match_operand:V2F 0 "register_operand" "=w")
|
||||
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
|
||||
SUADDV))]
|
||||
"TARGET_SIMD"
|
||||
"faddp\\t%<Vetype>0, %1.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_fadd")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_addpv4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
|
@ -1450,9 +1490,10 @@
|
|||
(set_attr "simd_mode" "V4SF")]
|
||||
)
|
||||
|
||||
(define_expand "reduc_uplus_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(match_operand:V4SF 1 "register_operand" "w"))]
|
||||
(define_expand "reduc_<sur>plus_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
|
||||
SUADDV))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (V4SFmode);
|
||||
|
@ -1461,133 +1502,6 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "reduc_splus_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(match_operand:V4SF 1 "register_operand" "w"))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
rtx tmp = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
|
||||
emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "aarch64_addvv4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=w")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
|
||||
UNSPEC_FADDV))]
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "aarch64_addv<mode>"
|
||||
[(set (match_operand:V2F 0 "register_operand" "=w")
|
||||
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
|
||||
UNSPEC_FADDV))]
|
||||
"TARGET_SIMD"
|
||||
"faddp\\t%<Vetype>0, %1.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_fadd")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_expand "reduc_uplus_<mode>"
|
||||
[(set (match_operand:V2F 0 "register_operand" "=w")
|
||||
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
|
||||
UNSPEC_FADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_expand "reduc_splus_<mode>"
|
||||
[(set (match_operand:V2F 0 "register_operand" "=w")
|
||||
(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
|
||||
UNSPEC_FADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
;; Reduction across lanes.
|
||||
|
||||
(define_insn "aarch64_addv<mode>"
|
||||
[(set (match_operand:VDQV 0 "register_operand" "=w")
|
||||
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addv\\t%<Vetype>0, %1.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_addv")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_expand "reduc_splus_<mode>"
|
||||
[(set (match_operand:VDQV 0 "register_operand" "=w")
|
||||
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_expand "reduc_uplus_<mode>"
|
||||
[(set (match_operand:VDQV 0 "register_operand" "=w")
|
||||
(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_insn "aarch64_addvv2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addp\\t%d0, %1.2d"
|
||||
[(set_attr "simd_type" "simd_add")
|
||||
(set_attr "simd_mode" "V2DI")]
|
||||
)
|
||||
|
||||
(define_expand "reduc_uplus_v2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_expand "reduc_splus_v2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=w")
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_insn "aarch64_addvv2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
||||
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
"addp\\t%0.2s, %1.2s, %1.2s"
|
||||
[(set_attr "simd_type" "simd_add")
|
||||
(set_attr "simd_mode" "V2SI")]
|
||||
)
|
||||
|
||||
(define_expand "reduc_uplus_v2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
||||
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
(define_expand "reduc_splus_v2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=w")
|
||||
(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
|
||||
UNSPEC_ADDV))]
|
||||
"TARGET_SIMD"
|
||||
""
|
||||
)
|
||||
|
||||
;; 'across lanes' max and min ops.
|
||||
|
||||
(define_insn "reduc_<maxmin_uns>_<mode>"
|
||||
|
|
|
@ -4655,116 +4655,6 @@ vaddlvq_u32 (uint32x4_t a)
|
|||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
|
||||
vaddv_s8 (int8x8_t a)
|
||||
{
|
||||
int8_t result;
|
||||
__asm__ ("addv %b0,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vaddv_s16 (int16x4_t a)
|
||||
{
|
||||
int16_t result;
|
||||
__asm__ ("addv %h0,%1.4h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
|
||||
vaddv_u8 (uint8x8_t a)
|
||||
{
|
||||
uint8_t result;
|
||||
__asm__ ("addv %b0,%1.8b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vaddv_u16 (uint16x4_t a)
|
||||
{
|
||||
uint16_t result;
|
||||
__asm__ ("addv %h0,%1.4h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s8 (int8x16_t a)
|
||||
{
|
||||
int8_t result;
|
||||
__asm__ ("addv %b0,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s16 (int16x8_t a)
|
||||
{
|
||||
int16_t result;
|
||||
__asm__ ("addv %h0,%1.8h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s32 (int32x4_t a)
|
||||
{
|
||||
int32_t result;
|
||||
__asm__ ("addv %s0,%1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u8 (uint8x16_t a)
|
||||
{
|
||||
uint8_t result;
|
||||
__asm__ ("addv %b0,%1.16b"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u16 (uint16x8_t a)
|
||||
{
|
||||
uint16_t result;
|
||||
__asm__ ("addv %h0,%1.8h"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u32 (uint32x4_t a)
|
||||
{
|
||||
uint32_t result;
|
||||
__asm__ ("addv %s0,%1.4s"
|
||||
: "=w"(result)
|
||||
: "w"(a)
|
||||
: /* No clobbers */);
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
|
||||
vbsl_f32 (uint32x2_t a, float32x2_t b, float32x2_t c)
|
||||
{
|
||||
|
@ -16995,22 +16885,6 @@ vaddlv_u32 (uint32x2_t a)
|
|||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vaddv_s32 (int32x2_t a)
|
||||
{
|
||||
int32_t result;
|
||||
__asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vaddv_u32 (uint32x2_t a)
|
||||
{
|
||||
uint32_t result;
|
||||
__asm__ ("addp %0.2s, %1.2s, %1.2s" : "=w"(result) : "w"(a) : );
|
||||
return result;
|
||||
}
|
||||
|
||||
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
|
||||
vpaddd_s64 (int64x2_t __a)
|
||||
{
|
||||
|
@ -18026,24 +17900,117 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
|
|||
return __a + __b;
|
||||
}
|
||||
|
||||
/* vaddv */
|
||||
|
||||
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
|
||||
vaddv_s8 (int8x8_t __a)
|
||||
{
|
||||
return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vaddv_s16 (int16x4_t __a)
|
||||
{
|
||||
return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vaddv_s32 (int32x2_t __a)
|
||||
{
|
||||
return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
|
||||
vaddv_u8 (uint8x8_t __a)
|
||||
{
|
||||
return vget_lane_u8 ((uint8x8_t)
|
||||
__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vaddv_u16 (uint16x4_t __a)
|
||||
{
|
||||
return vget_lane_u16 ((uint16x4_t)
|
||||
__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vaddv_u32 (uint32x2_t __a)
|
||||
{
|
||||
return vget_lane_u32 ((uint32x2_t)
|
||||
__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int8_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s8 (int8x16_t __a)
|
||||
{
|
||||
return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s16 (int16x8_t __a)
|
||||
{
|
||||
return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s32 (int32x4_t __a)
|
||||
{
|
||||
return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_s64 (int64x2_t __a)
|
||||
{
|
||||
return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint8_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u8 (uint8x16_t __a)
|
||||
{
|
||||
return vgetq_lane_u8 ((uint8x16_t)
|
||||
__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint16_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u16 (uint16x8_t __a)
|
||||
{
|
||||
return vgetq_lane_u16 ((uint16x8_t)
|
||||
__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u32 (uint32x4_t __a)
|
||||
{
|
||||
return vgetq_lane_u32 ((uint32x4_t)
|
||||
__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_u64 (uint64x2_t __a)
|
||||
{
|
||||
return vgetq_lane_u64 ((uint64x2_t)
|
||||
__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
|
||||
vaddv_f32 (float32x2_t __a)
|
||||
{
|
||||
float32x2_t t = __builtin_aarch64_addvv2sf (__a);
|
||||
float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
|
||||
return vget_lane_f32 (t, 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
|
||||
vaddvq_f32 (float32x4_t __a)
|
||||
{
|
||||
float32x4_t t = __builtin_aarch64_addvv4sf (__a);
|
||||
float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
|
||||
return vgetq_lane_f32 (t, 0);
|
||||
}
|
||||
|
||||
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
|
||||
vaddvq_f64 (float64x2_t __a)
|
||||
{
|
||||
float64x2_t t = __builtin_aarch64_addvv2df (__a);
|
||||
float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
|
||||
return vgetq_lane_f64 (t, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -170,7 +170,8 @@
|
|||
UNSPEC_FMINNMV ; Used in aarch64-simd.md.
|
||||
UNSPEC_FMINV ; Used in aarch64-simd.md.
|
||||
UNSPEC_FADDV ; Used in aarch64-simd.md.
|
||||
UNSPEC_ADDV ; Used in aarch64-simd.md.
|
||||
UNSPEC_SADDV ; Used in aarch64-simd.md.
|
||||
UNSPEC_UADDV ; Used in aarch64-simd.md.
|
||||
UNSPEC_SMAXV ; Used in aarch64-simd.md.
|
||||
UNSPEC_SMINV ; Used in aarch64-simd.md.
|
||||
UNSPEC_UMAXV ; Used in aarch64-simd.md.
|
||||
|
@ -686,6 +687,8 @@
|
|||
(define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV
|
||||
UNSPEC_FMAXNMV UNSPEC_FMINNMV])
|
||||
|
||||
(define_int_iterator SUADDV [UNSPEC_SADDV UNSPEC_UADDV])
|
||||
|
||||
(define_int_iterator HADDSUB [UNSPEC_SHADD UNSPEC_UHADD
|
||||
UNSPEC_SRHADD UNSPEC_URHADD
|
||||
UNSPEC_SHSUB UNSPEC_UHSUB
|
||||
|
@ -777,6 +780,7 @@
|
|||
(UNSPEC_SUBHN2 "") (UNSPEC_RSUBHN2 "r")
|
||||
(UNSPEC_SQXTN "s") (UNSPEC_UQXTN "u")
|
||||
(UNSPEC_USQADD "us") (UNSPEC_SUQADD "su")
|
||||
(UNSPEC_SADDV "s") (UNSPEC_UADDV "u")
|
||||
(UNSPEC_SSLI "s") (UNSPEC_USLI "u")
|
||||
(UNSPEC_SSRI "s") (UNSPEC_USRI "u")
|
||||
(UNSPEC_USRA "u") (UNSPEC_SSRA "s")
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/vect-vaddv.c: New.
|
||||
|
||||
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* gcc.target/aarch64/vect-vmaxv.c: New.
|
||||
|
|
128
gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
Normal file
128
gcc/testsuite/gcc.target/aarch64/vect-vaddv.c
Normal file
|
@ -0,0 +1,128 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 --save-temps -ffast-math" } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
extern void abort (void);
|
||||
extern float fabsf (float);
|
||||
extern double fabs (double);
|
||||
|
||||
#define NUM_TESTS 16
|
||||
#define DELTA 0.000001
|
||||
|
||||
int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
|
||||
-4, 34, 110, -110, 6, 4, 75, -34};
|
||||
int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
|
||||
-4, 34, 110, -110, 6, 4, 75, -34};
|
||||
int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
|
||||
-4, 34, 110, -110, 6, 4, 75, -34};
|
||||
int64_t input_int64[] = {1, 56, 2, -9, -90, 23, 54, 76,
|
||||
-4, 34, 110, -110, 6, 4, 75, -34};
|
||||
|
||||
uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
|
||||
4, 34, 110, 110, 6, 4, 75, 34};
|
||||
uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
|
||||
4, 34, 110, 110, 6, 4, 75, 34};
|
||||
uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
|
||||
4, 34, 110, 110, 6, 4, 75, 34};
|
||||
|
||||
uint64_t input_uint64[] = {1, 56, 2, 9, 90, 23, 54, 76,
|
||||
4, 34, 110, 110, 6, 4, 75, 34};
|
||||
|
||||
float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
|
||||
200.0f, -800.0f, -13.0f, -0.5f,
|
||||
7.9f, -870.0f, 10.4f, 310.11f,
|
||||
0.0f, -865.0f, -2213.0f, -1.5f};
|
||||
|
||||
double input_float64[] = {0.1, -0.1, 0.4, 10.3,
|
||||
200.0, -800.0, -13.0, -0.5,
|
||||
7.9, -870.0, 10.4, 310.11,
|
||||
0.0, -865.0, -2213.0, -1.5};
|
||||
|
||||
#define EQUALF(a, b) (fabsf (a - b) < DELTA)
|
||||
#define EQUALD(a, b) (fabs (a - b) < DELTA)
|
||||
#define EQUALL(a, b) (a == b)
|
||||
|
||||
#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
|
||||
int \
|
||||
test_vaddv##SUFFIX##_##TYPE##x##LANES##_t (void) \
|
||||
{ \
|
||||
int i, j; \
|
||||
int moves = (NUM_TESTS - LANES) + 1; \
|
||||
TYPE##_t out_l[NUM_TESTS]; \
|
||||
TYPE##_t out_v[NUM_TESTS]; \
|
||||
\
|
||||
/* Calculate linearly. */ \
|
||||
for (i = 0; i < moves; i++) \
|
||||
{ \
|
||||
out_l[i] = input_##TYPE[i]; \
|
||||
for (j = 1; j < LANES; j++) \
|
||||
out_l[i] += input_##TYPE[i + j]; \
|
||||
} \
|
||||
\
|
||||
/* Calculate using vector reduction intrinsics. */ \
|
||||
for (i = 0; i < moves; i++) \
|
||||
{ \
|
||||
TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
|
||||
out_v[i] = vaddv##Q##_##SUFFIX (t1); \
|
||||
} \
|
||||
\
|
||||
/* Compare. */ \
|
||||
for (i = 0; i < moves; i++) \
|
||||
{ \
|
||||
if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
|
||||
return 0; \
|
||||
} \
|
||||
return 1; \
|
||||
}
|
||||
|
||||
#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
|
||||
TEST (STYPE, , TYPE, W32, F) \
|
||||
TEST (STYPE, q, TYPE, W64, F) \
|
||||
|
||||
BUILD_VARIANTS (int8, s8, 8, 16, L)
|
||||
BUILD_VARIANTS (uint8, u8, 8, 16, L)
|
||||
/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
|
||||
/* { dg-final { scan-assembler "addv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
|
||||
BUILD_VARIANTS (int16, s16, 4, 8, L)
|
||||
BUILD_VARIANTS (uint16, u16, 4, 8, L)
|
||||
/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
|
||||
/* { dg-final { scan-assembler "addv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
|
||||
BUILD_VARIANTS (int32, s32, 2, 4, L)
|
||||
BUILD_VARIANTS (uint32, u32, 2, 4, L)
|
||||
/* { dg-final { scan-assembler "addp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
|
||||
/* { dg-final { scan-assembler "addv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
|
||||
TEST (s64, q, int64, 2, D)
|
||||
TEST (u64, q, uint64, 2, D)
|
||||
/* { dg-final { scan-assembler "addp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
|
||||
|
||||
BUILD_VARIANTS (float32, f32, 2, 4, F)
|
||||
/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
|
||||
/* { dg-final { scan-assembler "faddp\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" } } */
|
||||
TEST (f64, q, float64, 2, D)
|
||||
/* { dg-final { scan-assembler "faddp\\td\[0-9\]+\, v\[0-9\]+\.2d" } } */
|
||||
|
||||
#undef TEST
|
||||
#define TEST(SUFFIX, Q, TYPE, LANES, FLOAT) \
|
||||
{ \
|
||||
if (!test_vaddv##SUFFIX##_##TYPE##x##LANES##_t ()) \
|
||||
abort (); \
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
BUILD_VARIANTS (int8, s8, 8, 16, L)
|
||||
BUILD_VARIANTS (uint8, u8, 8, 16, L)
|
||||
BUILD_VARIANTS (int16, s16, 4, 8, L)
|
||||
BUILD_VARIANTS (uint16, u16, 4, 8, L)
|
||||
BUILD_VARIANTS (int32, s32, 2, 4, L)
|
||||
BUILD_VARIANTS (uint32, u32, 2, 4, L)
|
||||
|
||||
BUILD_VARIANTS (float32, f32, 2, 4, F)
|
||||
TEST (f64, q, float64, 2, D)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-saved-temps } } */
|
Loading…
Add table
Reference in a new issue