[AArch64] Rewrite the vdup_lane intrinsics in C

gcc/
	* config/aarch64/aarch64-simd-builtins.def
	(dup_lane_scalar): Remove.
	* config/aarch64/aarch64-simd.md
	(aarch64_simd_dup): Add 'w->w' alternative.
	(aarch64_dup_lane<mode>): Allow for VALL.
	(aarch64_dup_lane_scalar<mode>): Remove.
	(aarch64_dup_lane_<vswap_width_name><mode>): New.
	(aarch64_get_lane_signed<mode>): Add w->w alternative.
	(aarch64_get_lane_unsigned<mode>): Likewise.
	(aarch64_get_lane<mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
	(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
	* config/aarch64/iterators.md (VSWAP_WIDTH): New.
	(VCON): Change container of V2SF.
	(vswap_width_name): Likewise.
	* config/aarch64/arm_neon.h
	(__aarch64_vdup_lane_any): New.
	(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
	(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
	(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/scalar_intrinsics.c
	(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.

From-SVN: r202180
This commit is contained in:
James Greenhalgh 2013-09-02 16:22:10 +00:00 committed by James Greenhalgh
parent d617d2d806
commit 91bd4114a7
7 changed files with 887 additions and 602 deletions

View file

@@ -1,3 +1,26 @@
2013-09-02 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w alternative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
2013-09-02 Eric Botcazou <ebotcazou@adacore.com> 2013-09-02 Eric Botcazou <ebotcazou@adacore.com>
PR middle-end/56382 PR middle-end/56382

View file

@@ -336,24 +336,13 @@
}) })
(define_insn "aarch64_simd_dup<mode>" (define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQ 0 "register_operand" "=w") [(set (match_operand:VDQ 0 "register_operand" "=w, w")
(vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r")))] (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
"TARGET_SIMD" "TARGET_SIMD"
"dup\\t%0.<Vtype>, %<vw>1" "@
[(set_attr "simd_type" "simd_dupgp") dup\\t%0.<Vtype>, %<vw>1
(set_attr "simd_mode" "<MODE>")] dup\\t%0.<Vtype>, %1.<Vetype>[0]"
) [(set_attr "simd_type" "simd_dupgp, simd_dup")
(define_insn "aarch64_dup_lane<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(vec_duplicate:VDQ_I
(vec_select:<VEL>
(match_operand:<VCON> 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD"
"dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")] (set_attr "simd_mode" "<MODE>")]
) )
@@ -366,6 +355,32 @@
(set_attr "simd_mode" "<MODE>")] (set_attr "simd_mode" "<MODE>")]
) )
(define_insn "aarch64_dup_lane<mode>"
[(set (match_operand:VALL 0 "register_operand" "=w")
(vec_duplicate:VALL
(vec_select:<VEL>
(match_operand:VALL 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD"
"dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")]
)
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
[(set (match_operand:VALL 0 "register_operand" "=w")
(vec_duplicate:VALL
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD"
"dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")]
)
(define_insn "*aarch64_simd_mov<mode>" (define_insn "*aarch64_simd_mov<mode>"
[(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
"=w, Utv, w, ?r, ?w, ?r, w") "=w, Utv, w, ?r, ?w, ?r, w")

View file

@@ -7931,6 +7931,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
return true; return true;
} }
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
rtx (*gen) (rtx, rtx, rtx);
rtx out = d->target;
rtx in0;
enum machine_mode vmode = d->vmode;
unsigned int i, elt, nelt = d->nelt;
rtx lane;
/* TODO: This may not be big-endian safe. */
if (BYTES_BIG_ENDIAN)
return false;
elt = d->perm[0];
for (i = 1; i < nelt; i++)
{
if (elt != d->perm[i])
return false;
}
/* The generic preparation in aarch64_expand_vec_perm_const_1
swaps the operand order and the permute indices if it finds
d->perm[0] to be in the second operand. Thus, we can always
use d->op0 and need not do any extra arithmetic to get the
correct lane number. */
in0 = d->op0;
lane = GEN_INT (elt);
switch (vmode)
{
case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
default:
return false;
}
emit_insn (gen (out, in0, lane));
return true;
}
static bool static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d) aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{ {
@@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true; return true;
else if (aarch64_evpc_trn (d)) else if (aarch64_evpc_trn (d))
return true; return true;
else if (aarch64_evpc_dup (d))
return true;
return aarch64_evpc_tbl (d); return aarch64_evpc_tbl (d);
} }
return false; return false;

File diff suppressed because it is too large Load diff

View file

@@ -383,7 +383,7 @@
(V4HI "V8HI") (V8HI "V8HI") (V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI") (V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI") (DI "V2DI") (V2DI "V2DI")
(V2SF "V2SF") (V4SF "V4SF") (V2SF "V4SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI") (V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")]) (HI "V8HI") (QI "V16QI")])
@@ -527,6 +527,20 @@
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
(V4HI "V8HI") (V8HI "V4HI")
(V2SI "V4SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "DI")
(V2SF "V4SF") (V4SF "V2SF")
(DF "V2DF") (V2DF "DF")])
(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
(V4HI "to_128") (V8HI "to_64")
(V2SI "to_128") (V4SI "to_64")
(DI "to_128") (V2DI "to_64")
(V2SF "to_128") (V4SF "to_64")
(DF "to_128") (V2DF "to_64")])
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Code Iterators ;; Code Iterators
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------

View file

@@ -1,3 +1,8 @@
2013-09-02 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
2013-09-02 Richard Biener <rguenther@suse.de> 2013-09-02 Richard Biener <rguenther@suse.de>
PR middle-end/57511 PR middle-end/57511

View file

@@ -198,13 +198,21 @@ test_vcltzd_s64 (int64x1_t a)
int8x1_t int8x1_t
test_vdupb_lane_s8 (int8x16_t a) test_vdupb_lane_s8 (int8x16_t a)
{ {
return vdupb_lane_s8 (a, 2); int8x1_t res;
force_simd (a);
res = vdupb_laneq_s8 (a, 2);
force_simd (res);
return res;
} }
uint8x1_t uint8x1_t
test_vdupb_lane_u8 (uint8x16_t a) test_vdupb_lane_u8 (uint8x16_t a)
{ {
return vdupb_lane_u8 (a, 2); uint8x1_t res;
force_simd (a);
res = vdupb_laneq_u8 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */
@@ -212,13 +220,21 @@ test_vdupb_lane_u8 (uint8x16_t a)
int16x1_t int16x1_t
test_vduph_lane_s16 (int16x8_t a) test_vduph_lane_s16 (int16x8_t a)
{ {
return vduph_lane_s16 (a, 2); int16x1_t res;
force_simd (a);
res = vduph_laneq_s16 (a, 2);
force_simd (res);
return res;
} }
uint16x1_t uint16x1_t
test_vduph_lane_u16 (uint16x8_t a) test_vduph_lane_u16 (uint16x8_t a)
{ {
return vduph_lane_u16 (a, 2); uint16x1_t res;
force_simd (a);
res = vduph_laneq_u16 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */
@@ -226,13 +242,21 @@ test_vduph_lane_u16 (uint16x8_t a)
int32x1_t int32x1_t
test_vdups_lane_s32 (int32x4_t a) test_vdups_lane_s32 (int32x4_t a)
{ {
return vdups_lane_s32 (a, 2); int32x1_t res;
force_simd (a);
res = vdups_laneq_s32 (a, 2);
force_simd (res);
return res;
} }
uint32x1_t uint32x1_t
test_vdups_lane_u32 (uint32x4_t a) test_vdups_lane_u32 (uint32x4_t a)
{ {
return vdups_lane_u32 (a, 2); uint32x1_t res;
force_simd (a);
res = vdups_laneq_u32 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
@@ -240,13 +264,21 @@ test_vdups_lane_u32 (uint32x4_t a)
int64x1_t int64x1_t
test_vdupd_lane_s64 (int64x2_t a) test_vdupd_lane_s64 (int64x2_t a)
{ {
return vdupd_lane_s64 (a, 1); int64x1_t res;
force_simd (a);
res = vdupd_laneq_s64 (a, 1);
force_simd (res);
return res;
} }
uint64x1_t uint64x1_t
test_vdupd_lane_u64 (uint64x2_t a) test_vdupd_lane_u64 (uint64x2_t a)
{ {
return vdupd_lane_u64 (a, 1); uint64x1_t res;
force_simd (a);
res = vdupd_laneq_u64 (a, 1);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */ /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */