[AARCH64] Add zip{1, 2}, uzp{1, 2}, trn{1, 2} support
for vector permute. gcc/ * config/aarch64/aarch64-simd-builtins.def: Add new builtins. * config/aarch64/aarch64-simd.md (simd_type): Add uzp. (aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New. * config/aarch64/aarch64.c (aarch64_evpc_trn): New. (aarch64_evpc_uzp): Likewise. (aarch64_evpc_zip): Likewise. (aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns. * config/aarch64/iterators.md (unspec): Add necessary unspecs. (PERMUTE): New. (perm_insn): Likewise. (perm_hilo): Likewise. From-SVN: r194219
This commit is contained in:
parent
88b080739a
commit
cc4d934fa0
5 changed files with 317 additions and 2 deletions
|
@ -1,3 +1,17 @@
|
|||
2012-12-05 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-simd-builtins.def: Add new builtins.
|
||||
* config/aarch64/aarch64-simd.md (simd_type): Add uzp.
|
||||
(aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>): New.
|
||||
* config/aarch64/aarch64.c (aarch64_evpc_trn): New.
|
||||
(aarch64_evpc_uzp): Likewise.
|
||||
(aarch64_evpc_zip): Likewise.
|
||||
(aarch64_expand_vec_perm_const_1): Check for trn, zip, uzp patterns.
|
||||
* config/aarch64/iterators.md (unspec): Add necessary unspecs.
|
||||
(PERMUTE): New.
|
||||
(perm_insn): Likewise.
|
||||
(perm_hilo): Likewise.
|
||||
|
||||
2012-12-05 James Greenhalgh <james.greenhalgh@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-protos.h
|
||||
|
|
|
@ -224,3 +224,12 @@
|
|||
BUILTIN_VDQF (UNOP, fcvtpu)
|
||||
BUILTIN_VDQF (UNOP, fcvtms)
|
||||
BUILTIN_VDQF (UNOP, fcvtmu)
|
||||
|
||||
/* Implemented by
|
||||
aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>. */
|
||||
BUILTIN_VALL (BINOP, zip1)
|
||||
BUILTIN_VALL (BINOP, zip2)
|
||||
BUILTIN_VALL (BINOP, uzp1)
|
||||
BUILTIN_VALL (BINOP, uzp2)
|
||||
BUILTIN_VALL (BINOP, trn1)
|
||||
BUILTIN_VALL (BINOP, trn2)
|
||||
|
|
|
@ -128,7 +128,8 @@
|
|||
; simd_store4s store single structure from one lane for four registers (ST4 [index]).
|
||||
; simd_tbl table lookup.
|
||||
; simd_trn transpose.
|
||||
; simd_zip zip/unzip.
|
||||
; simd_uzp unzip.
|
||||
; simd_zip zip.
|
||||
|
||||
(define_attr "simd_type"
|
||||
"simd_abd,\
|
||||
|
@ -230,6 +231,7 @@
|
|||
simd_store4s,\
|
||||
simd_tbl,\
|
||||
simd_trn,\
|
||||
simd_uzp,\
|
||||
simd_zip,\
|
||||
none"
|
||||
(const_string "none"))
|
||||
|
@ -3406,6 +3408,17 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Two-source vector permute instructions.  The pattern is instantiated for
;; every unspec in the PERMUTE int iterator and every mode in VALL, and is
;; enabled only when TARGET_SIMD holds.  The output mnemonic is built from
;; <perm_insn> followed by <perm_hilo> (for example "zip1" or "uzp2"); the
;; destination and both sources are register operands constrained to "w"
;; and are printed with the <Vtype> suffix.  The "simd_type" attribute is
;; derived from <perm_insn>, giving simd_zip, simd_trn or simd_uzp.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
|
||||
[(set (match_operand:VALL 0 "register_operand" "=w")
|
||||
(unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
|
||||
(match_operand:VALL 2 "register_operand" "w")]
|
||||
PERMUTE))]
|
||||
"TARGET_SIMD"
|
||||
"<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
|
||||
[(set_attr "simd_type" "simd_<PERMUTE:perm_insn>")
|
||||
(set_attr "simd_mode" "<MODE>")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_st2<mode>_dreg"
|
||||
[(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
|
||||
(unspec:TI [(match_operand:OI 1 "register_operand" "w")
|
||||
|
|
|
@ -6919,6 +6919,261 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
|
|||
aarch64_expand_vec_perm_1 (target, op0, op1, sel);
|
||||
}
|
||||
|
||||
/* Recognize patterns suitable for the TRN instructions. */
|
||||
static bool
|
||||
aarch64_evpc_trn (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned int i, odd, mask, nelt = d->nelt;
|
||||
rtx out, in0, in1, x;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
enum machine_mode vmode = d->vmode;
|
||||
|
||||
if (GET_MODE_UNIT_SIZE (vmode) > 8)
|
||||
return false;
|
||||
|
||||
/* Note that these are little-endian tests.
|
||||
We correct for big-endian later. */
|
||||
if (d->perm[0] == 0)
|
||||
odd = 0;
|
||||
else if (d->perm[0] == 1)
|
||||
odd = 1;
|
||||
else
|
||||
return false;
|
||||
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
|
||||
|
||||
for (i = 0; i < nelt; i += 2)
|
||||
{
|
||||
if (d->perm[i] != i + odd)
|
||||
return false;
|
||||
if (d->perm[i + 1] != ((i + nelt + odd) & mask))
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Success! */
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
in0 = d->op0;
|
||||
in1 = d->op1;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
x = in0, in0 = in1, in1 = x;
|
||||
odd = !odd;
|
||||
}
|
||||
out = d->target;
|
||||
|
||||
if (odd)
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_trn2v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_trn2v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_trn2v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_trn2v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_trn2v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_trn2v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_trn2v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_trn2v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_trn1v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_trn1v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_trn1v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_trn1v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_trn1v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_trn1v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_trn1v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_trn1v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
emit_insn (gen (out, in0, in1));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recognize patterns suitable for the UZP instructions. */
|
||||
static bool
|
||||
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned int i, odd, mask, nelt = d->nelt;
|
||||
rtx out, in0, in1, x;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
enum machine_mode vmode = d->vmode;
|
||||
|
||||
if (GET_MODE_UNIT_SIZE (vmode) > 8)
|
||||
return false;
|
||||
|
||||
/* Note that these are little-endian tests.
|
||||
We correct for big-endian later. */
|
||||
if (d->perm[0] == 0)
|
||||
odd = 0;
|
||||
else if (d->perm[0] == 1)
|
||||
odd = 1;
|
||||
else
|
||||
return false;
|
||||
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
|
||||
|
||||
for (i = 0; i < nelt; i++)
|
||||
{
|
||||
unsigned elt = (i * 2 + odd) & mask;
|
||||
if (d->perm[i] != elt)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Success! */
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
in0 = d->op0;
|
||||
in1 = d->op1;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
x = in0, in0 = in1, in1 = x;
|
||||
odd = !odd;
|
||||
}
|
||||
out = d->target;
|
||||
|
||||
if (odd)
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_uzp2v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_uzp2v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_uzp2v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_uzp1v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_uzp1v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_uzp1v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
emit_insn (gen (out, in0, in1));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recognize patterns suitable for the ZIP instructions. */
|
||||
static bool
|
||||
aarch64_evpc_zip (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned int i, high, mask, nelt = d->nelt;
|
||||
rtx out, in0, in1, x;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
enum machine_mode vmode = d->vmode;
|
||||
|
||||
if (GET_MODE_UNIT_SIZE (vmode) > 8)
|
||||
return false;
|
||||
|
||||
/* Note that these are little-endian tests.
|
||||
We correct for big-endian later. */
|
||||
high = nelt / 2;
|
||||
if (d->perm[0] == high)
|
||||
/* Do Nothing. */
|
||||
;
|
||||
else if (d->perm[0] == 0)
|
||||
high = 0;
|
||||
else
|
||||
return false;
|
||||
mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
|
||||
|
||||
for (i = 0; i < nelt / 2; i++)
|
||||
{
|
||||
unsigned elt = (i + high) & mask;
|
||||
if (d->perm[i * 2] != elt)
|
||||
return false;
|
||||
elt = (elt + nelt) & mask;
|
||||
if (d->perm[i * 2 + 1] != elt)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Success! */
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
in0 = d->op0;
|
||||
in1 = d->op1;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
x = in0, in0 = in1, in1 = x;
|
||||
high = !high;
|
||||
}
|
||||
out = d->target;
|
||||
|
||||
if (high)
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_zip2v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_zip2v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_zip2v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_zip2v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_zip2v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_zip2v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_zip2v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_zip2v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_aarch64_zip1v16qi; break;
|
||||
case V8QImode: gen = gen_aarch64_zip1v8qi; break;
|
||||
case V8HImode: gen = gen_aarch64_zip1v8hi; break;
|
||||
case V4HImode: gen = gen_aarch64_zip1v4hi; break;
|
||||
case V4SImode: gen = gen_aarch64_zip1v4si; break;
|
||||
case V2SImode: gen = gen_aarch64_zip1v2si; break;
|
||||
case V2DImode: gen = gen_aarch64_zip1v2di; break;
|
||||
case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
|
||||
case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
|
||||
case V2DFmode: gen = gen_aarch64_zip1v2df; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
emit_insn (gen (out, in0, in1));
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
|
||||
{
|
||||
|
@ -6969,7 +7224,15 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
|||
}
|
||||
|
||||
if (TARGET_SIMD)
|
||||
return aarch64_evpc_tbl (d);
|
||||
{
|
||||
if (aarch64_evpc_zip (d))
|
||||
return true;
|
||||
else if (aarch64_evpc_uzp (d))
|
||||
return true;
|
||||
else if (aarch64_evpc_trn (d))
|
||||
return true;
|
||||
return aarch64_evpc_tbl (d);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -230,6 +230,12 @@
|
|||
UNSPEC_BSL ; Used in aarch64-simd.md.
|
||||
UNSPEC_TBL ; Used in vector permute patterns.
|
||||
UNSPEC_CONCAT ; Used in vector permute patterns.
|
||||
UNSPEC_ZIP1 ; Used in vector permute patterns.
|
||||
UNSPEC_ZIP2 ; Used in vector permute patterns.
|
||||
UNSPEC_UZP1 ; Used in vector permute patterns.
|
||||
UNSPEC_UZP2 ; Used in vector permute patterns.
|
||||
UNSPEC_TRN1 ; Used in vector permute patterns.
|
||||
UNSPEC_TRN2 ; Used in vector permute patterns.
|
||||
])
|
||||
|
||||
;; -------------------------------------------------------------------
|
||||
|
@ -652,6 +658,9 @@
|
|||
|
||||
(define_int_iterator VCMP_U [UNSPEC_CMHS UNSPEC_CMHI UNSPEC_CMTST])
|
||||
|
||||
;; Unspecs of the two-source vector permute operations: the "1" and "2"
;; variants of ZIP, TRN and UZP.
(define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
|
||||
UNSPEC_TRN1 UNSPEC_TRN2
|
||||
UNSPEC_UZP1 UNSPEC_UZP2])
|
||||
|
||||
(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
|
||||
UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA])
|
||||
|
@ -757,3 +766,10 @@
|
|||
(define_int_attr fcvt_pattern [(UNSPEC_FRINTZ "btrunc") (UNSPEC_FRINTA "round")
|
||||
(UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor")])
|
||||
|
||||
;; Base name ("zip", "trn" or "uzp") for each permute unspec; both the
;; "1" and "2" variants of an operation map to the same string.
(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip")
|
||||
(UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn")
|
||||
(UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")])
|
||||
|
||||
;; Variant digit ("1" or "2") for each permute unspec, matching the
;; trailing digit of the unspec name.
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
|
||||
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
|
||||
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
|
||||
|
|
Loading…
Add table
Reference in a new issue