re PR target/71201 (PowerPC XXPERM instruction fails on ISA 3.0 system.)
[gcc]
2016-05-23  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/71201
	* config/rs6000/altivec.md (altivec_vperm_<mode>_internal): Drop
	ISA 3.0 xxperm fusion alternative.
	(altivec_vperm_v8hiv16qi): Likewise.
	(altivec_vperm_<mode>_uns_internal): Likewise.
	(vperm_v8hiv4si): Likewise.
	(vperm_v16qiv8hi): Likewise.

[gcc/testsuite]
2016-05-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
	    Kelvin Nilsen  <kelvin@gcc.gnu.org>

	* gcc.target/powerpc/p9-permute.c: Run test on big endian as well
	as little endian.

[gcc]
2016-05-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
	    Kelvin Nilsen  <kelvin@gcc.gnu.org>

	* config/rs6000/rs6000.c (rs6000_expand_vector_set): Generate
	vpermr/xxpermr on ISA 3.0.
	(altivec_expand_vec_perm_le): Likewise.
	* config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec.
	(altivec_vpermr_<mode>_internal): Add VPERMR/XXPERMR support for
	ISA 3.0.

Co-Authored-By: Kelvin Nilsen <kelvin@gcc.gnu.org>

From-SVN: r236617
This commit is contained in:
parent
290279c438
commit
fe3f334041
5 changed files with 111 additions and 59 deletions
|
@ -1,3 +1,23 @@
|
|||
2016-05-23 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/71201
|
||||
* config/rs6000/altivec.md (altivec_vperm_<mode>_internal): Drop
|
||||
ISA 3.0 xxperm fusion alternative.
|
||||
(altivec_vperm_v8hiv16qi): Likewise.
|
||||
(altivec_vperm_<mode>_uns_internal): Likewise.
|
||||
(vperm_v8hiv4si): Likewise.
|
||||
(vperm_v16qiv8hi): Likewise.
|
||||
|
||||
2016-05-23 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
Kelvin Nilsen <kelvin@gcc.gnu.org>
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_expand_vector_set): Generate
|
||||
vpermr/xxpermr on ISA 3.0.
|
||||
(altivec_expand_vec_perm_le): Likewise.
|
||||
* config/rs6000/altivec.md (UNSPEC_VPERMR): New unspec.
|
||||
(altivec_vpermr_<mode>_internal): Add VPERMR/XXPERMR support for
|
||||
ISA 3.0.
|
||||
|
||||
2016-05-23 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.h (IS_STACK_MODE): Enable for
|
||||
|
|
|
@ -58,6 +58,7 @@
|
|||
UNSPEC_VSUM2SWS
|
||||
UNSPEC_VSUMSWS
|
||||
UNSPEC_VPERM
|
||||
UNSPEC_VPERMR
|
||||
UNSPEC_VPERM_UNS
|
||||
UNSPEC_VRFIN
|
||||
UNSPEC_VCFUX
|
||||
|
@ -1952,32 +1953,30 @@
|
|||
|
||||
;; Slightly prefer vperm, since the target does not overlap the source
|
||||
(define_insn "*altivec_vperm_<mode>_internal"
|
||||
[(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
|
||||
(unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
|
||||
(match_operand:VM 2 "register_operand" "v,wo,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo,wo")]
|
||||
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
|
||||
(unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
|
||||
(match_operand:VM 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERM))]
|
||||
"TARGET_ALTIVEC"
|
||||
"@
|
||||
vperm %0,%1,%2,%3
|
||||
xxperm %x0,%x2,%x3
|
||||
xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
|
||||
xxperm %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4,4,8")])
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_insn "altivec_vperm_v8hiv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v,?wo,?&wo")
|
||||
(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
|
||||
(match_operand:V8HI 2 "register_operand" "v,wo,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo,wo")]
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v,?wo")
|
||||
(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v,0")
|
||||
(match_operand:V8HI 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERM))]
|
||||
"TARGET_ALTIVEC"
|
||||
"@
|
||||
vperm %0,%1,%2,%3
|
||||
xxperm %x0,%x2,%x3
|
||||
xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
|
||||
xxperm %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4,4,8")])
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_expand "altivec_vperm_<mode>_uns"
|
||||
[(set (match_operand:VM 0 "register_operand" "")
|
||||
|
@ -1995,18 +1994,17 @@
|
|||
})
|
||||
|
||||
(define_insn "*altivec_vperm_<mode>_uns_internal"
|
||||
[(set (match_operand:VM 0 "register_operand" "=v,?wo,?&wo")
|
||||
(unspec:VM [(match_operand:VM 1 "register_operand" "v,0,wo")
|
||||
(match_operand:VM 2 "register_operand" "v,wo,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo,wo")]
|
||||
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
|
||||
(unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
|
||||
(match_operand:VM 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERM_UNS))]
|
||||
"TARGET_ALTIVEC"
|
||||
"@
|
||||
vperm %0,%1,%2,%3
|
||||
xxperm %x0,%x2,%x3
|
||||
xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
|
||||
xxperm %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4,4,8")])
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_expand "vec_permv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "")
|
||||
|
@ -2035,6 +2033,19 @@
|
|||
FAIL;
|
||||
})
|
||||
|
||||
(define_insn "*altivec_vpermr_<mode>_internal"
|
||||
[(set (match_operand:VM 0 "register_operand" "=v,?wo")
|
||||
(unspec:VM [(match_operand:VM 1 "register_operand" "v,0")
|
||||
(match_operand:VM 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERMR))]
|
||||
"TARGET_P9_VECTOR"
|
||||
"@
|
||||
vpermr %0,%1,%2,%3
|
||||
xxpermr %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_insn "altivec_vrfip" ; ceil
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
|
||||
|
@ -2844,32 +2855,30 @@
|
|||
"")
|
||||
|
||||
(define_insn "vperm_v8hiv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v,?wo,?&wo")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0,wo")
|
||||
(match_operand:V4SI 2 "register_operand" "v,wo,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo,wo")]
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v,?wo")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v,0")
|
||||
(match_operand:V4SI 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERMSI))]
|
||||
"TARGET_ALTIVEC"
|
||||
"@
|
||||
vperm %0,%1,%2,%3
|
||||
xxperm %x0,%x2,%x3
|
||||
xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
|
||||
xxperm %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4,4,8")])
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_insn "vperm_v16qiv8hi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v,?wo,?&wo")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0,wo")
|
||||
(match_operand:V8HI 2 "register_operand" "v,wo,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo,wo")]
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v,?wo")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v,0")
|
||||
(match_operand:V8HI 2 "register_operand" "v,wo")
|
||||
(match_operand:V16QI 3 "register_operand" "v,wo")]
|
||||
UNSPEC_VPERMHI))]
|
||||
"TARGET_ALTIVEC"
|
||||
"@
|
||||
vperm %0,%1,%2,%3
|
||||
xxperm %x0,%x2,%x3
|
||||
xxlor %x0,%x1,%x1\t\t# xxperm fusion\;xxperm %x0,%x2,%x3"
|
||||
xxperm %x0,%x2,%x3"
|
||||
[(set_attr "type" "vecperm")
|
||||
(set_attr "length" "4,4,8")])
|
||||
(set_attr "length" "4")])
|
||||
|
||||
|
||||
(define_expand "vec_unpacku_hi_v16qi"
|
||||
|
|
|
@ -6863,21 +6863,29 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
|
|||
gen_rtvec (3, target, reg,
|
||||
force_reg (V16QImode, x)),
|
||||
UNSPEC_VPERM);
|
||||
else
|
||||
else
|
||||
{
|
||||
/* Invert selector. We prefer to generate VNAND on P8 so
|
||||
that future fusion opportunities can kick in, but must
|
||||
generate VNOR elsewhere. */
|
||||
rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
|
||||
rtx iorx = (TARGET_P8_VECTOR
|
||||
? gen_rtx_IOR (V16QImode, notx, notx)
|
||||
: gen_rtx_AND (V16QImode, notx, notx));
|
||||
rtx tmp = gen_reg_rtx (V16QImode);
|
||||
emit_insn (gen_rtx_SET (tmp, iorx));
|
||||
if (TARGET_P9_VECTOR)
|
||||
x = gen_rtx_UNSPEC (mode,
|
||||
gen_rtvec (3, target, reg,
|
||||
force_reg (V16QImode, x)),
|
||||
UNSPEC_VPERMR);
|
||||
else
|
||||
{
|
||||
/* Invert selector. We prefer to generate VNAND on P8 so
|
||||
that future fusion opportunities can kick in, but must
|
||||
generate VNOR elsewhere. */
|
||||
rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
|
||||
rtx iorx = (TARGET_P8_VECTOR
|
||||
? gen_rtx_IOR (V16QImode, notx, notx)
|
||||
: gen_rtx_AND (V16QImode, notx, notx));
|
||||
rtx tmp = gen_reg_rtx (V16QImode);
|
||||
emit_insn (gen_rtx_SET (tmp, iorx));
|
||||
|
||||
/* Permute with operands reversed and adjusted selector. */
|
||||
x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
|
||||
UNSPEC_VPERM);
|
||||
/* Permute with operands reversed and adjusted selector. */
|
||||
x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
|
||||
UNSPEC_VPERM);
|
||||
}
|
||||
}
|
||||
|
||||
emit_insn (gen_rtx_SET (target, x));
|
||||
|
@ -34365,17 +34373,25 @@ altivec_expand_vec_perm_le (rtx operands[4])
|
|||
if (!REG_P (target))
|
||||
tmp = gen_reg_rtx (mode);
|
||||
|
||||
/* Invert the selector with a VNAND if available, else a VNOR.
|
||||
The VNAND is preferred for future fusion opportunities. */
|
||||
notx = gen_rtx_NOT (V16QImode, sel);
|
||||
iorx = (TARGET_P8_VECTOR
|
||||
? gen_rtx_IOR (V16QImode, notx, notx)
|
||||
: gen_rtx_AND (V16QImode, notx, notx));
|
||||
emit_insn (gen_rtx_SET (norreg, iorx));
|
||||
if (TARGET_P9_VECTOR)
|
||||
{
|
||||
unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
|
||||
UNSPEC_VPERMR);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Invert the selector with a VNAND if available, else a VNOR.
|
||||
The VNAND is preferred for future fusion opportunities. */
|
||||
notx = gen_rtx_NOT (V16QImode, sel);
|
||||
iorx = (TARGET_P8_VECTOR
|
||||
? gen_rtx_IOR (V16QImode, notx, notx)
|
||||
: gen_rtx_AND (V16QImode, notx, notx));
|
||||
emit_insn (gen_rtx_SET (norreg, iorx));
|
||||
|
||||
/* Permute with operands reversed and adjusted selector. */
|
||||
unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
|
||||
UNSPEC_VPERM);
|
||||
/* Permute with operands reversed and adjusted selector. */
|
||||
unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
|
||||
UNSPEC_VPERM);
|
||||
}
|
||||
|
||||
/* Copy into target, possibly by way of a register. */
|
||||
if (!REG_P (target))
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2016-05-23 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
Kelvin Nilsen <kelvin@gcc.gnu.org>
|
||||
|
||||
* gcc.target/powerpc/p9-permute.c: Run test on big endian as well
|
||||
as little endian.
|
||||
|
||||
2016-05-23 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/70972
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-do compile { target { powerpc64le-*-* } } } */
|
||||
/* { dg-do compile { target { powerpc64*-*-* } } } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
|
||||
/* { dg-options "-mcpu=power9 -O2" } */
|
||||
/* { dg-require-effective-target powerpc_p9vector_ok } */
|
||||
|
@ -17,5 +17,6 @@ permute (vector long long *p, vector long long *q, vector unsigned char mask)
|
|||
return vec_perm (a, b, mask);
|
||||
}
|
||||
|
||||
/* expect xxpermr on little-endian, xxperm on big-endian */
|
||||
/* { dg-final { scan-assembler "xxperm" } } */
|
||||
/* { dg-final { scan-assembler-not "vperm" } } */
|
||||
|
|
Loading…
Add table
Reference in a new issue