x86: Mark scratch operand in ssse3_pshufbv8qi3 as earlyclobber
commit 16ed2601ad
Author: H.J. Lu <hongjiu.lu@intel.com>
Date: Wed May 15 15:26:19 2019 +0000
i386: Emulate MMX pshufb with SSE version
has
+(define_insn_and_split "ssse3_pshufbv8qi3"
+ [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
+ (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
+ UNSPEC_PSHUFB))
+ (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
^^^ These should be earlyclobber.
+ "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
+ "@
+ pshufb\t{%2, %0|%0, %2}
+ #
+ #"
+ "TARGET_MMX_WITH_SSE && reload_completed"
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 3)
+ (and:V4SI (match_dup 3) (match_dup 2)))
+ (set (match_dup 0)
+ (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
If input register operand 2 is dead after this insn, RA may choose it
as the scratch operand. Since the scratch operand isn't marked as
earlyclobber, operand 2 becomes unused after the split and then gets
optimized out. Marking the scratch operand as earlyclobber fixes the issue.
gcc/
PR target/94467
* config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
as earlyclobber.
gcc/testsuite/
PR target/94467
* gcc.target/i386/pr94467-1.c: New test.
* gcc.target/i386/pr94467-2.c: Likewise.
This commit is contained in:
parent
b949f8e2ac
commit
bbcdf9bb3f
5 changed files with 101 additions and 1 deletions
|
@ -1,3 +1,9 @@
|
|||
2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/94467
|
||||
* config/i386/sse.md (ssse3_pshufbv8qi3): Mark scratch operand
|
||||
as earlyclobber.
|
||||
|
||||
2020-04-03 Jeff Law <law@redhat.com>
|
||||
|
||||
PR rtl-optimization/92264
|
||||
|
|
|
@ -16695,7 +16695,7 @@
|
|||
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
|
||||
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
|
||||
UNSPEC_PSHUFB))
|
||||
(clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
|
||||
(clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
|
||||
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
|
||||
"@
|
||||
pshufb\t{%2, %0|%0, %2}
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2020-04-03 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/94467
|
||||
* gcc.target/i386/pr94467-1.c: New test.
|
||||
* gcc.target/i386/pr94467-2.c: Likewise.
|
||||
|
||||
2020-04-03 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/94460
|
||||
|
|
40
gcc/testsuite/gcc.target/i386/pr94467-1.c
Normal file
40
gcc/testsuite/gcc.target/i386/pr94467-1.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
typedef char __attribute__ ((__vector_size__ (8))) v8qi;
|
||||
typedef short __attribute__ ((__vector_size__ (8))) v4hi;
|
||||
typedef int __attribute__ ((__vector_size__ (8))) v2si;
|
||||
typedef long long __attribute__ ((__vector_size__ (8))) v1di;
|
||||
typedef unsigned long long u64;
|
||||
u64 k, c;
|
||||
|
||||
v8qi g, h, p, q;
|
||||
v4hi d, e, f, l, n, o;
|
||||
v2si j;
|
||||
|
||||
u64
|
||||
foo (v4hi r)
|
||||
{
|
||||
v8qi s;
|
||||
f = (v4hi) j;
|
||||
e = __builtin_ia32_psrlwi ((v4hi) k, c);
|
||||
s = __builtin_ia32_pavgb (h, h);
|
||||
n = __builtin_ia32_pabsw (f);
|
||||
o = __builtin_ia32_psubusw (n, l);
|
||||
p = __builtin_ia32_packsswb (r, o);
|
||||
q = __builtin_ia32_pshufb (p, s);
|
||||
g = __builtin_ia32_punpcklbw (q, (v8qi) r);
|
||||
d = r;
|
||||
return (u64) g + (u64) h + (u64) j;
|
||||
}
|
||||
|
||||
static void
|
||||
avx_test (void)
|
||||
{
|
||||
u64 x = foo ((v4hi) { 5 });
|
||||
if (x != 0x0005000500050505)
|
||||
__builtin_abort ();
|
||||
}
|
48
gcc/testsuite/gcc.target/i386/pr94467-2.c
Normal file
48
gcc/testsuite/gcc.target/i386/pr94467-2.c
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target ssse3 } */
|
||||
/* { dg-options "-O -mssse3" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "ssse3-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST ssse3_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
typedef char __attribute__ ((__vector_size__ (8))) v8qi;
|
||||
typedef short __attribute__ ((__vector_size__ (8))) v4hi;
|
||||
typedef int __attribute__ ((__vector_size__ (8))) v2si;
|
||||
typedef long long __attribute__ ((__vector_size__ (8))) v1di;
|
||||
typedef unsigned long long u64;
|
||||
u64 k, c;
|
||||
|
||||
v8qi g, h, p, q;
|
||||
v4hi d, e, f, l, n, o;
|
||||
v2si j;
|
||||
|
||||
u64
|
||||
foo (v4hi r)
|
||||
{
|
||||
v8qi s;
|
||||
f = (v4hi) j;
|
||||
e = __builtin_ia32_psrlwi ((v4hi) k, c);
|
||||
s = __builtin_ia32_pavgb (h, h);
|
||||
n = __builtin_ia32_pabsw (f);
|
||||
o = __builtin_ia32_psubusw (n, l);
|
||||
p = __builtin_ia32_packsswb (r, o);
|
||||
q = __builtin_ia32_pshufb (p, s);
|
||||
g = __builtin_ia32_punpcklbw (q, (v8qi) r);
|
||||
d = r;
|
||||
return (u64) g + (u64) h + (u64) j;
|
||||
}
|
||||
|
||||
static void
|
||||
ssse3_test (void)
|
||||
{
|
||||
u64 x = foo ((v4hi) { 5 });
|
||||
if (x != 0x0005000500050505)
|
||||
__builtin_abort ();
|
||||
}
|
Loading…
Add table
Reference in a new issue