re PR tree-optimization/88828 (Inefficient update of the first element of vector registers)
2019-05-14 Richard Biener <rguenther@suse.de> H.J. Lu <hongjiu.lu@intel.com> PR tree-optimization/88828 * tree-ssa-forwprop.c (simplify_vector_constructor): Handle permuting in a single non-constant element not extracted from a vector. * gcc.target/i386/pr88828-1.c: New test. * gcc.target/i386/pr88828-1a.c: Likewise. * gcc.target/i386/pr88828-1b.c: Likewise. * gcc.target/i386/pr88828-1c.c: Likewise. * gcc.target/i386/pr88828-4a.c: Likewise. * gcc.target/i386/pr88828-4b.c: Likewise. * gcc.target/i386/pr88828-5a.c: Likewise. * gcc.target/i386/pr88828-5b.c: Likewise. * gcc.target/i386/pr88828-7.c: Likewise. * gcc.target/i386/pr88828-7a.c: Likewise. * gcc.target/i386/pr88828-7b.c: Likewise. * gcc.target/i386/pr88828-8.c: Likewise. * gcc.target/i386/pr88828-8a.c: Likewise. * gcc.target/i386/pr88828-8b.c: Likewise. * gcc.target/i386/pr88828-9.c: Likewise. * gcc.target/i386/pr88828-9a.c: Likewise. * gcc.target/i386/pr88828-9b.c: Likewise. Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com> From-SVN: r271153
This commit is contained in:
parent
a52cf5cf27
commit
962372f9f8
20 changed files with 534 additions and 35 deletions
|
@ -1,3 +1,11 @@
|
|||
2019-05-14 Richard Biener <rguenther@suse.de>
|
||||
H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR tree-optimization/88828
|
||||
* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
|
||||
permuting in a single non-constant element not extracted
|
||||
from a vector.
|
||||
|
||||
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
|
||||
|
||||
* internal-fn.def (SIGNBIT): New.
|
||||
|
|
|
@ -1,3 +1,25 @@
|
|||
2019-05-14 Richard Biener <rguenther@suse.de>
|
||||
H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR tree-optimization/88828
|
||||
* gcc.target/i386/pr88828-1.c: New test.
|
||||
* gcc.target/i386/pr88828-1a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-1b.c: Likewise.
|
||||
* gcc.target/i386/pr88828-1c.c: Likewise.
|
||||
* gcc.target/i386/pr88828-4a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-4b.c: Likewise.
|
||||
* gcc.target/i386/pr88828-5a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-5b.c: Likewise.
|
||||
* gcc.target/i386/pr88828-7.c: Likewise.
|
||||
* gcc.target/i386/pr88828-7a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-7b.c: Likewise.
|
||||
* gcc.target/i386/pr88828-8.c: Likewise.
|
||||
* gcc.target/i386/pr88828-8a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-8b.c: Likewise.
|
||||
* gcc.target/i386/pr88828-9.c: Likewise.
|
||||
* gcc.target/i386/pr88828-9a.c: Likewise.
|
||||
* gcc.target/i386/pr88828-9b.c: Likewise.
|
||||
|
||||
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
|
||||
|
||||
* gcc.target/aarch64/signbitv4sf.c: New test.
|
||||
|
|
49
gcc/testsuite/gcc.target/i386/pr88828-1.c
Normal file
49
gcc/testsuite/gcc.target/i386/pr88828-1.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do run { target sse2_runtime } } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#include "pr88828-1a.c"
|
||||
#include "pr88828-1b.c"
|
||||
#include "pr88828-1c.c"
|
||||
|
||||
extern void abort ();
|
||||
|
||||
void
|
||||
do_check (__v4sf y, float f[4], float z)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (i == 0)
|
||||
{
|
||||
if (y[i] != z)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (y[i] != f[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
float f[4] = { -11, 2, 55553, -4 };
|
||||
float z = 134567;
|
||||
__v4sf x = { f[0], f[1], f[2], f[3] };
|
||||
__v4sf y;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (x[i] != f[i])
|
||||
abort ();
|
||||
|
||||
y = foo1 (x, z);
|
||||
do_check (y, f, z);
|
||||
y = foo2 (x, z);
|
||||
do_check (y, f, z);
|
||||
y = foo3 (x, z);
|
||||
do_check (y, f, z);
|
||||
|
||||
return 0;
|
||||
}
|
17
gcc/testsuite/gcc.target/i386/pr88828-1a.c
Normal file
17
gcc/testsuite/gcc.target/i386/pr88828-1a.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler-not "movaps" } } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo1 (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = { f, x[1], x[2], x[3] };
|
||||
return y;
|
||||
}
|
23
gcc/testsuite/gcc.target/i386/pr88828-1b.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr88828-1b.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler-not "movaps" } } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
static __v4sf
|
||||
vector_init (float f0,float f1, float f2,float f3)
|
||||
{
|
||||
__v4sf y = { f0, f1, f2, f3 };
|
||||
return y;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo2 (__v4sf x, float f)
|
||||
{
|
||||
return vector_init (f, x[1], x[2], x[3]) ;
|
||||
}
|
18
gcc/testsuite/gcc.target/i386/pr88828-1c.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr88828-1c.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler-not "movaps" } } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo3 (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = x;
|
||||
y[0] = f;
|
||||
return y;
|
||||
}
|
18
gcc/testsuite/gcc.target/i386/pr88828-4a.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr88828-4a.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler-times "shufps" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "movaps" } } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = { x[0], x[2], x[3], x[1] };
|
||||
y[0] = f;
|
||||
return y;
|
||||
}
|
21
gcc/testsuite/gcc.target/i386/pr88828-4b.c
Normal file
21
gcc/testsuite/gcc.target/i386/pr88828-4b.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-not "vshufps" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovaps" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "vunpcklps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = { x[0], x[2], x[3], x[1] };
|
||||
y[0] = f;
|
||||
return y;
|
||||
}
|
18
gcc/testsuite/gcc.target/i386/pr88828-5a.c
Normal file
18
gcc/testsuite/gcc.target/i386/pr88828-5a.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler-times "shufps" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "movaps" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = { x[0], x[2], x[3], x[0] };
|
||||
y[3] = f;
|
||||
return y;
|
||||
}
|
20
gcc/testsuite/gcc.target/i386/pr88828-5b.c
Normal file
20
gcc/testsuite/gcc.target/i386/pr88828-5b.c
Normal file
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vshufps" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovss" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovaps" } } */
|
||||
/* { dg-final { scan-assembler-not "vmovlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "vunpcklps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__attribute__((noinline, noclone))
|
||||
__v4sf
|
||||
foo (__v4sf x, float f)
|
||||
{
|
||||
__v4sf y = { x[0], x[2], x[3], x[0] };
|
||||
y[3] = f;
|
||||
return y;
|
||||
}
|
53
gcc/testsuite/gcc.target/i386/pr88828-7.c
Normal file
53
gcc/testsuite/gcc.target/i386/pr88828-7.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* { dg-do run { target sse2_runtime } } */
|
||||
/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */
|
||||
|
||||
#include "pr88828-7a.c"
|
||||
#include "pr88828-7b.c"
|
||||
|
||||
extern void abort ();
|
||||
|
||||
/* Scalar helper returning X / Y - Y * X.  do_check in this file compares
   a vector lane against this value with an exact `!=', so the expression
   is deliberately left as a single untouched expression.  */
float
|
||||
bar (float x, float y)
|
||||
{
|
||||
return x / y - y * x;
|
||||
}
|
||||
|
||||
void
|
||||
do_check (__v4sf x, float f1[4], float f2[4])
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (i == 0)
|
||||
{
|
||||
if (x[i] != bar (f1[i], f2[i]))
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (x[i] != f1[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
float f1[4] = { -11, 2, 55553, -4 };
|
||||
float f2[4] = { 111, 3.3, -55.553, 4.8 };
|
||||
__v4sf x = { f1[0], f1[1], f1[2], f1[3] };
|
||||
__v4sf y = { f2[0], f2[1], f2[2], f2[3] };
|
||||
__v4sf z;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (x[i] != f1[i] || y[i] != f2[i] )
|
||||
abort ();
|
||||
|
||||
z = foo1 (x, y);
|
||||
do_check (z, f1, f2);
|
||||
x = foo2 (x, y);
|
||||
do_check (z, f1, f2);
|
||||
|
||||
return 0;
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/pr88828-7a.c
Normal file
16
gcc/testsuite/gcc.target/i386/pr88828-7a.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
extern float bar (float, float);
|
||||
|
||||
__v4sf
|
||||
foo1 (__v4sf x, __v4sf y)
|
||||
{
|
||||
__v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] };
|
||||
return z;
|
||||
}
|
22
gcc/testsuite/gcc.target/i386/pr88828-7b.c
Normal file
22
gcc/testsuite/gcc.target/i386/pr88828-7b.c
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
extern float bar (float, float);
|
||||
|
||||
static __v4sf
|
||||
vector_init (float f0,float f1, float f2,float f3)
|
||||
{
|
||||
__v4sf y = { f0, f1, f2, f3 };
|
||||
return y;
|
||||
}
|
||||
|
||||
__v4sf
|
||||
foo2 (__v4sf x, __v4sf y)
|
||||
{
|
||||
return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ;
|
||||
}
|
46
gcc/testsuite/gcc.target/i386/pr88828-8.c
Normal file
46
gcc/testsuite/gcc.target/i386/pr88828-8.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run { target sse2_runtime } } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#include "pr88828-8a.c"
|
||||
#include "pr88828-8b.c"
|
||||
|
||||
extern void abort ();
|
||||
|
||||
void
|
||||
do_check (__v4sf y, float f[4], float z)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (i == 0)
|
||||
{
|
||||
if (y[i] != z)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (y[i] != f[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
float f[4] = { -11, 2, 55553, -4 };
|
||||
float z = 11.4;
|
||||
__v4sf x = { f[0], f[1], f[2], f[3] };
|
||||
__v4sf y;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (x[i] != f[i])
|
||||
abort ();
|
||||
|
||||
y = foo1 (x);
|
||||
do_check (y, f, z);
|
||||
y = foo2 (x);
|
||||
do_check (y, f, z);
|
||||
|
||||
return 0;
|
||||
}
|
15
gcc/testsuite/gcc.target/i386/pr88828-8a.c
Normal file
15
gcc/testsuite/gcc.target/i386/pr88828-8a.c
Normal file
|
@ -0,0 +1,15 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
__v4sf
|
||||
foo1 (__v4sf x)
|
||||
{
|
||||
__v4sf z = { 11.4, x[1], x[2], x[3] };
|
||||
return z;
|
||||
}
|
21
gcc/testsuite/gcc.target/i386/pr88828-8b.c
Normal file
21
gcc/testsuite/gcc.target/i386/pr88828-8b.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
|
||||
static __v4sf
|
||||
vector_init (float f0,float f1, float f2,float f3)
|
||||
{
|
||||
__v4sf y = { f0, f1, f2, f3 };
|
||||
return y;
|
||||
}
|
||||
|
||||
__v4sf
|
||||
foo2 (__v4sf x)
|
||||
{
|
||||
return vector_init (11.4, x[1], x[2], x[3]) ;
|
||||
}
|
46
gcc/testsuite/gcc.target/i386/pr88828-9.c
Normal file
46
gcc/testsuite/gcc.target/i386/pr88828-9.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do run { target sse2_runtime } } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
|
||||
#include "pr88828-9a.c"
|
||||
#include "pr88828-9b.c"
|
||||
|
||||
extern void abort ();
|
||||
|
||||
void
|
||||
do_check (__v4sf y, float f[4], float z)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (i == 0)
|
||||
{
|
||||
if (y[i] != z)
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (y[i] != f[i])
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
float f[4] = { -11, 2, 55553, -4 };
|
||||
float z = 11.4;
|
||||
__m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] };
|
||||
__m128 y;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
if (x[i] != f[i])
|
||||
abort ();
|
||||
|
||||
y = foo1 (x);
|
||||
do_check (y, f, z);
|
||||
y = foo2 (x);
|
||||
do_check (y, f, z);
|
||||
|
||||
return 0;
|
||||
}
|
16
gcc/testsuite/gcc.target/i386/pr88828-9a.c
Normal file
16
gcc/testsuite/gcc.target/i386/pr88828-9a.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
|
||||
__m128
|
||||
foo1 (__m128 x)
|
||||
{
|
||||
__v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] };
|
||||
return (__m128) z;
|
||||
}
|
23
gcc/testsuite/gcc.target/i386/pr88828-9b.c
Normal file
23
gcc/testsuite/gcc.target/i386/pr88828-9b.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse -mno-sse4" } */
|
||||
/* { dg-final { scan-assembler-not "movlhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpckhps" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
|
||||
static __m128
|
||||
vector_init (float f0,float f1, float f2,float f3)
|
||||
{
|
||||
__v4sf y = { f0, f1, f2, f3 };
|
||||
return (__m128) y;
|
||||
}
|
||||
|
||||
__m128
|
||||
foo2 (__m128 x)
|
||||
{
|
||||
return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2],
|
||||
((__v4sf) x) [3]);
|
||||
}
|
|
@ -2065,71 +2065,87 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
|||
conv_code = ERROR_MARK;
|
||||
maybe_ident = true;
|
||||
tree one_constant = NULL_TREE;
|
||||
tree one_nonconstant = NULL_TREE;
|
||||
auto_vec<tree> constants;
|
||||
constants.safe_grow_cleared (nelts);
|
||||
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
|
||||
{
|
||||
tree ref, op1;
|
||||
unsigned int elem;
|
||||
|
||||
if (i >= nelts)
|
||||
return false;
|
||||
|
||||
/* Look for elements extracted and possibly converted from
|
||||
another vector. */
|
||||
op1 = get_bit_field_ref_def (elt->value, conv_code);
|
||||
if (op1)
|
||||
if (op1
|
||||
&& TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
|
||||
&& VECTOR_TYPE_P (TREE_TYPE (ref))
|
||||
&& useless_type_conversion_p (TREE_TYPE (op1),
|
||||
TREE_TYPE (TREE_TYPE (ref)))
|
||||
&& known_eq (bit_field_size (op1), elem_size)
|
||||
&& constant_multiple_p (bit_field_offset (op1),
|
||||
elem_size, &elem))
|
||||
{
|
||||
ref = TREE_OPERAND (op1, 0);
|
||||
unsigned int j;
|
||||
for (j = 0; j < 2; ++j)
|
||||
{
|
||||
if (!orig[j])
|
||||
{
|
||||
if (TREE_CODE (ref) != SSA_NAME)
|
||||
return false;
|
||||
if (! VECTOR_TYPE_P (TREE_TYPE (ref))
|
||||
|| ! useless_type_conversion_p (TREE_TYPE (op1),
|
||||
TREE_TYPE (TREE_TYPE (ref))))
|
||||
return false;
|
||||
if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
|
||||
TREE_TYPE (ref)))
|
||||
return false;
|
||||
orig[j] = ref;
|
||||
break;
|
||||
if (j == 0
|
||||
|| useless_type_conversion_p (TREE_TYPE (orig[0]),
|
||||
TREE_TYPE (ref)))
|
||||
break;
|
||||
}
|
||||
else if (ref == orig[j])
|
||||
break;
|
||||
}
|
||||
if (j == 2)
|
||||
return false;
|
||||
|
||||
unsigned int elt;
|
||||
if (maybe_ne (bit_field_size (op1), elem_size)
|
||||
|| !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
|
||||
return false;
|
||||
if (j)
|
||||
elt += nelts;
|
||||
if (elt != i)
|
||||
maybe_ident = false;
|
||||
sel.quick_push (elt);
|
||||
/* Found a suitable vector element. */
|
||||
if (j <= 2)
|
||||
{
|
||||
orig[j] = ref;
|
||||
if (j)
|
||||
elem += nelts;
|
||||
if (elem != i)
|
||||
maybe_ident = false;
|
||||
sel.quick_push (elem);
|
||||
continue;
|
||||
}
|
||||
/* Else fallthru. */
|
||||
}
|
||||
else if (CONSTANT_CLASS_P (elt->value))
|
||||
/* Handle elements not extracted from a vector.
|
||||
1. constants by permuting with constant vector
|
||||
2. a unique non-constant element by permuting with a splat vector */
|
||||
if (orig[1]
|
||||
&& orig[1] != error_mark_node)
|
||||
return false;
|
||||
orig[1] = error_mark_node;
|
||||
if (CONSTANT_CLASS_P (elt->value))
|
||||
{
|
||||
if (orig[1]
|
||||
&& orig[1] != error_mark_node)
|
||||
if (one_nonconstant)
|
||||
return false;
|
||||
orig[1] = error_mark_node;
|
||||
if (!one_constant)
|
||||
one_constant = elt->value;
|
||||
constants[i] = elt->value;
|
||||
sel.quick_push (i + nelts);
|
||||
maybe_ident = false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
{
|
||||
if (one_constant)
|
||||
return false;
|
||||
if (!one_nonconstant)
|
||||
one_nonconstant = elt->value;
|
||||
else if (!operand_equal_p (one_nonconstant, elt->value, 0))
|
||||
return false;
|
||||
}
|
||||
sel.quick_push (i + nelts);
|
||||
maybe_ident = false;
|
||||
}
|
||||
if (i < nelts)
|
||||
return false;
|
||||
|
||||
if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|
||||
if (! orig[0]
|
||||
|| ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|
||||
|| maybe_ne (TYPE_VECTOR_SUBPARTS (type),
|
||||
TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
|
||||
return false;
|
||||
|
@ -2165,9 +2181,19 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
|||
GET_MODE_SIZE (TYPE_MODE (type))))
|
||||
return false;
|
||||
op2 = vec_perm_indices_to_tree (mask_type, indices);
|
||||
bool convert_orig0 = false;
|
||||
if (!orig[1])
|
||||
orig[1] = orig[0];
|
||||
if (orig[1] == error_mark_node)
|
||||
else if (orig[1] == error_mark_node
|
||||
&& one_nonconstant)
|
||||
{
|
||||
gimple_seq seq = NULL;
|
||||
orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
|
||||
type, one_nonconstant);
|
||||
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
|
||||
convert_orig0 = true;
|
||||
}
|
||||
else if (orig[1] == error_mark_node)
|
||||
{
|
||||
tree_vector_builder vec (type, nelts, 1);
|
||||
for (unsigned i = 0; i < nelts; ++i)
|
||||
|
@ -2177,11 +2203,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
|
|||
/* ??? Push a don't-care value. */
|
||||
vec.quick_push (one_constant);
|
||||
orig[1] = vec.build ();
|
||||
convert_orig0 = true;
|
||||
}
|
||||
if (conv_code == ERROR_MARK)
|
||||
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
|
||||
orig[1], op2);
|
||||
else if (TREE_CODE (orig[1]) == VECTOR_CST)
|
||||
else if (convert_orig0)
|
||||
{
|
||||
gimple *conv
|
||||
= gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);
|
||||
|
|
Loading…
Add table
Reference in a new issue