re PR tree-optimization/88828 (Inefficient update of the first element of vector registers)

2019-05-14  Richard Biener  <rguenther@suse.de>
	H.J. Lu  <hongjiu.lu@intel.com>

	PR tree-optimization/88828
	* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
	permuting in a single non-constant element not extracted
	from a vector.

	* gcc.target/i386/pr88828-1.c: New test.
	* gcc.target/i386/pr88828-1a.c: Likewise.
	* gcc.target/i386/pr88828-1b.c: Likewise.
	* gcc.target/i386/pr88828-1c.c: Likewise.
	* gcc.target/i386/pr88828-4a.c: Likewise.
	* gcc.target/i386/pr88828-4b.c: Likewise.
	* gcc.target/i386/pr88828-5a.c: Likewise.
	* gcc.target/i386/pr88828-5b.c: Likewise.
	* gcc.target/i386/pr88828-7.c: Likewise.
	* gcc.target/i386/pr88828-7a.c: Likewise.
	* gcc.target/i386/pr88828-7b.c: Likewise.
	* gcc.target/i386/pr88828-8.c: Likewise.
	* gcc.target/i386/pr88828-8a.c: Likewise.
	* gcc.target/i386/pr88828-8b.c: Likewise.
	* gcc.target/i386/pr88828-9.c: Likewise.
	* gcc.target/i386/pr88828-9a.c: Likewise.
	* gcc.target/i386/pr88828-9b.c: Likewise.

Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>

From-SVN: r271153
This commit is contained in:
Richard Biener 2019-05-14 09:11:15 +00:00 committed by Richard Biener
parent a52cf5cf27
commit 962372f9f8
20 changed files with 534 additions and 35 deletions

View file

@ -1,3 +1,11 @@
2019-05-14 Richard Biener <rguenther@suse.de>
H.J. Lu <hongjiu.lu@intel.com>
PR tree-optimization/88828
* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
permuting in a single non-constant element not extracted
from a vector.
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* internal-fn.def (SIGNBIT): New.

View file

@ -1,3 +1,25 @@
2019-05-14 Richard Biener <rguenther@suse.de>
H.J. Lu <hongjiu.lu@intel.com>
PR tree-optimization/88828
* gcc.target/i386/pr88828-1.c: New test.
* gcc.target/i386/pr88828-1a.c: Likewise.
* gcc.target/i386/pr88828-1b.c: Likewise.
* gcc.target/i386/pr88828-1c.c: Likewise.
* gcc.target/i386/pr88828-4a.c: Likewise.
* gcc.target/i386/pr88828-4b.c: Likewise.
* gcc.target/i386/pr88828-5a.c: Likewise.
* gcc.target/i386/pr88828-5b.c: Likewise.
* gcc.target/i386/pr88828-7.c: Likewise.
* gcc.target/i386/pr88828-7a.c: Likewise.
* gcc.target/i386/pr88828-7b.c: Likewise.
* gcc.target/i386/pr88828-8.c: Likewise.
* gcc.target/i386/pr88828-8a.c: Likewise.
* gcc.target/i386/pr88828-8b.c: Likewise.
* gcc.target/i386/pr88828-9.c: Likewise.
* gcc.target/i386/pr88828-9a.c: Likewise.
* gcc.target/i386/pr88828-9b.c: Likewise.
2019-05-14 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* gcc.target/aarch64/signbitv4sf.c: New test.

View file

@ -0,0 +1,49 @@
/* { dg-do run { target sse2_runtime } } */
/* { dg-options "-O2 -msse2" } */
#include "pr88828-1a.c"
#include "pr88828-1b.c"
#include "pr88828-1c.c"
extern void abort ();
/* Abort unless lane 0 of Y equals Z and lanes 1-3 of Y equal the
   corresponding entries of F.  */
void
do_check (__v4sf y, float f[4], float z)
{
  int lane;

  /* Lane 0 is the one expected to hold the scalar Z.  */
  if (y[0] != z)
    abort ();

  /* Every other lane must match F.  */
  for (lane = 1; lane < 4; lane++)
    if (y[lane] != f[lane])
      abort ();
}
/* Build a vector from four known floats, confirm the vector holds them,
   then verify the results of foo1, foo2 and foo3 with do_check.
   Returns 0 when every check passes; aborts otherwise.  */
int
main (void)
{
  float lanes[4] = { -11, 2, 55553, -4 };
  float scalar = 134567;
  __v4sf vec = { lanes[0], lanes[1], lanes[2], lanes[3] };
  int k;

  /* Sanity-check the input vector before using it.  */
  for (k = 0; k < 4; k++)
    if (vec[k] != lanes[k])
      abort ();

  /* Each call is checked against the same expectation: lane 0 is
     SCALAR, lanes 1-3 are unchanged.  */
  do_check (foo1 (vec, scalar), lanes, scalar);
  do_check (foo2 (vec, scalar), lanes, scalar);
  do_check (foo3 (vec, scalar), lanes, scalar);

  return 0;
}

View file

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return a vector whose lane 0 is F and whose lanes 1-3 are copied from
   X, written as a single vector initializer.  The exact source form is
   the input of this codegen test, so it is kept as is.  */
__attribute__((noinline, noclone))
__v4sf
foo1 (__v4sf x, float f)
{
__v4sf y = { f, x[1], x[2], x[3] };
return y;
}

View file

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return the vector { F0, F1, F2, F3 }.  */
static __v4sf
vector_init (float f0,float f1, float f2,float f3)
{
__v4sf y = { f0, f1, f2, f3 };
return y;
}
/* Return a vector whose lane 0 is F and whose lanes 1-3 are copied from
   X, built by passing the four scalars through the static helper
   vector_init.  */
__attribute__((noinline, noclone))
__v4sf
foo2 (__v4sf x, float f)
{
return vector_init (f, x[1], x[2], x[3]) ;
}

View file

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return a copy of X with lane 0 replaced by F, written as a whole-vector
   copy followed by a single element store.  */
__attribute__((noinline, noclone))
__v4sf
foo3 (__v4sf x, float f)
{
__v4sf y = x;
/* Overwrite only the first lane; lanes 1-3 keep the values from X.  */
y[0] = f;
return y;
}

View file

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler-times "shufps" 1 } } */
/* { dg-final { scan-assembler-not "movaps" } } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return { F, x[2], x[3], x[1] }: the lanes of X are permuted by the
   initializer and lane 0 is then overwritten with F.  */
__attribute__((noinline, noclone))
__v4sf
foo (__v4sf x, float f)
{
__v4sf y = { x[0], x[2], x[3], x[1] };
/* The x[0] placed in lane 0 above is replaced here.  */
y[0] = f;
return y;
}

View file

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */
/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */
/* { dg-final { scan-assembler-not "vshufps" } } */
/* { dg-final { scan-assembler-not "vmovaps" } } */
/* { dg-final { scan-assembler-not "vmovlhps" } } */
/* { dg-final { scan-assembler-not "vunpcklps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return { F, x[2], x[3], x[1] }: the lanes of X are permuted by the
   initializer and lane 0 is then overwritten with F.  */
__attribute__((noinline, noclone))
__v4sf
foo (__v4sf x, float f)
{
__v4sf y = { x[0], x[2], x[3], x[1] };
/* The x[0] placed in lane 0 above is replaced here.  */
y[0] = f;
return y;
}

View file

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler "movss" } } */
/* { dg-final { scan-assembler-times "shufps" 2 } } */
/* { dg-final { scan-assembler-times "movaps" 1 } } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return { x[0], x[2], x[3], F }: the lanes of X are permuted by the
   initializer and lane 3 is then overwritten with F.  */
__attribute__((noinline, noclone))
__v4sf
foo (__v4sf x, float f)
{
__v4sf y = { x[0], x[2], x[3], x[0] };
/* The x[0] placed in lane 3 above is replaced here.  */
y[3] = f;
return y;
}

View file

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
/* { dg-final { scan-assembler-times "vpermilps" 1 } } */
/* { dg-final { scan-assembler-times "vinsertps" 1 } } */
/* { dg-final { scan-assembler-not "vshufps" } } */
/* { dg-final { scan-assembler-not "vmovss" } } */
/* { dg-final { scan-assembler-not "vmovaps" } } */
/* { dg-final { scan-assembler-not "vmovlhps" } } */
/* { dg-final { scan-assembler-not "vunpcklps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return { x[0], x[2], x[3], F }: the lanes of X are permuted by the
   initializer and lane 3 is then overwritten with F.  */
__attribute__((noinline, noclone))
__v4sf
foo (__v4sf x, float f)
{
__v4sf y = { x[0], x[2], x[3], x[0] };
/* The x[0] placed in lane 3 above is replaced here.  */
y[3] = f;
return y;
}

View file

@ -0,0 +1,53 @@
/* { dg-do run { target sse2_runtime } } */
/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */
#include "pr88828-7a.c"
#include "pr88828-7b.c"
extern void abort ();
/* Return X / Y - Y * X.  do_check calls this to compute the value it
   expects in lane 0.  */
float
bar (float x, float y)
{
return x / y - y * x;
}
/* Abort unless lane 0 of X equals bar (F1[0], F2[0]) and lanes 1-3 of X
   equal the corresponding entries of F1.  */
void
do_check (__v4sf x, float f1[4], float f2[4])
{
  int lane;

  /* Lane 0 is the one expected to hold the value computed by bar.  */
  if (x[0] != bar (f1[0], f2[0]))
    abort ();

  /* Every other lane must match F1.  */
  for (lane = 1; lane < 4; lane++)
    if (x[lane] != f1[lane])
      abort ();
}
/* Build two vectors from known floats, confirm they hold those values,
   then verify the results of foo1 and foo2 with do_check.
   Returns 0 when every check passes; aborts otherwise.  */
int
main (void)
{
  float f1[4] = { -11, 2, 55553, -4 };
  float f2[4] = { 111, 3.3, -55.553, 4.8 };
  __v4sf x = { f1[0], f1[1], f1[2], f1[3] };
  __v4sf y = { f2[0], f2[1], f2[2], f2[3] };
  __v4sf z;
  int i;

  /* Sanity-check the input vectors before using them.  */
  for (i = 0; i < 4; i++)
    if (x[i] != f1[i] || y[i] != f2[i])
      abort ();

  z = foo1 (x, y);
  do_check (z, f1, f2);

  /* Store the result of foo2 in Z so that it is the value do_check
     verifies.  Assigning it to X left foo2 untested, because Z still
     held the result of foo1.  */
  z = foo2 (x, y);
  do_check (z, f1, f2);

  return 0;
}

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
extern float bar (float, float);
/* Return a vector whose lane 0 is bar (x[0], y[0]) and whose lanes 1-3
   are copied from X, written as a single vector initializer.  */
__v4sf
foo1 (__v4sf x, __v4sf y)
{
__v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] };
return z;
}

View file

@ -0,0 +1,22 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
extern float bar (float, float);
/* Return the vector { F0, F1, F2, F3 }.  */
static __v4sf
vector_init (float f0,float f1, float f2,float f3)
{
__v4sf y = { f0, f1, f2, f3 };
return y;
}
/* Return a vector whose lane 0 is bar (x[0], y[0]) and whose lanes 1-3
   are copied from X, built by passing the four scalars through the
   static helper vector_init.  */
__v4sf
foo2 (__v4sf x, __v4sf y)
{
return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ;
}

View file

@ -0,0 +1,46 @@
/* { dg-do run { target sse2_runtime } } */
/* { dg-options "-O2 -msse2" } */
#include "pr88828-8a.c"
#include "pr88828-8b.c"
extern void abort ();
/* Abort unless lane 0 of Y equals Z and lanes 1-3 of Y equal the
   corresponding entries of F.  */
void
do_check (__v4sf y, float f[4], float z)
{
  int lane;

  /* Lane 0 is the one expected to hold the scalar Z.  */
  if (y[0] != z)
    abort ();

  /* Every other lane must match F.  */
  for (lane = 1; lane < 4; lane++)
    if (y[lane] != f[lane])
      abort ();
}
/* Build a vector from four known floats, confirm the vector holds them,
   then verify the results of foo1 and foo2 with do_check.
   Returns 0 when every check passes; aborts otherwise.  */
int
main (void)
{
  float lanes[4] = { -11, 2, 55553, -4 };
  float lane0 = 11.4;
  __v4sf vec = { lanes[0], lanes[1], lanes[2], lanes[3] };
  int k;

  /* Sanity-check the input vector before using it.  */
  for (k = 0; k < 4; k++)
    if (vec[k] != lanes[k])
      abort ();

  /* Both calls are checked against the same expectation: lane 0 is
     LANE0, lanes 1-3 are unchanged.  */
  do_check (foo1 (vec), lanes, lane0);
  do_check (foo2 (vec), lanes, lane0);

  return 0;
}

View file

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return a vector whose lane 0 is the constant 11.4 and whose lanes 1-3
   are copied from X, written as a single vector initializer.  */
__v4sf
foo1 (__v4sf x)
{
__v4sf z = { 11.4, x[1], x[2], x[3] };
return z;
}

View file

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Return the vector { F0, F1, F2, F3 }.  */
static __v4sf
vector_init (float f0,float f1, float f2,float f3)
{
__v4sf y = { f0, f1, f2, f3 };
return y;
}
/* Return a vector whose lane 0 is the constant 11.4 and whose lanes 1-3
   are copied from X, built by passing the four scalars through the
   static helper vector_init.  */
__v4sf
foo2 (__v4sf x)
{
return vector_init (11.4, x[1], x[2], x[3]) ;
}

View file

@ -0,0 +1,46 @@
/* { dg-do run { target sse2_runtime } } */
/* { dg-options "-O2 -msse2" } */
#include "pr88828-9a.c"
#include "pr88828-9b.c"
extern void abort ();
/* Abort unless lane 0 of Y equals Z and lanes 1-3 of Y equal the
   corresponding entries of F.  */
void
do_check (__v4sf y, float f[4], float z)
{
  int lane;

  /* Lane 0 is the one expected to hold the scalar Z.  */
  if (y[0] != z)
    abort ();

  /* Every other lane must match F.  */
  for (lane = 1; lane < 4; lane++)
    if (y[lane] != f[lane])
      abort ();
}
/* Build an __m128 vector from four known floats, confirm the vector
   holds them, then verify the results of foo1 and foo2 with do_check.
   Returns 0 when every check passes; aborts otherwise.  */
int
main (void)
{
  float lanes[4] = { -11, 2, 55553, -4 };
  float lane0 = 11.4;
  __m128 vec = (__m128) (__v4sf) { lanes[0], lanes[1], lanes[2], lanes[3] };
  int k;

  /* Sanity-check the input vector before using it.  */
  for (k = 0; k < 4; k++)
    if (vec[k] != lanes[k])
      abort ();

  /* Both calls are checked against the same expectation: lane 0 is
     LANE0, lanes 1-3 are unchanged.  */
  do_check (foo1 (vec), lanes, lane0);
  do_check (foo2 (vec), lanes, lane0);

  return 0;
}

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
/* Return a vector whose lane 0 is the constant 11.4 and whose lanes 1-3
   are copied from X.  X is cast to __v4sf for each element read and the
   result is cast back to __m128 on return.  */
__m128
foo1 (__m128 x)
{
__v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] };
return (__m128) z;
}

View file

@ -0,0 +1,23 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse -mno-sse4" } */
/* { dg-final { scan-assembler-not "movlhps" } } */
/* { dg-final { scan-assembler-not "unpckhps" } } */
/* { dg-final { scan-assembler-not "unpcklps" } } */
/* { dg-final { scan-assembler-not "shufps" } } */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
/* Return the vector { F0, F1, F2, F3 }, built as a __v4sf and cast to
   __m128.  */
static __m128
vector_init (float f0,float f1, float f2,float f3)
{
__v4sf y = { f0, f1, f2, f3 };
return (__m128) y;
}
/* Return a vector whose lane 0 is the constant 11.4 and whose lanes 1-3
   are copied from X, built by passing the four scalars through the
   static helper vector_init.  X is cast to __v4sf for each element
   read.  */
__m128
foo2 (__m128 x)
{
return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2],
((__v4sf) x) [3]);
}

View file

@ -2065,71 +2065,87 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
conv_code = ERROR_MARK;
maybe_ident = true;
tree one_constant = NULL_TREE;
tree one_nonconstant = NULL_TREE;
auto_vec<tree> constants;
constants.safe_grow_cleared (nelts);
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
{
tree ref, op1;
unsigned int elem;
if (i >= nelts)
return false;
/* Look for elements extracted and possibly converted from
another vector. */
op1 = get_bit_field_ref_def (elt->value, conv_code);
if (op1)
if (op1
&& TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
&& VECTOR_TYPE_P (TREE_TYPE (ref))
&& useless_type_conversion_p (TREE_TYPE (op1),
TREE_TYPE (TREE_TYPE (ref)))
&& known_eq (bit_field_size (op1), elem_size)
&& constant_multiple_p (bit_field_offset (op1),
elem_size, &elem))
{
ref = TREE_OPERAND (op1, 0);
unsigned int j;
for (j = 0; j < 2; ++j)
{
if (!orig[j])
{
if (TREE_CODE (ref) != SSA_NAME)
return false;
if (! VECTOR_TYPE_P (TREE_TYPE (ref))
|| ! useless_type_conversion_p (TREE_TYPE (op1),
TREE_TYPE (TREE_TYPE (ref))))
return false;
if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
TREE_TYPE (ref)))
return false;
orig[j] = ref;
break;
if (j == 0
|| useless_type_conversion_p (TREE_TYPE (orig[0]),
TREE_TYPE (ref)))
break;
}
else if (ref == orig[j])
break;
}
if (j == 2)
return false;
unsigned int elt;
if (maybe_ne (bit_field_size (op1), elem_size)
|| !constant_multiple_p (bit_field_offset (op1), elem_size, &elt))
return false;
if (j)
elt += nelts;
if (elt != i)
maybe_ident = false;
sel.quick_push (elt);
/* Found a suitable vector element. */
if (j < 2)
{
orig[j] = ref;
if (j)
elem += nelts;
if (elem != i)
maybe_ident = false;
sel.quick_push (elem);
continue;
}
/* Else fallthru. */
}
else if (CONSTANT_CLASS_P (elt->value))
/* Handle elements not extracted from a vector.
1. constants by permuting with constant vector
2. a unique non-constant element by permuting with a splat vector */
if (orig[1]
&& orig[1] != error_mark_node)
return false;
orig[1] = error_mark_node;
if (CONSTANT_CLASS_P (elt->value))
{
if (orig[1]
&& orig[1] != error_mark_node)
if (one_nonconstant)
return false;
orig[1] = error_mark_node;
if (!one_constant)
one_constant = elt->value;
constants[i] = elt->value;
sel.quick_push (i + nelts);
maybe_ident = false;
}
else
return false;
{
if (one_constant)
return false;
if (!one_nonconstant)
one_nonconstant = elt->value;
else if (!operand_equal_p (one_nonconstant, elt->value, 0))
return false;
}
sel.quick_push (i + nelts);
maybe_ident = false;
}
if (i < nelts)
return false;
if (! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
if (! orig[0]
|| ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
|| maybe_ne (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
return false;
@ -2165,9 +2181,19 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
GET_MODE_SIZE (TYPE_MODE (type))))
return false;
op2 = vec_perm_indices_to_tree (mask_type, indices);
bool convert_orig0 = false;
if (!orig[1])
orig[1] = orig[0];
if (orig[1] == error_mark_node)
else if (orig[1] == error_mark_node
&& one_nonconstant)
{
gimple_seq seq = NULL;
orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
type, one_nonconstant);
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
convert_orig0 = true;
}
else if (orig[1] == error_mark_node)
{
tree_vector_builder vec (type, nelts, 1);
for (unsigned i = 0; i < nelts; ++i)
@ -2177,11 +2203,12 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
/* ??? Push a don't-care value. */
vec.quick_push (one_constant);
orig[1] = vec.build ();
convert_orig0 = true;
}
if (conv_code == ERROR_MARK)
gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
orig[1], op2);
else if (TREE_CODE (orig[1]) == VECTOR_CST)
else if (convert_orig0)
{
gimple *conv
= gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);