[1/2] PR88598: Optimise x * { 0 or 1, 0 or 1, ... }
The PR has: vect__6.24_42 = vect__5.23_41 * { 0.0, 1.0e+0, 0.0, 0.0 }; which for -fno-signed-zeros -fno-signaling-nans can be simplified to: vect__6.24_42 = vect__5.23_41 & { 0, -1, 0, 0 }; I deliberately didn't handle COMPLEX_CST or CONSTRUCTOR in initializer_each_zero_or_onep since there are no current use cases. The patch also makes (un)signed_type_for handle floating-point types. I tried to audit all callers and the few that handle null returns would be unaffected. 2019-01-07 Richard Sandiford <richard.sandiford@arm.com> gcc/ PR tree-optimization/88598 * tree.h (initializer_each_zero_or_onep): Declare. * tree.c (initializer_each_zero_or_onep): New function. (signed_or_unsigned_type_for): Handle float types too. (unsigned_type_for, signed_type_for): Update comments accordingly. * match.pd: Fold x * { 0 or 1, 0 or 1, ...} to x & { 0 or -1, 0 or -1, ... }. gcc/testsuite/ PR tree-optimization/88598 * gcc.dg/pr88598-1.c: New test. * gcc.dg/pr88598-2.c: Likewise. * gcc.dg/pr88598-3.c: Likewise. * gcc.dg/pr88598-4.c: Likewise. * gcc.dg/pr88598-5.c: Likewise. From-SVN: r267645
This commit is contained in:
parent
3340164d5d
commit
46c66a46aa
10 changed files with 236 additions and 7 deletions
|
@ -1,3 +1,13 @@
|
|||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
* tree.h (initializer_each_zero_or_onep): Declare.
|
||||
* tree.c (initializer_each_zero_or_onep): New function.
|
||||
(signed_or_unsigned_type_for): Handle float types too.
|
||||
(unsigned_type_for, signed_type_for): Update comments accordingly.
|
||||
* match.pd: Fold x * { 0 or 1, 0 or 1, ...} to
|
||||
x & { 0 or -1, 0 or -1, ... }.
|
||||
|
||||
2019-01-07 Jonathan Wakely <jwakely@redhat.com>
|
||||
|
||||
* doc/install.texi: Replace references to x86_64-unknown-linux-gnu
|
||||
|
|
17
gcc/match.pd
17
gcc/match.pd
|
@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
integer_each_onep integer_truep integer_nonzerop
|
||||
real_zerop real_onep real_minus_onep
|
||||
zerop
|
||||
initializer_each_zero_or_onep
|
||||
CONSTANT_CLASS_P
|
||||
tree_expr_nonnegative_p
|
||||
tree_expr_nonzero_p
|
||||
|
@ -194,6 +195,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
|||
|| !COMPLEX_FLOAT_TYPE_P (type)))
|
||||
(negate @0)))
|
||||
|
||||
/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
|
||||
unless the target has native support for the former but not the latter. */
|
||||
(simplify
|
||||
(mult @0 VECTOR_CST@1)
|
||||
/* Only fire when every lane of the constant is 0 or 1.  The AND form
   produces +0.0 in masked-off lanes, whereas the multiplication can
   produce -0.0, so signed zeros must not be honored; the fold is also
   skipped when signaling NaNs are honored.
   NOTE(review): a NaN or infinite lane of @0 multiplied by 0 gives NaN
   while the AND gives 0 -- confirm whether that case needs guarding.  */
(if (initializer_each_zero_or_onep (@1)
|
||||
&& !HONOR_SNANS (type)
|
||||
&& !HONOR_SIGNED_ZEROS (type))
|
||||
/* Floating-point vectors are ANDed in an unsigned integer type obtained
   from unsigned_type_for; integer vectors are ANDed in their own type.  */
(with { tree itype = FLOAT_TYPE_P (type) ? unsigned_type_for (type) : type; }
|
||||
/* ITYPE is tested because unsigned_type_for may yield a null type.  If
   TYPE has a vector mode, ITYPE must have one too and the target must
   provide an AND instruction for it.  */
(if (itype
|
||||
&& (!VECTOR_MODE_P (TYPE_MODE (type))
|
||||
|| (VECTOR_MODE_P (TYPE_MODE (itype))
|
||||
&& optab_handler (and_optab,
|
||||
TYPE_MODE (itype)) != CODE_FOR_nothing)))
|
||||
/* The mask is built by comparing the constant against zero, and the
   result of the AND is reinterpreted back to TYPE.  */
(view_convert (bit_and:itype (view_convert @0)
|
||||
(ne @1 { build_zero_cst (type); })))))))
|
||||
|
||||
(for cmp (gt ge lt le)
|
||||
outp (convert convert negate negate)
|
||||
outn (negate negate convert convert)
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2019-01-07 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
PR tree-optimization/88598
|
||||
* gcc.dg/pr88598-1.c: New test.
|
||||
* gcc.dg/pr88598-2.c: Likewise.
|
||||
* gcc.dg/pr88598-3.c: Likewise.
|
||||
* gcc.dg/pr88598-4.c: Likewise.
|
||||
* gcc.dg/pr88598-5.c: Likewise.
|
||||
|
||||
2019-01-07 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/88676
|
||||
|
|
27
gcc/testsuite/gcc.dg/pr88598-1.c
Normal file
27
gcc/testsuite/gcc.dg/pr88598-1.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O -fdump-tree-ccp1" } */
|
||||
|
||||
typedef int v4si __attribute__ ((vector_size (16)));
|
||||
|
||||
/* Multiply three volatile integer vectors by constant vectors whose lanes
   are all 0 or 1, and abort unless each result is byte-for-byte equal to
   the expected lane-wise product.  Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
volatile v4si x1 = { 4, 5, 6, 7 };
|
||||
volatile v4si x2 = { 10, 11, 12, 13 };
|
||||
volatile v4si x3 = { 20, 21, 22, 23 };
|
||||
|
||||
/* Each multiplier lane is either 0 (clear the lane) or 1 (keep it).  */
x1 *= (v4si) { 0, 1, 1, 0 };
|
||||
x2 *= (v4si) { 1, 0, 0, 1 };
|
||||
x3 *= (v4si) { 0, 0, 1, 0 };
|
||||
|
||||
/* Compare the raw bytes of each result with the expected vector.  */
if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 6, 0 }, sizeof (v4si))
|
||||
|| __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 0, 13 },
|
||||
sizeof (v4si))
|
||||
|| __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, 22, 0 },
|
||||
sizeof (v4si)))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
|
30
gcc/testsuite/gcc.dg/pr88598-2.c
Normal file
30
gcc/testsuite/gcc.dg/pr88598-2.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do run { target double64 } } */
|
||||
/* { dg-options "-O -fdump-tree-ccp1" } */
|
||||
/* { dg-add-options ieee } */
|
||||
|
||||
typedef double v4df __attribute__ ((vector_size (32)));
|
||||
|
||||
/* Multiply three volatile double vectors by constant vectors whose lanes
   are 0.0, -0.0 or 1.0, and abort unless each result is byte-for-byte
   equal to the expected product.  The expected vectors keep the sign of
   zero: a negative lane times 0 and any lane times -0.0 must be -0.0.
   Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
volatile v4df x1 = { 4, 5, 6, -7 };
|
||||
volatile v4df x2 = { 10, -11, 12, 13 };
|
||||
volatile v4df x3 = { 20, 21, 22, 23 };
|
||||
|
||||
x1 *= (v4df) { 0, 1, 1, 0 };
|
||||
x2 *= (v4df) { 1, 0, 0, 1 };
|
||||
x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
|
||||
|
||||
/* memcmp distinguishes -0.0 from +0.0, so the zero signs are checked.  */
if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, -0.0 },
|
||||
sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x2, &(v4df) { 10, -0.0, 0, 13 },
|
||||
sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x3, &(v4df) { 0, -0.0, 22, -0.0 },
|
||||
sizeof (v4df)))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
|
||||
/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
|
29
gcc/testsuite/gcc.dg/pr88598-3.c
Normal file
29
gcc/testsuite/gcc.dg/pr88598-3.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do run { target double64 } } */
|
||||
/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
|
||||
/* { dg-add-options ieee } */
|
||||
|
||||
typedef double v4df __attribute__ ((vector_size (32)));
|
||||
|
||||
/* Multiply three volatile double vectors by constant vectors whose lanes
   are 0.0, -0.0 or 1.0, and abort unless each result is byte-for-byte
   equal to the expected vector.  Every zeroed lane is expected to be
   +0.0, including lanes whose operand is negative or whose multiplier
   is -0.0.  Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
volatile v4df x1 = { 4, 5, 6, -7 };
|
||||
volatile v4df x2 = { 10, -11, 12, 13 };
|
||||
volatile v4df x3 = { 20, 21, 22, 23 };
|
||||
|
||||
x1 *= (v4df) { 0, 1, 1, 0 };
|
||||
x2 *= (v4df) { 1, 0, 0, 1 };
|
||||
x3 *= (v4df) { 0.0, -0.0, 1.0, -0.0 };
|
||||
|
||||
/* The expected vectors contain only +0.0 in the cleared lanes.  */
if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 6, 0 },
|
||||
sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 0, 13 },
|
||||
sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, 22, 0 },
|
||||
sizeof (v4df)))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not { \* } "ccp1" } } */
|
28
gcc/testsuite/gcc.dg/pr88598-4.c
Normal file
28
gcc/testsuite/gcc.dg/pr88598-4.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O -fdump-tree-ccp1" } */
|
||||
|
||||
typedef int v4si __attribute__ ((vector_size (16)));
|
||||
|
||||
/* Multiply three volatile integer vectors by constant vectors that each
   contain at least one lane other than 0 or 1 (2, 3 or -1), and abort
   unless each result is byte-for-byte equal to the expected lane-wise
   product.  Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
volatile v4si x1 = { 4, 5, 6, 7 };
|
||||
volatile v4si x2 = { 10, 11, 12, 13 };
|
||||
volatile v4si x3 = { 20, 21, 22, 23 };
|
||||
|
||||
/* None of these multipliers consists solely of 0 and 1 lanes.  */
x1 *= (v4si) { 0, 1, 2, 3 };
|
||||
x2 *= (v4si) { 1, 0, 2, 0 };
|
||||
x3 *= (v4si) { 0, 0, -1, 0 };
|
||||
|
||||
if (__builtin_memcmp ((void *) &x1, &(v4si) { 0, 5, 12, 21 }, sizeof (v4si))
|
||||
|| __builtin_memcmp ((void *) &x2, &(v4si) { 10, 0, 24, 0 },
|
||||
sizeof (v4si))
|
||||
|| __builtin_memcmp ((void *) &x3, &(v4si) { 0, 0, -22, 0 },
|
||||
sizeof (v4si)))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
|
||||
/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
|
29
gcc/testsuite/gcc.dg/pr88598-5.c
Normal file
29
gcc/testsuite/gcc.dg/pr88598-5.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do run { target double64 } } */
|
||||
/* { dg-options "-O -fno-signed-zeros -fdump-tree-ccp1" } */
|
||||
/* { dg-add-options ieee } */
|
||||
|
||||
typedef double v4df __attribute__ ((vector_size (32)));
|
||||
|
||||
/* Multiply three volatile double vectors by constant vectors that each
   contain at least one lane other than 0 or 1 (2, 3 or -1), and abort
   unless each result is byte-for-byte equal to the expected lane-wise
   product.  Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
volatile v4df x1 = { 4, 5, 6, 7 };
|
||||
volatile v4df x2 = { 10, 11, 12, 13 };
|
||||
volatile v4df x3 = { 20, 21, 22, 23 };
|
||||
|
||||
/* None of these multipliers consists solely of 0 and 1 lanes.  */
x1 *= (v4df) { 0, 1, 2, 3 };
|
||||
x2 *= (v4df) { 1, 0, 2, 0 };
|
||||
x3 *= (v4df) { 0, 0, -1, 0 };
|
||||
|
||||
if (__builtin_memcmp ((void *) &x1, &(v4df) { 0, 5, 12, 21 }, sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x2, &(v4df) { 10, 0, 24, 0 },
|
||||
sizeof (v4df))
|
||||
|| __builtin_memcmp ((void *) &x3, &(v4df) { 0, 0, -22, 0 },
|
||||
sizeof (v4df)))
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump { \* } "ccp1" } } */
|
||||
/* { dg-final { scan-tree-dump-not { \& } "ccp1" } } */
|
63
gcc/tree.c
63
gcc/tree.c
|
@ -11229,6 +11229,45 @@ initializer_zerop (const_tree init, bool *nonzero /* = NULL */)
|
|||
}
|
||||
}
|
||||
|
||||
/* Return true if EXPR is an initializer expression in which every element
|
||||
is a constant that is numerically equal to 0 or 1. The elements do not
|
||||
need to be equal to each other. */
|
||||
|
||||
bool
|
||||
initializer_each_zero_or_onep (const_tree expr)
|
||||
{
|
||||
STRIP_ANY_LOCATION_WRAPPER (expr);
|
||||
|
||||
switch (TREE_CODE (expr))
|
||||
{
|
||||
case INTEGER_CST:
|
||||
return integer_zerop (expr) || integer_onep (expr);
|
||||
|
||||
case REAL_CST:
|
||||
return real_zerop (expr) || real_onep (expr);
|
||||
|
||||
case VECTOR_CST:
|
||||
{
|
||||
unsigned HOST_WIDE_INT nelts = vector_cst_encoded_nelts (expr);
|
||||
if (VECTOR_CST_STEPPED_P (expr)
|
||||
&& !TYPE_VECTOR_SUBPARTS (TREE_TYPE (expr)).is_constant (&nelts))
|
||||
return false;
|
||||
|
||||
for (unsigned int i = 0; i < nelts; ++i)
|
||||
{
|
||||
tree elt = VECTOR_CST_ENCODED_ELT (expr, i);
|
||||
if (!initializer_each_zero_or_onep (elt))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if vector VEC consists of all the equal elements and
|
||||
that the number of elements corresponds to the type of VEC.
|
||||
The function returns first element of the vector
|
||||
|
@ -11672,7 +11711,10 @@ int_cst_value (const_tree x)
|
|||
|
||||
/* If TYPE is an integral or pointer type, return an integer type with
|
||||
the same precision which is unsigned iff UNSIGNEDP is true, or itself
|
||||
if TYPE is already an integer type of signedness UNSIGNEDP. */
|
||||
if TYPE is already an integer type of signedness UNSIGNEDP.
|
||||
If TYPE is a floating-point type, return an integer type with the same
|
||||
bitsize and with the signedness given by UNSIGNEDP; this is useful
|
||||
when doing bit-level operations on a floating-point value. */
|
||||
|
||||
tree
|
||||
signed_or_unsigned_type_for (int unsignedp, tree type)
|
||||
|
@ -11702,17 +11744,23 @@ signed_or_unsigned_type_for (int unsignedp, tree type)
|
|||
return build_complex_type (inner2);
|
||||
}
|
||||
|
||||
if (!INTEGRAL_TYPE_P (type)
|
||||
&& !POINTER_TYPE_P (type)
|
||||
&& TREE_CODE (type) != OFFSET_TYPE)
|
||||
unsigned int bits;
|
||||
if (INTEGRAL_TYPE_P (type)
|
||||
|| POINTER_TYPE_P (type)
|
||||
|| TREE_CODE (type) == OFFSET_TYPE)
|
||||
bits = TYPE_PRECISION (type);
|
||||
else if (TREE_CODE (type) == REAL_TYPE)
|
||||
bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (type));
|
||||
else
|
||||
return NULL_TREE;
|
||||
|
||||
return build_nonstandard_integer_type (TYPE_PRECISION (type), unsignedp);
|
||||
return build_nonstandard_integer_type (bits, unsignedp);
|
||||
}
|
||||
|
||||
/* If TYPE is an integral or pointer type, return an integer type with
|
||||
the same precision which is unsigned, or itself if TYPE is already an
|
||||
unsigned integer type. */
|
||||
unsigned integer type. If TYPE is a floating-point type, return an
|
||||
unsigned integer type with the same bitsize as TYPE. */
|
||||
|
||||
tree
|
||||
unsigned_type_for (tree type)
|
||||
|
@ -11722,7 +11770,8 @@ unsigned_type_for (tree type)
|
|||
|
||||
/* If TYPE is an integral or pointer type, return an integer type with
|
||||
the same precision which is signed, or itself if TYPE is already a
|
||||
signed integer type. */
|
||||
signed integer type. If TYPE is a floating-point type, return a
|
||||
signed integer type with the same bitsize as TYPE. */
|
||||
|
||||
tree
|
||||
signed_type_for (tree type)
|
||||
|
|
|
@ -4506,6 +4506,7 @@ extern tree first_field (const_tree);
|
|||
combinations indicate definitive answers. */
|
||||
|
||||
extern bool initializer_zerop (const_tree, bool * = NULL);
|
||||
extern bool initializer_each_zero_or_onep (const_tree);
|
||||
|
||||
extern wide_int vector_cst_int_elt (const_tree, unsigned int);
|
||||
extern tree vector_cst_elt (const_tree, unsigned int);
|
||||
|
|
Loading…
Add table
Reference in a new issue