Use range info in split_constant_offset (PR 81635)

This patch implements the original suggestion for fixing PR 81635:
use range info in split_constant_offset to see whether a conversion
of a wrapping type can be split.  The range info problem described in:

    https://gcc.gnu.org/ml/gcc-patches/2017-08/msg01002.html

seems to have been fixed.
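
To make the idea concrete, here is a minimal stand-alone sketch of the check
(not GCC code: the names split_offset, var_min, var_max and off are invented,
and plain uint32_t/int64_t stand in for the wrapping inner type and
ssizetype).  The conversion can be split exactly when adding the constant
offset wraps at both ends of the variable's known range or at neither,
because the widened difference is then the same for every value in the range:

  #include <cstdint>
  #include <optional>

  /* VAR is known to lie in [VAR_MIN, VAR_MAX].  Return the constant that
     can be split out of (wider type) (VAR + OFF), or nothing if the
     addition might wrap for some values of VAR but not for others.  */
  std::optional<int64_t>
  split_offset (uint32_t var_min, uint32_t var_max, uint32_t off)
  {
    uint32_t lo = (uint32_t) (var_min + off);
    uint32_t hi = (uint32_t) (var_max + off);
    bool wraps_lo = lo < var_min;   /* addition wrapped at the low end */
    bool wraps_hi = hi < var_max;   /* addition wrapped at the high end */
    if (wraps_lo != wraps_hi)
      return std::nullopt;          /* range straddles the wrap point */
    /* Every value V in [VAR_MIN, VAR_MAX] now satisfies
       (int64_t) (uint32_t) (V + OFF) == (int64_t) V + result.  */
    return (int64_t) lo - (int64_t) var_min;
  }

The patch performs the same test on wide_ints in split_constant_offset_1;
see the tree-data-ref.c hunk below.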

The patch is part 1.  There needs to be a follow-on patch to handle:

  for (unsigned int i = 0; i < n; i += 4)
    {
      ...[i + 2]...
      ...[i + 3]...

which the old SCEV test handles, but which the range check doesn't.
At the moment we record that the low two bits of "i" are clear,
but we still end up with a maximum range of 0xffffffff rather than
0xfffffffc.
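
One way to picture that follow-on (an illustration only, with invented names
tighten_max and low_zero_bits, not the eventual GCC change): the low-bits
information is already tracked as a nonzero-bits mask, so the recorded
maximum could be rounded down to the known alignment before running the
wrap check above.

  #include <cstdint>

  /* VAR_MAX is the recorded maximum of a variable whose low LOW_ZERO_BITS
     bits are known to be zero (LOW_ZERO_BITS < 32).  Every achievable value
     is a multiple of the alignment, so rounding the maximum down to it is
     safe: 0xffffffff becomes 0xfffffffc when the low two bits are clear,
     which is enough to prove that the variable plus 3 cannot wrap.  */
  uint32_t
  tighten_max (uint32_t var_max, unsigned int low_zero_bits)
  {
    uint32_t align_mask = (1u << low_zero_bits) - 1;
    return var_max & ~align_mask;
  }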

2018-01-31  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	PR tree-optimization/81635
	* tree-data-ref.c (split_constant_offset_1): For types that
	wrap on overflow, try to use range info to prove that wrapping
	cannot occur.

gcc/testsuite/
	PR tree-optimization/81635
	* gcc.dg/vect/bb-slp-pr81635-1.c: New test.
	* gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.

From-SVN: r257296

gcc/ChangeLog

@@ -1,3 +1,10 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/81635
+	* tree-data-ref.c (split_constant_offset_1): For types that
+	wrap on overflow, try to use range info to prove that wrapping
+	cannot occur.
+
 2018-02-01  Renlin Li  <renlin.li@arm.com>
 
 	PR target/83370

gcc/testsuite/ChangeLog

@@ -1,3 +1,9 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+	PR tree-optimization/81635
+	* gcc.dg/vect/bb-slp-pr81635-1.c: New test.
+	* gcc.dg/vect/bb-slp-pr81635-2.c: Likewise.
+
 2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
 
 	PR target/83370

gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c

@@ -0,0 +1,92 @@
/* { dg-do compile } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-require-effective-target vect_double } */
/* { dg-require-effective-target lp64 } */

void
f1 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 0; i < 1000; i += 4)
    {
      double a = q[i] + p[i];
      double b = q[i + 1] + p[i + 1];
      q[i] = a;
      q[i + 1] = b;
    }
}

void
f2 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 2; i < ~0U - 4; i += 4)
    {
      double a = q[i] + p[i];
      double b = q[i + 1] + p[i + 1];
      q[i] = a;
      q[i + 1] = b;
    }
}

void
f3 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 0; i < ~0U - 3; i += 4)
    {
      double a = q[i + 2] + p[i + 2];
      double b = q[i + 3] + p[i + 3];
      q[i + 2] = a;
      q[i + 3] = b;
    }
}

void
f4 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 0; i < 500; i += 6)
    for (unsigned int j = 0; j < 500; j += 4)
      {
        double a = q[j] + p[i];
        double b = q[j + 1] + p[i + 1];
        q[i] = a;
        q[i + 1] = b;
      }
}

void
f5 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 2; i < 1000; i += 4)
    {
      double a = q[i - 2] + p[i - 2];
      double b = q[i - 1] + p[i - 1];
      q[i - 2] = a;
      q[i - 1] = b;
    }
}

double p[1000];
double q[1000];

void
f6 (int n)
{
  for (unsigned int i = 0; i < n; i += 4)
    {
      double a = q[i] + p[i];
      double b = q[i + 1] + p[i + 1];
      q[i] = a;
      q[i + 1] = b;
    }
}

/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */

gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c

@@ -0,0 +1,64 @@
/* { dg-do compile } */
/* { dg-additional-options "-fno-tree-loop-vectorize" } */
/* { dg-require-effective-target lp64 } */

double p[1000];
double q[1000];

void
f1 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 2; i < ~0U - 4; i += 4)
    {
      double a = q[i + 2] + p[i + 2];
      double b = q[i + 3] + p[i + 3];
      q[i + 2] = a;
      q[i + 3] = b;
    }
}

void
f2 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 0; i < ~0U - 3; i += 4)
    {
      double a = q[i + 4] + p[i + 4];
      double b = q[i + 5] + p[i + 5];
      q[i + 4] = a;
      q[i + 5] = b;
    }
}

void
f3 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 0; i < 1000; i += 4)
    {
      double a = q[i - 2] + p[i - 2];
      double b = q[i - 1] + p[i - 1];
      q[i - 2] = a;
      q[i - 1] = b;
    }
}

void
f4 (double *p, double *q)
{
  p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2);
  q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2);
  for (unsigned int i = 2; i < 1000; i += 4)
    {
      double a = q[i - 4] + p[i - 4];
      double b = q[i - 3] + p[i - 3];
      q[i - 4] = a;
      q[i - 3] = b;
    }
}

/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */

gcc/tree-data-ref.c

@@ -705,11 +705,46 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 	 and the outer precision is at least as large as the inner.  */
       tree itype = TREE_TYPE (op0);
       if ((POINTER_TYPE_P (itype)
-	   || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+	   || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
 	  && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
 	  && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
 	{
-	  split_constant_offset (op0, &var0, off);
+	  if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype))
+	    {
+	      /* Split the unconverted operand and try to prove that
+		 wrapping isn't a problem.  */
+	      tree tmp_var, tmp_off;
+	      split_constant_offset (op0, &tmp_var, &tmp_off);
+
+	      /* See whether we have an SSA_NAME whose range is known
+		 to be [A, B].  */
+	      if (TREE_CODE (tmp_var) != SSA_NAME)
+		return false;
+	      wide_int var_min, var_max;
+	      if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE)
+		return false;
+
+	      /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF)
+		 is known to be [A + TMP_OFF, B + TMP_OFF], with all
+		 operations done in ITYPE.  The addition must overflow
+		 at both ends of the range or at neither.  */
+	      bool overflow[2];
+	      signop sgn = TYPE_SIGN (itype);
+	      unsigned int prec = TYPE_PRECISION (itype);
+	      wide_int woff = wi::to_wide (tmp_off, prec);
+	      wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]);
+	      wi::add (var_max, woff, sgn, &overflow[1]);
+	      if (overflow[0] != overflow[1])
+		return false;
+
+	      /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR.  */
+	      widest_int diff = (widest_int::from (op0_min, sgn)
+				 - widest_int::from (var_min, sgn));
+	      var0 = tmp_var;
+	      *off = wide_int_to_tree (ssizetype, diff);
+	    }
+	  else
+	    split_constant_offset (op0, &var0, off);
 	  *var = fold_convert (type, var0);
 	  return true;
 	}