diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8cec738653b..e62b93d9a38 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2018-02-01 Richard Sandiford + + PR tree-optimization/81635 + * tree-data-ref.c (split_constant_offset_1): For types that + wrap on overflow, try to use range info to prove that wrapping + cannot occur. + 2018-02-01 Renlin Li PR target/83370 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8fb5c321031..3f2f4473611 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2018-02-01 Richard Sandiford + + PR tree-optimization/81635 + * gcc.dg/vect/bb-slp-pr81635-1.c: New test. + * gcc.dg/vect/bb-slp-pr81635-2.c: Likewise. + 2018-02-01 Richard Sandiford PR target/83370 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c new file mode 100644 index 00000000000..f024dc78111 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-1.c @@ -0,0 +1,92 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-tree-loop-vectorize" } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-require-effective-target lp64 } */ + +void +f1 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 0; i < 1000; i += 4) + { + double a = q[i] + p[i]; + double b = q[i + 1] + p[i + 1]; + q[i] = a; + q[i + 1] = b; + } +} + +void +f2 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 2; i < ~0U - 4; i += 4) + { + double a = q[i] + p[i]; + double b = q[i + 1] + p[i + 1]; + q[i] = a; + q[i + 1] = b; + } +} + +void +f3 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 0; i < ~0U - 3; i += 4) + { + double a = q[i + 2] + p[i + 2]; + double b = q[i + 3] + p[i + 3]; + q[i + 2] = a; + q[i + 3] = b; + } +} + +void +f4 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 0; i < 500; i += 6) + for (unsigned int j = 0; j < 500; j += 4) + { + double a = q[j] + p[i]; + double b = q[j + 1] + p[i + 1]; + q[i] = a; + q[i + 1] = b; + } +} + +void +f5 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 2; i < 1000; i += 4) + { + double a = q[i - 2] + p[i - 2]; + double b = q[i - 1] + p[i - 1]; + q[i - 2] = a; + q[i - 1] = b; + } +} + +double p[1000]; +double q[1000]; + +void +f6 (int n) +{ + for (unsigned int i = 0; i < n; i += 4) + { + double a = q[i] + p[i]; + double b = q[i + 1] + p[i + 1]; + q[i] = a; + q[i + 1] = b; + } +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized" 6 "slp1" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c new file mode 100644 index 00000000000..11e8f0f8526 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr81635-2.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-tree-loop-vectorize" } */ +/* { dg-require-effective-target lp64 } */ + +double p[1000]; +double q[1000]; + +void +f1 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 2; i < ~0U - 4; i += 4) + { + double a = q[i + 2] + p[i + 2]; + double b = q[i + 3] + p[i + 3]; + q[i + 2] = a; + q[i + 3] = b; + } +} + +void +f2 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 0; i < ~0U - 3; i += 4) + { + double a = q[i + 4] + p[i + 4]; + double b = q[i + 5] + p[i + 5]; + q[i + 4] = a; + q[i + 5] = b; + } +} + +void +f3 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 0; i < 1000; i += 4) + { + double a = q[i - 2] + p[i - 2]; + double b = q[i - 1] + p[i - 1]; + q[i - 2] = a; + q[i - 1] = b; + } +} + +void +f4 (double *p, double *q) +{ + p = (double *) __builtin_assume_aligned (p, sizeof (double) * 2); + q = (double *) __builtin_assume_aligned (q, sizeof (double) * 2); + for (unsigned int i = 2; i < 1000; i += 4) + { + double a = q[i - 4] + p[i - 4]; + double b = q[i - 3] + p[i - 3]; + q[i - 4] = a; + q[i - 3] = b; + } +} + +/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp1" } } */ diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index b5c0b7f4281..f3070d3a118 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -705,11 +705,46 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, and the outer precision is at least as large as the inner. */ tree itype = TREE_TYPE (op0); if ((POINTER_TYPE_P (itype) - || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype))) + || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype))) && TYPE_PRECISION (type) >= TYPE_PRECISION (itype) && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))) { - split_constant_offset (op0, &var0, off); + if (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_WRAPS (itype)) + { + /* Split the unconverted operand and try to prove that + wrapping isn't a problem. */ + tree tmp_var, tmp_off; + split_constant_offset (op0, &tmp_var, &tmp_off); + + /* See whether we have an SSA_NAME whose range is known + to be [A, B]. */ + if (TREE_CODE (tmp_var) != SSA_NAME) + return false; + wide_int var_min, var_max; + if (get_range_info (tmp_var, &var_min, &var_max) != VR_RANGE) + return false; + + /* See whether the range of OP0 (i.e. TMP_VAR + TMP_OFF) + is known to be [A + TMP_OFF, B + TMP_OFF], with all + operations done in ITYPE. The addition must overflow + at both ends of the range or at neither. */ + bool overflow[2]; + signop sgn = TYPE_SIGN (itype); + unsigned int prec = TYPE_PRECISION (itype); + wide_int woff = wi::to_wide (tmp_off, prec); + wide_int op0_min = wi::add (var_min, woff, sgn, &overflow[0]); + wi::add (var_max, woff, sgn, &overflow[1]); + if (overflow[0] != overflow[1]) + return false; + + /* Calculate (ssizetype) OP0 - (ssizetype) TMP_VAR. */ + widest_int diff = (widest_int::from (op0_min, sgn) + - widest_int::from (var_min, sgn)); + var0 = tmp_var; + *off = wide_int_to_tree (ssizetype, diff); + } + else + split_constant_offset (op0, &var0, off); *var = fold_convert (type, var0); return true; }