Use a single comparison for index-based alias checks
This patch rewrites the index-based alias checks to use conditions of the form: (unsigned T) (a - b + bias) <= limit E.g. before the patch: struct s { int x[100]; }; void f1 (struct s *s1, int a, int b) { for (int i = 0; i < 32; ++i) s1->x[i + a] += s1->x[i + b]; } used: add w3, w1, 3 cmp w3, w2 add w3, w2, 3 ccmp w1, w3, 0, ge ble .L2 whereas after the patch it uses: sub w3, w1, w2 add w3, w3, 3 cmp w3, 6 bls .L2 The patch also fixes the seg_len1 and seg_len2 negation for cases in which seg_len is a "negative unsigned" value narrower than 64 bits, like it is for 32-bit targets. Previously we'd end up with values like 0xffffffff000000001 instead of 1. 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> gcc/ * tree-data-ref.c (create_intersect_range_checks_index): Rewrite the index tests to have the form (unsigned T) (B - A + bias) <= limit. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-18.c: New test. * gcc.dg/vect/vect-alias-check-19.c: Likewise. * gcc.dg/vect/vect-alias-check-20.c: Likewise. From-SVN: r278354
This commit is contained in:
parent
b4d1b63573
commit
f9d6338bd1
6 changed files with 293 additions and 49 deletions
|
@ -1,3 +1,8 @@
|
|||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* tree-data-ref.c (create_intersect_range_checks_index): Rewrite
|
||||
the index tests to have the form (unsigned T) (B - A + bias) <= limit.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* tree-data-ref.c (create_intersect_range_checks_index)
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.dg/vect/vect-alias-check-18.c: New test.
|
||||
* gcc.dg/vect/vect-alias-check-19.c: Likewise.
|
||||
* gcc.dg/vect/vect-alias-check-20.c: Likewise.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.dg/vect/vect-alias-check-1.c: Test for the type of alias check.
|
||||
|
|
64
gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
Normal file
64
gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
#define N 200
|
||||
#define DIST 32
|
||||
|
||||
typedef signed char sc;
|
||||
typedef unsigned char uc;
|
||||
typedef signed short ss;
|
||||
typedef unsigned short us;
|
||||
typedef int si;
|
||||
typedef unsigned int ui;
|
||||
typedef signed long long sll;
|
||||
typedef unsigned long long ull;
|
||||
|
||||
#define FOR_EACH_TYPE(M) \
|
||||
M (sc) M (uc) \
|
||||
M (ss) M (us) \
|
||||
M (si) M (ui) \
|
||||
M (sll) M (ull) \
|
||||
M (float) M (double)
|
||||
|
||||
#define TEST_VALUE(I) ((I) * 11 / 2)
|
||||
|
||||
#define ADD_TEST(TYPE) \
|
||||
TYPE a_##TYPE[N * 2]; \
|
||||
void __attribute__((noinline, noclone)) \
|
||||
test_##TYPE (int x, int y) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
a_##TYPE[x - i] += a_##TYPE[y - i]; \
|
||||
}
|
||||
|
||||
#define DO_TEST(TYPE) \
|
||||
for (int i = 0; i < DIST * 2; ++i) \
|
||||
{ \
|
||||
for (int j = 0; j < N + DIST * 2; ++j) \
|
||||
a_##TYPE[j] = TEST_VALUE (j); \
|
||||
test_##TYPE (i + N - 1, DIST + N - 1); \
|
||||
for (int j = 0; j < N + DIST * 2; ++j) \
|
||||
{ \
|
||||
TYPE expected; \
|
||||
if (j < i || j >= i + N) \
|
||||
expected = TEST_VALUE (j); \
|
||||
else if (i >= DIST) \
|
||||
expected = ((TYPE) TEST_VALUE (j) \
|
||||
+ (TYPE) TEST_VALUE (j + DIST - i)); \
|
||||
else \
|
||||
expected = ((TYPE) TEST_VALUE (j) \
|
||||
+ a_##TYPE[j + DIST - i]); \
|
||||
if (expected != a_##TYPE[j]) \
|
||||
__builtin_abort (); \
|
||||
} \
|
||||
}
|
||||
|
||||
FOR_EACH_TYPE (ADD_TEST)
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_TYPE (DO_TEST)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
|
||||
/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
62
gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
Normal file
62
gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
Normal file
|
@ -0,0 +1,62 @@
|
|||
#define N 200
|
||||
#define DIST 32
|
||||
|
||||
typedef signed char sc;
|
||||
typedef unsigned char uc;
|
||||
typedef signed short ss;
|
||||
typedef unsigned short us;
|
||||
typedef int si;
|
||||
typedef unsigned int ui;
|
||||
typedef signed long long sll;
|
||||
typedef unsigned long long ull;
|
||||
|
||||
#define FOR_EACH_TYPE(M) \
|
||||
M (sc) M (uc) \
|
||||
M (ss) M (us) \
|
||||
M (si) M (ui) \
|
||||
M (sll) M (ull) \
|
||||
M (float) M (double)
|
||||
|
||||
#define ADD_TEST(TYPE) \
|
||||
TYPE a_##TYPE[N * 2]; \
|
||||
void __attribute__((noinline, noclone)) \
|
||||
test_##TYPE (int x, int y) \
|
||||
{ \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
a_##TYPE[i + x] = i; \
|
||||
a_##TYPE[i + y] = 42 - i * 2; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define DO_TEST(TYPE) \
|
||||
for (int i = 0; i < DIST * 2; ++i) \
|
||||
{ \
|
||||
__builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE)); \
|
||||
test_##TYPE (DIST, i); \
|
||||
for (int j = 0; j < N + DIST * 2; ++j) \
|
||||
{ \
|
||||
TYPE expected = 0; \
|
||||
if (i > DIST && j >= i && j < i + N) \
|
||||
expected = 42 - (j - i) * 2; \
|
||||
if (j >= DIST && j < DIST + N) \
|
||||
expected = j - DIST; \
|
||||
if (i <= DIST && j >= i && j < i + N) \
|
||||
expected = 42 - (j - i) * 2; \
|
||||
if (expected != a_##TYPE[j]) \
|
||||
__builtin_abort (); \
|
||||
} \
|
||||
}
|
||||
|
||||
FOR_EACH_TYPE (ADD_TEST)
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_TYPE (DO_TEST)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
|
||||
/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
66
gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
Normal file
66
gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
#define N 200
|
||||
#define DIST 32
|
||||
|
||||
typedef signed char sc;
|
||||
typedef unsigned char uc;
|
||||
typedef signed short ss;
|
||||
typedef unsigned short us;
|
||||
typedef int si;
|
||||
typedef unsigned int ui;
|
||||
typedef signed long long sll;
|
||||
typedef unsigned long long ull;
|
||||
|
||||
#define FOR_EACH_TYPE(M) \
|
||||
M (sc) M (uc) \
|
||||
M (ss) M (us) \
|
||||
M (si) M (ui) \
|
||||
M (sll) M (ull) \
|
||||
M (float) M (double)
|
||||
|
||||
#define TEST_VALUE(I) ((I) * 11 / 2)
|
||||
|
||||
#define ADD_TEST(TYPE) \
|
||||
TYPE a_##TYPE[N * 2]; \
|
||||
TYPE __attribute__((noinline, noclone)) \
|
||||
test_##TYPE (int x, int y) \
|
||||
{ \
|
||||
TYPE res = 0; \
|
||||
for (int i = 0; i < N; ++i) \
|
||||
{ \
|
||||
a_##TYPE[i + x] = i; \
|
||||
res += a_##TYPE[i + y]; \
|
||||
} \
|
||||
return res; \
|
||||
}
|
||||
|
||||
#define DO_TEST(TYPE) \
|
||||
for (int i = 0; i < DIST * 2; ++i) \
|
||||
{ \
|
||||
for (int j = 0; j < N + DIST * 2; ++j) \
|
||||
a_##TYPE[j] = TEST_VALUE (j); \
|
||||
TYPE res = test_##TYPE (DIST, i); \
|
||||
for (int j = 0; j < N; ++j) \
|
||||
if (a_##TYPE[j + DIST] != (TYPE) j) \
|
||||
__builtin_abort (); \
|
||||
TYPE expected_res = 0; \
|
||||
for (int j = i; j < i + N; ++j) \
|
||||
if (i <= DIST && j >= DIST && j < DIST + N) \
|
||||
expected_res += j - DIST; \
|
||||
else \
|
||||
expected_res += TEST_VALUE (j); \
|
||||
if (expected_res != res) \
|
||||
__builtin_abort (); \
|
||||
}
|
||||
|
||||
FOR_EACH_TYPE (ADD_TEST)
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
FOR_EACH_TYPE (DO_TEST)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
|
||||
/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
|
|
@ -1743,7 +1743,9 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
|
|||
|
||||
We can create expression based on index rather than address:
|
||||
|
||||
(i_0 + 4 < j_0 || j_0 + 4 < i_0)
|
||||
(unsigned) (i_0 - j_0 + 3) <= 6
|
||||
|
||||
i.e. the indices are less than 4 apart.
|
||||
|
||||
Note evolution step of index needs to be considered in comparison. */
|
||||
|
||||
|
@ -1780,15 +1782,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
|
|||
if (neg_step)
|
||||
{
|
||||
abs_step = -abs_step;
|
||||
seg_len1 = -seg_len1;
|
||||
seg_len2 = -seg_len2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Include the access size in the length, so that we only have one
|
||||
tree addition below. */
|
||||
seg_len1 += dr_a.access_size;
|
||||
seg_len2 += dr_b.access_size;
|
||||
seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
|
||||
seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
|
||||
}
|
||||
|
||||
/* Infer the number of iterations with which the memory segment is accessed
|
||||
|
@ -1802,16 +1797,13 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
|
|||
|| !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
|
||||
return false;
|
||||
|
||||
poly_uint64 niter_access1 = 0, niter_access2 = 0;
|
||||
if (neg_step)
|
||||
{
|
||||
/* Divide each access size by the byte step, rounding up. */
|
||||
if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
|
||||
abs_step, &niter_access1)
|
||||
|| !can_div_trunc_p (dr_b.access_size + abs_step - 1,
|
||||
abs_step, &niter_access2))
|
||||
return false;
|
||||
}
|
||||
/* Divide each access size by the byte step, rounding up. */
|
||||
poly_uint64 niter_access1, niter_access2;
|
||||
if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
|
||||
abs_step, &niter_access1)
|
||||
|| !can_div_trunc_p (dr_b.access_size + abs_step - 1,
|
||||
abs_step, &niter_access2))
|
||||
return false;
|
||||
|
||||
unsigned int i;
|
||||
for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
|
||||
|
@ -1851,38 +1843,87 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
|
|||
index of data reference. Like segment length, index length is
|
||||
linear function of the number of iterations with index_step as
|
||||
the coefficient, i.e, niter_len * idx_step. */
|
||||
tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
|
||||
build_int_cst (TREE_TYPE (min1),
|
||||
niter_len1));
|
||||
tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
|
||||
build_int_cst (TREE_TYPE (min2),
|
||||
niter_len2));
|
||||
tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
|
||||
tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
|
||||
/* Adjust ranges for negative step. */
|
||||
offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
|
||||
SIGNED);
|
||||
if (neg_step)
|
||||
{
|
||||
/* IDX_LEN1 and IDX_LEN2 are negative in this case. */
|
||||
std::swap (min1, max1);
|
||||
std::swap (min2, max2);
|
||||
abs_idx_step = -abs_idx_step;
|
||||
poly_offset_int idx_len1 = abs_idx_step * niter_len1;
|
||||
poly_offset_int idx_len2 = abs_idx_step * niter_len2;
|
||||
poly_offset_int idx_access1 = abs_idx_step * niter_access1;
|
||||
poly_offset_int idx_access2 = abs_idx_step * niter_access2;
|
||||
|
||||
/* As with the lengths just calculated, we've measured the access
|
||||
sizes in iterations, so multiply them by the index step. */
|
||||
tree idx_access1
|
||||
= fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
|
||||
build_int_cst (TREE_TYPE (min1), niter_access1));
|
||||
tree idx_access2
|
||||
= fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
|
||||
build_int_cst (TREE_TYPE (min2), niter_access2));
|
||||
gcc_assert (known_ge (idx_len1, 0)
|
||||
&& known_ge (idx_len2, 0)
|
||||
&& known_ge (idx_access1, 0)
|
||||
&& known_ge (idx_access2, 0));
|
||||
|
||||
/* MINUS_EXPR because the above values are negative. */
|
||||
max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
|
||||
max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
|
||||
}
|
||||
tree part_cond_expr
|
||||
= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
|
||||
fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
|
||||
fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
|
||||
/* Each access has the following pattern, with lengths measured
|
||||
in units of INDEX:
|
||||
|
||||
<-- idx_len -->
|
||||
<--- A: -ve step --->
|
||||
+-----+-------+-----+-------+-----+
|
||||
| n-1 | ..... | 0 | ..... | n-1 |
|
||||
+-----+-------+-----+-------+-----+
|
||||
<--- B: +ve step --->
|
||||
<-- idx_len -->
|
||||
|
|
||||
min
|
||||
|
||||
where "n" is the number of scalar iterations covered by the segment
|
||||
and where each access spans idx_access units.
|
||||
|
||||
A is the range of bytes accessed when the step is negative,
|
||||
B is the range when the step is positive.
|
||||
|
||||
When checking for general overlap, we need to test whether
|
||||
the range:
|
||||
|
||||
[min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1]
|
||||
|
||||
overlaps:
|
||||
|
||||
[min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
|
||||
|
||||
where:
|
||||
|
||||
low_offsetN = +ve step ? 0 : -idx_lenN;
|
||||
high_offsetN = +ve step ? idx_lenN : 0;
|
||||
|
||||
This is equivalent to testing whether:
|
||||
|
||||
min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
|
||||
&& min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
|
||||
|
||||
Converting this into a single test, there is an overlap if:
|
||||
|
||||
0 <= min2 - min1 + bias <= limit
|
||||
|
||||
where bias = high_offset2 + idx_access2 - 1 - low_offset1
|
||||
limit = (high_offset1 - low_offset1 + idx_access1 - 1)
|
||||
+ (high_offset2 - low_offset2 + idx_access2 - 1)
|
||||
i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
|
||||
|
||||
Combining the tests requires limit to be computable in an unsigned
|
||||
form of the index type; if it isn't, we fall back to the usual
|
||||
pointer-based checks. */
|
||||
poly_offset_int limit = (idx_len1 + idx_access1 - 1
|
||||
+ idx_len2 + idx_access2 - 1);
|
||||
tree utype = unsigned_type_for (TREE_TYPE (min1));
|
||||
if (!wi::fits_to_tree_p (limit, utype))
|
||||
return false;
|
||||
|
||||
poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
|
||||
poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
|
||||
poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
|
||||
|
||||
tree subject = fold_build2 (MINUS_EXPR, utype,
|
||||
fold_convert (utype, min2),
|
||||
fold_convert (utype, min1));
|
||||
subject = fold_build2 (PLUS_EXPR, utype, subject,
|
||||
wide_int_to_tree (utype, bias));
|
||||
tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
|
||||
wide_int_to_tree (utype, limit));
|
||||
if (*cond_expr)
|
||||
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
|
||||
*cond_expr, part_cond_expr);
|
||||
|
|
Loading…
Add table
Reference in a new issue