store-merging: Consider also overlapping stores earlier in the by bitpos sorting [PR97053]
As the testcases show, if we have something like: MEM <char[12]> [&b + 8B] = {}; MEM[(short *) &b] = 5; _5 = *x_4(D); MEM <long long unsigned int> [&b + 2B] = _5; MEM[(char *)&b + 16B] = 88; MEM[(int *)&b + 20B] = 1; then in sort_by_bitpos the stores are almost like in the given order, except the first store is after the = _5; store. We can't coalesce the = 5; store with = _5;, because the latter is MEM_REF, while the former INTEGER_CST, and we can't coalesce the = _5 store with the = {} store because the former is MEM_REF, the latter INTEGER_CST. But we happily coalesce the remaining 3 stores, which is wrong, because the = _5; store overlaps those and is in between them in the program order. We already have code to deal with similar cases in check_no_overlap, but we deal only with the following stores in sort_by_bitpos order, not the earlier ones. The following patch checks also the earlier ones. In coalesce_immediate_stores it computes the first one that needs to be checked (all the ones whose bitpos + bitsize is smaller or equal to merged_store->start don't need to be checked and don't need to be checked even for any following attempts because of the sort_by_bitpos sorting) and the end of that (that is the first store in the merged_store). 2020-09-16 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/97053 * gimple-ssa-store-merging.c (check_no_overlap): Add FIRST_ORDER, START, FIRST_EARLIER and LAST_EARLIER arguments. Return false if any stores between FIRST_EARLIER inclusive and LAST_EARLIER exclusive has order in between FIRST_ORDER and LAST_ORDER and overlaps the to be merged store. (imm_store_chain_info::try_coalesce_bswap): Add FIRST_EARLIER argument. Adjust check_no_overlap caller. (imm_store_chain_info::coalesce_immediate_stores): Add first_earlier and last_earlier variables, adjust them during iterations. Adjust check_no_overlap callers, call check_no_overlap even when extending overlapping stores by extra INTEGER_CST stores. * gcc.dg/store_merging_31.c: New test. * gcc.dg/store_merging_32.c: New test.
This commit is contained in:
parent
2c45b9ee5c
commit
bd909071ac
3 changed files with 222 additions and 10 deletions
|
@ -2116,7 +2116,8 @@ public:
|
|||
}
|
||||
}
|
||||
bool terminate_and_process_chain ();
|
||||
bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
|
||||
bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
|
||||
unsigned int);
|
||||
bool coalesce_immediate_stores ();
|
||||
bool output_merged_store (merged_store_group *);
|
||||
bool output_merged_stores ();
|
||||
|
@ -2443,14 +2444,39 @@ gather_bswap_load_refs (vec<tree> *refs, tree val)
|
|||
into the group. That way it will be its own store group and will
|
||||
not be touched. If ALL_INTEGER_CST_P and there are overlapping
|
||||
INTEGER_CST stores, those are mergeable using merge_overlapping,
|
||||
so don't return false for those. */
|
||||
so don't return false for those.
|
||||
|
||||
Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
|
||||
(exclusive), whether they don't overlap the bitrange START to END
|
||||
and have order in between FIRST_ORDER and LAST_ORDER. This is to
|
||||
prevent merging in cases like:
|
||||
MEM <char[12]> [&b + 8B] = {};
|
||||
MEM[(short *) &b] = 5;
|
||||
_5 = *x_4(D);
|
||||
MEM <long long unsigned int> [&b + 2B] = _5;
|
||||
MEM[(char *)&b + 16B] = 88;
|
||||
MEM[(int *)&b + 20B] = 1;
|
||||
The = {} store comes in sort_by_bitpos before the = 88 store, and can't
|
||||
be merged with it, because the = _5 store overlaps these and is in between
|
||||
them in sort_by_order ordering. If it was merged, the merged store would
|
||||
go after the = _5 store and thus change behavior. */
|
||||
|
||||
static bool
|
||||
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
|
||||
bool all_integer_cst_p, unsigned int last_order,
|
||||
unsigned HOST_WIDE_INT end)
|
||||
bool all_integer_cst_p, unsigned int first_order,
|
||||
unsigned int last_order, unsigned HOST_WIDE_INT start,
|
||||
unsigned HOST_WIDE_INT end, unsigned int first_earlier,
|
||||
unsigned end_earlier)
|
||||
{
|
||||
unsigned int len = m_store_info.length ();
|
||||
for (unsigned int j = first_earlier; j < end_earlier; j++)
|
||||
{
|
||||
store_immediate_info *info = m_store_info[j];
|
||||
if (info->order > first_order
|
||||
&& info->order < last_order
|
||||
&& info->bitpos + info->bitsize > start)
|
||||
return false;
|
||||
}
|
||||
for (++i; i < len; ++i)
|
||||
{
|
||||
store_immediate_info *info = m_store_info[i];
|
||||
|
@ -2471,7 +2497,8 @@ check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
|
|||
bool
|
||||
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
|
||||
unsigned int first,
|
||||
unsigned int try_size)
|
||||
unsigned int try_size,
|
||||
unsigned int first_earlier)
|
||||
{
|
||||
unsigned int len = m_store_info.length (), last = first;
|
||||
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
|
||||
|
@ -2611,7 +2638,8 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
|
|||
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
|
||||
return false;
|
||||
|
||||
if (!check_no_overlap (m_store_info, last, false, last_order, end))
|
||||
if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
|
||||
merged_store->start, end, first_earlier, first))
|
||||
return false;
|
||||
|
||||
/* Don't handle memory copy this way if normal non-bswap processing
|
||||
|
@ -2703,6 +2731,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
|
||||
store_immediate_info *info;
|
||||
unsigned int i, ignore = 0;
|
||||
unsigned int first_earlier = 0;
|
||||
unsigned int end_earlier = 0;
|
||||
|
||||
/* Order the stores by the bitposition they write to. */
|
||||
m_store_info.qsort (sort_by_bitpos);
|
||||
|
@ -2719,6 +2749,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
if (i <= ignore)
|
||||
goto done;
|
||||
|
||||
while (first_earlier < end_earlier
|
||||
&& (m_store_info[first_earlier]->bitpos
|
||||
+ m_store_info[first_earlier]->bitsize
|
||||
<= merged_store->start))
|
||||
first_earlier++;
|
||||
|
||||
/* First try to handle group of stores like:
|
||||
p[0] = data >> 24;
|
||||
p[1] = data >> 16;
|
||||
|
@ -2733,7 +2769,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
{
|
||||
unsigned int try_size;
|
||||
for (try_size = 64; try_size >= 16; try_size >>= 1)
|
||||
if (try_coalesce_bswap (merged_store, i - 1, try_size))
|
||||
if (try_coalesce_bswap (merged_store, i - 1, try_size,
|
||||
first_earlier))
|
||||
break;
|
||||
|
||||
if (try_size >= 16)
|
||||
|
@ -2741,7 +2778,10 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
ignore = i + merged_store->stores.length () - 1;
|
||||
m_merged_store_groups.safe_push (merged_store);
|
||||
if (ignore < m_store_info.length ())
|
||||
merged_store = new merged_store_group (m_store_info[ignore]);
|
||||
{
|
||||
merged_store = new merged_store_group (m_store_info[ignore]);
|
||||
end_earlier = ignore;
|
||||
}
|
||||
else
|
||||
merged_store = NULL;
|
||||
goto done;
|
||||
|
@ -2776,12 +2816,16 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
&& merged_store->only_constants
|
||||
&& info->lp_nr == merged_store->lp_nr)
|
||||
{
|
||||
unsigned int first_order
|
||||
= MIN (merged_store->first_order, info->order);
|
||||
unsigned int last_order
|
||||
= MAX (merged_store->last_order, info->order);
|
||||
unsigned HOST_WIDE_INT end
|
||||
= MAX (merged_store->start + merged_store->width,
|
||||
info->bitpos + info->bitsize);
|
||||
if (check_no_overlap (m_store_info, i, true, last_order, end))
|
||||
if (check_no_overlap (m_store_info, i, true, first_order,
|
||||
last_order, merged_store->start, end,
|
||||
first_earlier, end_earlier))
|
||||
{
|
||||
/* check_no_overlap call above made sure there are no
|
||||
overlapping stores with non-INTEGER_CST rhs_code
|
||||
|
@ -2810,6 +2854,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
do
|
||||
{
|
||||
unsigned int max_order = 0;
|
||||
unsigned int min_order = first_order;
|
||||
unsigned first_nonmergeable_int_order = ~0U;
|
||||
unsigned HOST_WIDE_INT this_end = end;
|
||||
k = i;
|
||||
|
@ -2836,6 +2881,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
break;
|
||||
}
|
||||
k = j;
|
||||
min_order = MIN (min_order, info2->order);
|
||||
this_end = MAX (this_end,
|
||||
info2->bitpos + info2->bitsize);
|
||||
}
|
||||
|
@ -2852,6 +2898,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
first_nonmergeable_order
|
||||
= MIN (first_nonmergeable_order, info2->order);
|
||||
}
|
||||
if (k > i
|
||||
&& !check_no_overlap (m_store_info, len - 1, true,
|
||||
min_order, try_order,
|
||||
merged_store->start, this_end,
|
||||
first_earlier, end_earlier))
|
||||
k = 0;
|
||||
if (k == 0)
|
||||
{
|
||||
if (last_order == try_order)
|
||||
|
@ -2937,9 +2989,12 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
info->ops_swapped_p = true;
|
||||
}
|
||||
if (check_no_overlap (m_store_info, i, false,
|
||||
MIN (merged_store->first_order, info->order),
|
||||
MAX (merged_store->last_order, info->order),
|
||||
merged_store->start,
|
||||
MAX (merged_store->start + merged_store->width,
|
||||
info->bitpos + info->bitsize)))
|
||||
info->bitpos + info->bitsize),
|
||||
first_earlier, end_earlier))
|
||||
{
|
||||
/* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
|
||||
if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
|
||||
|
@ -2985,6 +3040,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
|
|||
delete merged_store;
|
||||
|
||||
merged_store = new merged_store_group (info);
|
||||
end_earlier = i;
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fputs ("New store group\n", dump_file);
|
||||
|
||||
|
|
27
gcc/testsuite/gcc.dg/store_merging_31.c
Normal file
27
gcc/testsuite/gcc.dg/store_merging_31.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* PR tree-optimization/97053 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
struct S { short a; char b[9]; int c; char d; int e; };
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (char *x, char *y)
|
||||
{
|
||||
if (__builtin_strcmp (x, "ABCDXXXX") != 0
|
||||
|| __builtin_strcmp (y, "ABCDXXXX") != 0)
|
||||
__builtin_abort ();
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
char a[9] = "XXXXXXXX";
|
||||
struct S b = {};
|
||||
__builtin_memcpy (a, "ABCD", 4);
|
||||
b.a = 5;
|
||||
__builtin_memcpy (b.b, a, 8);
|
||||
b.d = 'X';
|
||||
b.e = 1;
|
||||
foo (a, b.b);
|
||||
return 0;
|
||||
}
|
129
gcc/testsuite/gcc.dg/store_merging_32.c
Normal file
129
gcc/testsuite/gcc.dg/store_merging_32.c
Normal file
|
@ -0,0 +1,129 @@
|
|||
/* PR tree-optimization/97053 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fno-tree-dse" } */
|
||||
|
||||
struct __attribute__((packed, may_alias)) S { long long s; };
|
||||
struct __attribute__((packed, may_alias)) T { short t; };
|
||||
|
||||
__attribute__((noipa)) void
|
||||
test (char *p, char *q, int s)
|
||||
{
|
||||
if ((s & 1) == 0)
|
||||
{
|
||||
if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||||
!= *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||||
|| (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||||
!= ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||||
|| (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
|
||||
!= *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
|
||||
__builtin_abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||||
!= *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||||
|| (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||||
!= ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||||
|| (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
|
||||
!= ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
|
||||
|| p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
|
||||
__builtin_abort ();
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
foo (long long *p, char *q, char *r, char *s)
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||||
*(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
|
||||
test (a, q, 0);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
bar (long long *p, char *q, char *r, char *s, char *t)
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
|
||||
a[3 * sizeof (short) - 2] = 3;
|
||||
*(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
|
||||
t[3 * sizeof (short) - 2] = 3;
|
||||
test (a, q, 1);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
baz (long long *p, char *q, char *r, char *s)
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||
*(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
|
||||
test (a, q, 2);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
qux (long long *p, char *q, char *r, char *s, char *t)
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
a[3 * sizeof (short) - 2] = 3;
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||
((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
s[3 * sizeof (short) - 2] = 3;
|
||||
((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
|
||||
test (a, q, 3);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
corge (long long *p, char *q, char *r, char *s, short u[3])
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||||
*(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||||
test (a, q, 4);
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
garply (long long *p, char *q, char *r, char *s, short u[3])
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
*(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||||
((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||||
*(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||||
((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||
*(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||||
test (a, q, 6);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
char a[64] __attribute__((aligned (__alignof (short))));
|
||||
long long p = -1LL;
|
||||
short u[] = { 1, 2, 3 };
|
||||
foo (&p, &a[0], &a[0], &a[0]);
|
||||
bar (&p, &a[0], &a[0], &a[0], &a[0]);
|
||||
baz (&p, &a[0], &a[0], &a[0]);
|
||||
qux (&p, &a[0], &a[0], &a[0], &a[0]);
|
||||
corge (&p, &a[0], &a[0], &a[0], u);
|
||||
garply (&p, &a[0], &a[0], &a[0], u);
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue