re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)
PR tree-optimization/78821 * gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up if base is TARGET_MEM_REF. If base is not MEM_REF, set base_addr to the address of the base rather than the base itself. (find_bswap_or_nop_1): Just use pointer comparison for vuse check. (find_bswap_or_nop_finalize): New function. (find_bswap_or_nop): Use it. (bswap_replace): Return a tree rather than bool, change first argument from gimple * to gimple_stmt_iterator, allow inserting into an empty sequence, allow ins_stmt to be NULL - then emit all stmts into gsi. Fix up MEM_REF address gimplification. (pass_optimize_bswap::execute): Adjust bswap_replace caller. (struct store_immediate_info): Add N and INS_STMT non-static data members. (store_immediate_info::store_immediate_info): Initialize them from newly added ctor args. (merged_store_group::apply_stores): Formatting fixes. Sort by bitpos at the end. (stmts_may_clobber_ref_p): For stores call also refs_anti_dependent_p. (gather_bswap_load_refs): New function. (imm_store_chain_info::try_coalesce_bswap): New method. (imm_store_chain_info::coalesce_immediate_stores): Use it. (split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially. (imm_store_chain_info::output_merged_store): Fail if number of new estimated stmts is greater than or equal to the old number. Handle LROTATE_EXPR and NOP_EXPR rhs_code. (pass_store_merging::process_store): Compute n and ins_stmt, if ins_stmt is non-NULL and the store rhs is otherwise invalid, use LROTATE_EXPR rhs_code. Pass n and ins_stmt to store_immediate_info ctor. (pass_store_merging::execute): Calculate dominators. * gcc.dg/store_merging_16.c: New test. From-SVN: r254948
This commit is contained in:
parent
dffec8ebdb
commit
4b84d9b8f9
4 changed files with 812 additions and 117 deletions
|
@ -1,5 +1,38 @@
|
|||
2017-11-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/78821
|
||||
* gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up
|
||||
if base is TARGET_MEM_REF. If base is not MEM_REF, set base_addr
|
||||
to the address of the base rather than the base itself.
|
||||
(find_bswap_or_nop_1): Just use pointer comparison for vuse check.
|
||||
(find_bswap_or_nop_finalize): New function.
|
||||
(find_bswap_or_nop): Use it.
|
||||
(bswap_replace): Return a tree rather than bool, change first
|
||||
argument from gimple * to gimple_stmt_iterator, allow inserting
|
||||
into an empty sequence, allow ins_stmt to be NULL - then emit
|
||||
all stmts into gsi. Fix up MEM_REF address gimplification.
|
||||
(pass_optimize_bswap::execute): Adjust bswap_replace caller.
|
||||
(struct store_immediate_info): Add N and INS_STMT non-static
|
||||
data members.
|
||||
(store_immediate_info::store_immediate_info): Initialize them
|
||||
from newly added ctor args.
|
||||
(merged_store_group::apply_stores): Formatting fixes. Sort by
|
||||
bitpos at the end.
|
||||
(stmts_may_clobber_ref_p): For stores call also
|
||||
refs_anti_dependent_p.
|
||||
(gather_bswap_load_refs): New function.
|
||||
(imm_store_chain_info::try_coalesce_bswap): New method.
|
||||
(imm_store_chain_info::coalesce_immediate_stores): Use it.
|
||||
(split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially.
|
||||
(imm_store_chain_info::output_merged_store): Fail if number of
|
||||
new estimated stmts is greater than or equal to the old number. Handle LROTATE_EXPR
|
||||
and NOP_EXPR rhs_code.
|
||||
(pass_store_merging::process_store): Compute n and ins_stmt, if
|
||||
ins_stmt is non-NULL and the store rhs is otherwise invalid, use
|
||||
LROTATE_EXPR rhs_code. Pass n and ins_stmt to store_immediate_info
|
||||
ctor.
|
||||
(pass_store_merging::execute): Calculate dominators.
|
||||
|
||||
* tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number,
|
||||
BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP,
|
||||
CMPXCHG, do_shift_rotate, verify_symbolic_number_p,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,3 +1,8 @@
|
|||
2017-11-20 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/78821
|
||||
* gcc.dg/store_merging_16.c: New test.
|
||||
|
||||
2017-11-19 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
PR target/82281
|
||||
|
|
157
gcc/testsuite/gcc.dg/store_merging_16.c
Normal file
157
gcc/testsuite/gcc.dg/store_merging_16.c
Normal file
|
@ -0,0 +1,157 @@
|
|||
/* Only test on some 64-bit targets which do have bswap{si,di}2 patterns and
|
||||
are either big or little endian (not pdp endian). */
|
||||
/* { dg-do compile { target { lp64 && { i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } } } */
|
||||
/* { dg-require-effective-target store_merge } */
|
||||
/* { dg-options "-O2 -fdump-tree-store-merging" } */
|
||||
|
||||
/* Store Q into P[0] .. P[7] one byte at a time: P[k] receives Q >> (8 * k)
   converted to unsigned char, so P[0] gets the lowest-order bits of Q and
   P[7] the highest-order ones.  The eight adjacent byte stores are the
   input pattern for the store-merging scans at the end of the file; keep
   the statements in this exact form.  */
__attribute__((noipa)) void
|
||||
f1 (unsigned char *p, unsigned long long q)
|
||||
{
|
||||
p[0] = q;
|
||||
|
||||
p[1] = q >> 8;
|
||||
|
||||
p[2] = q >> 16;
|
||||
|
||||
p[3] = q >> 24;
|
||||
|
||||
p[4] = q >> 32;
|
||||
|
||||
p[5] = q >> 40;
|
||||
|
||||
p[6] = q >> 48;
|
||||
|
||||
p[7] = q >> 56;
|
||||
|
||||
}
|
||||
|
||||
/* Mirror image of f1: P[k] receives Q >> (56 - 8 * k) converted to
   unsigned char, so P[0] gets the highest-order bits of Q and P[7] the
   lowest-order ones.  NOTE(review): between f1 and f2 presumably exactly
   one needs a byte swap on any given big- or little-endian target, which
   would account for one of the two expected __builtin_bswap64 matches.  */
__attribute__((noipa)) void
|
||||
f2 (unsigned char *p, unsigned long long q)
|
||||
{
|
||||
p[0] = q >> 56;
|
||||
|
||||
p[1] = q >> 48;
|
||||
|
||||
p[2] = q >> 40;
|
||||
|
||||
p[3] = q >> 32;
|
||||
|
||||
p[4] = q >> 24;
|
||||
|
||||
p[5] = q >> 16;
|
||||
|
||||
p[6] = q >> 8;
|
||||
|
||||
p[7] = q;
|
||||
|
||||
}
|
||||
|
||||
/* Copy the four bytes Q[0..3] to P[0..3] in reversed order
   (P[0] = Q[3], ..., P[3] = Q[0]).  All four loads are performed into
   local temporaries before the first store; f4 is the variant that
   interleaves loads and stores.  */
__attribute__((noipa)) void
|
||||
f3 (unsigned char *__restrict p, unsigned char *__restrict q)
|
||||
{
|
||||
unsigned char q3 = q[3];
|
||||
|
||||
unsigned char q2 = q[2];
|
||||
|
||||
unsigned char q1 = q[1];
|
||||
|
||||
unsigned char q0 = q[0];
|
||||
|
||||
p[0] = q3;
|
||||
|
||||
p[1] = q2;
|
||||
|
||||
p[2] = q1;
|
||||
|
||||
p[3] = q0;
|
||||
|
||||
}
|
||||
|
||||
/* Same byte reversal as f3 (P[0] = Q[3], ..., P[3] = Q[0]), but each
   store takes its value directly from the corresponding load instead of
   going through a named temporary.  */
__attribute__((noipa)) void
|
||||
f4 (unsigned char *__restrict p, unsigned char *__restrict q)
|
||||
{
|
||||
p[0] = q[3];
|
||||
|
||||
p[1] = q[2];
|
||||
|
||||
p[2] = q[1];
|
||||
|
||||
p[3] = q[0];
|
||||
|
||||
}
|
||||
|
||||
/* Two unsigned char members followed by an unsigned short; used by f5 and
   f6 as source and destination of a field-by-field shuffled copy.  */
struct S { unsigned char a, b; unsigned short c; };
|
||||
|
||||
/* Fill *P from *Q with the fields shuffled.  On little-endian targets
   P->a and P->b get the high and low 8 bits of Q->c respectively and P->c
   is built as (Q->a << 8) | Q->b; on big-endian targets the roles of the
   two halves are exchanged in each expression.  All values are computed
   into locals before the first store to *P.  There is deliberately no
   #else branch: the dg-do target selector restricts the test to big- or
   little-endian targets.  */
__attribute__((noipa)) void
|
||||
f5 (struct S *__restrict p, struct S *__restrict q)
|
||||
{
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
unsigned char pa = q->c >> 8;
|
||||
|
||||
unsigned char pb = q->c;
|
||||
|
||||
unsigned short pc = (q->a << 8) | q->b;
|
||||
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
unsigned char pa = q->c;
|
||||
|
||||
unsigned char pb = q->c >> 8;
|
||||
|
||||
unsigned short pc = q->a | (q->b << 8);
|
||||
|
||||
#endif
|
||||
p->a = pa;
|
||||
|
||||
p->b = pb;
|
||||
|
||||
p->c = pc;
|
||||
|
||||
}
|
||||
|
||||
/* Same field shuffle as f5, but every member of *P is assigned directly
   from an expression over *Q, without intermediate local variables.  The
   expressions differ between the little- and big-endian branches exactly
   as in f5.  */
__attribute__((noipa)) void
|
||||
f6 (struct S *__restrict p, struct S *__restrict q)
|
||||
{
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
p->a = q->c >> 8;
|
||||
|
||||
p->b = q->c;
|
||||
|
||||
p->c = (q->a << 8) | q->b;
|
||||
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
p->a = q->c;
|
||||
|
||||
p->b = q->c >> 8;
|
||||
|
||||
p->c = q->a | (q->b << 8);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Eight 8-bit bit-fields, a through h, declared with type unsigned long
   long; used by f7 for a bit-field-by-bit-field reversed copy.  */
struct T { unsigned long long a : 8, b : 8, c : 8, d : 8, e : 8, f : 8, g : 8, h : 8; };
|
||||
|
||||
/* Copy the eight bit-fields of *Q into *P in reversed order: P->a = Q->h,
   P->b = Q->g, ..., P->h = Q->a.  NOTE(review): presumably this accounts
   for the second expected __builtin_bswap64 match in the dg-final
   scans.  */
__attribute__((noipa)) void
|
||||
f7 (struct T *__restrict p, struct T *__restrict q)
|
||||
{
|
||||
p->a = q->h;
|
||||
|
||||
p->b = q->g;
|
||||
|
||||
p->c = q->f;
|
||||
|
||||
p->d = q->e;
|
||||
|
||||
p->e = q->d;
|
||||
|
||||
p->f = q->c;
|
||||
|
||||
p->g = q->b;
|
||||
|
||||
p->h = q->a;
|
||||
|
||||
}
|
||||
|
||||
/* Source object for the f5/f6 checks in main.  The initializer of member
   c is 0x1413 on little-endian and 0x1314 on big-endian targets; main's
   expected results for f5 are written against these values.  */
struct S b = { 0x11, 0x12,
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
0x1413
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
0x1314
|
||||
#endif
|
||||
};
|
||||
/* Source object for the f7 check in main: bit-fields a..h hold
   0x21..0x28 in declaration order.  */
struct T e = { 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 };
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
unsigned char a[8];
|
||||
int i;
|
||||
struct S b, c, d;
|
||||
f1 (a, 0x0102030405060708ULL);
|
||||
for (i = 0; i < 8; ++i)
|
||||
if (a[i] != 8 - i)
|
||||
__builtin_abort ();
|
||||
f2 (a, 0x0102030405060708ULL);
|
||||
for (i = 0; i < 8; ++i)
|
||||
if (a[i] != 1 + i)
|
||||
__builtin_abort ();
|
||||
f3 (a, a + 4);
|
||||
for (i = 0; i < 8; ++i)
|
||||
if (a[i] != (i < 4 ? 8 - i : 1 + i))
|
||||
__builtin_abort ();
|
||||
f2 (a, 0x090a0b0c0d0e0f10ULL);
|
||||
f4 (a + 4, a);
|
||||
for (i = 0; i < 8; ++i)
|
||||
if (a[i] != (i < 4 ? 9 + i : 16 - i))
|
||||
__builtin_abort ();
|
||||
f5 (&c, &b);
|
||||
if (c.a != 0x14 || c.b != 0x13
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
|| c.c != 0x1112
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
|| c.c != 0x1211
|
||||
#endif
|
||||
)
|
||||
__builtin_abort ();
|
||||
f6 (&d, &c);
|
||||
if (d.a != 0x11 || d.b != 0x12 || d.c != b.c)
|
||||
__builtin_abort ();
|
||||
struct T f;
|
||||
f7 (&f, &e);
|
||||
if (f.a != 0x28 || f.b != 0x27 || f.c != 0x26 || f.d != 0x25
|
||||
|| f.e != 0x24 || f.f != 0x23 || f.g != 0x22 || f.h != 0x21)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Merging successful" 7 "store-merging" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 2 "store-merging" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 4 "store-merging" } } */
|
Loading…
Add table
Reference in a new issue