tree-optimization/103345: Improved load merging.
This patch implements PR tree-optimization/103345 to merge adjacent loads when combined with addition or bitwise xor. The current code in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior, so all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in the same way as BIT_IOR_EXPR. Many thanks to Andrew Pinski for pointing out that this also resolves PR target/98953. 2021-11-22 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog PR tree-optimization/98953 PR tree-optimization/103345 * gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR. (pass_optimize_bswap::execute): Likewise. gcc/testsuite/ChangeLog PR tree-optimization/98953 PR tree-optimization/103345 * gcc.dg/tree-ssa/pr98953.c: New test case. * gcc.dg/tree-ssa/pr103345.c: New test case.
This commit is contained in:
parent
c38c547a70
commit
a944b5dec3
3 changed files with 72 additions and 4 deletions
|
@ -742,10 +742,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
|
|||
struct symbolic_number n1, n2;
|
||||
gimple *source_stmt, *source_stmt2;
|
||||
|
||||
if (code != BIT_IOR_EXPR)
|
||||
return NULL;
|
||||
|
||||
if (TREE_CODE (rhs2) != SSA_NAME)
|
||||
if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
|
||||
return NULL;
|
||||
|
||||
rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
|
||||
|
@ -753,6 +750,8 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
|
|||
switch (code)
|
||||
{
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case PLUS_EXPR:
|
||||
source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
|
||||
|
||||
if (!source_stmt1)
|
||||
|
@ -1495,6 +1494,8 @@ pass_optimize_bswap::execute (function *fun)
|
|||
continue;
|
||||
/* Fall through. */
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case PLUS_EXPR:
|
||||
break;
|
||||
case CONSTRUCTOR:
|
||||
{
|
||||
|
|
53
gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
Normal file
53
gcc/testsuite/gcc.dg/tree-ssa/pr103345.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap-details" } */
|
||||
|
||||
/* Local definitions of the fixed-width names used by the functions below.
   NOTE(review): presumably declared here so the test needs no system
   headers, and assumes unsigned int is 32 bits on the target -- confirm.  */
typedef unsigned int uint32_t;
|
||||
typedef unsigned char uint8_t;
|
||||
|
||||
/* Build a 32-bit value from the four consecutive bytes at PTR, combining
   the shifted bytes with bitwise OR.  The byte-to-shift assignment is
   selected by the target's byte order so that, on either endianness, the
   result equals a native 32-bit load from PTR; the accompanying dg-final
   directive expects the bswap pass to report exactly that.

   PTR must point to at least four readable bytes.
   Returns the assembled 32-bit value.  */
uint32_t load_le_32_or(const uint8_t *ptr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Little-endian: byte 0 is the least significant byte.  */
  return ((uint32_t)ptr[0]) |
         ((uint32_t)ptr[1] << 8) |
         ((uint32_t)ptr[2] << 16) |
         ((uint32_t)ptr[3] << 24);
#else
  /* Big-endian: byte 3 is the least significant byte.  */
  return ((uint32_t)ptr[3]) |
         ((uint32_t)ptr[2] << 8) |
         ((uint32_t)ptr[1] << 16) |
         ((uint32_t)ptr[0] << 24);
#endif
}
|
||||
|
||||
/* Build a 32-bit value from the four consecutive bytes at PTR, combining
   the shifted bytes with addition.  Because the four shifted bytes occupy
   disjoint bit ranges, no carries occur and the sum equals the bitwise OR
   of the same operands.  The byte-to-shift assignment is selected by the
   target's byte order so that the result equals a native 32-bit load from
   PTR on either endianness.

   PTR must point to at least four readable bytes.
   Returns the assembled 32-bit value.  */
uint32_t load_le_32_add(const uint8_t *ptr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Little-endian: byte 0 is the least significant byte.  */
  return ((uint32_t)ptr[0]) +
         ((uint32_t)ptr[1] << 8) +
         ((uint32_t)ptr[2] << 16) +
         ((uint32_t)ptr[3] << 24);
#else
  /* Big-endian: byte 3 is the least significant byte.  */
  return ((uint32_t)ptr[3]) +
         ((uint32_t)ptr[2] << 8) +
         ((uint32_t)ptr[1] << 16) +
         ((uint32_t)ptr[0] << 24);
#endif
}
|
||||
|
||||
/* Build a 32-bit value from the four consecutive bytes at PTR, combining
   the shifted bytes with bitwise XOR.  Because the four shifted bytes
   occupy disjoint bit ranges, the XOR equals the bitwise OR of the same
   operands.  The byte-to-shift assignment is selected by the target's byte
   order so that the result equals a native 32-bit load from PTR on either
   endianness, which is what the dg-final directive of this test counts.

   PTR must point to at least four readable bytes.
   Returns the assembled 32-bit value.  */
uint32_t load_le_32_xor(const uint8_t *ptr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Little-endian: byte 0 is the least significant byte.  */
  return ((uint32_t)ptr[0]) ^
         ((uint32_t)ptr[1] << 8) ^
         ((uint32_t)ptr[2] << 16) ^
         ((uint32_t)ptr[3] << 24);
#else
  /* Big-endian: byte 3 is the least significant byte.  The byte order must
     be reversed relative to the little-endian branch; reusing the
     little-endian order here would describe a byte swap rather than a load
     in target endianness.  */
  return ((uint32_t)ptr[3]) ^
         ((uint32_t)ptr[2] << 8) ^
         ((uint32_t)ptr[1] << 16) ^
         ((uint32_t)ptr[0] << 24);
#endif
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "32 bit load in target endianness found" 3 "bswap" } } */
|
||||
|
14
gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
Normal file
14
gcc/testsuite/gcc.dg/tree-ssa/pr98953.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap-details" } */
|
||||
|
||||
/* Combine the two consecutive bytes at PTR into a 16-bit quantity using
   addition.  The shifted and unshifted bytes occupy disjoint bit ranges,
   so the sum equals their bitwise OR.  The byte order is selected by the
   target's endianness so that the result equals a native 16-bit load from
   PTR, which the dg-final directive of this test scans for.

   PTR must point to at least two readable bytes.
   Returns the combined value, in the range 0..65535, as an int.  */
int foo(unsigned char *ptr)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Little-endian: byte 0 is the low byte.  */
  return ptr[0] + (ptr[1] << 8);
#else
  /* Big-endian: byte 1 is the low byte.  */
  return ptr[1] + (ptr[0] << 8);
#endif
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "16 bit load in target endianness found" "bswap" } } */
|
||||
|
Loading…
Add table
Reference in a new issue