From a944b5dec3adb28ed199234d2116145ca9010d6a Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Mon, 22 Nov 2021 18:15:36 +0000 Subject: [PATCH] tree-optimization/103345: Improved load merging. This patch implements PR tree-optimization/103345 to merge adjacent loads when combined with addition or bitwise xor. The current code in gimple-ssa-store-merging.c's find_bswap_or_nop already handles ior, so that all that's required is to treat PLUS_EXPR and BIT_XOR_EXPR in the same way as BIT_IOR_EXPR. Many thanks to Andrew Pinski for pointing out that this also resolves PR target/98953. 2021-11-22 Roger Sayle gcc/ChangeLog PR tree-optimization/98953 PR tree-optimization/103345 * gimple-ssa-store-merging.c (find_bswap_or_nop_1): Handle BIT_XOR_EXPR and PLUS_EXPR the same as BIT_IOR_EXPR. (pass_optimize_bswap::execute): Likewise. gcc/testsuite/ChangeLog PR tree-optimization/98953 PR tree-optimization/103345 * gcc.dg/tree-ssa/pr98953.c: New test case. * gcc.dg/tree-ssa/pr103345.c: New test case. --- gcc/gimple-ssa-store-merging.c | 9 ++-- gcc/testsuite/gcc.dg/tree-ssa/pr103345.c | 53 ++++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr98953.c | 14 +++++++ 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103345.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr98953.c diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index 4efa200428a..1740c9ee1c1 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -742,10 +742,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) struct symbolic_number n1, n2; gimple *source_stmt, *source_stmt2; - if (code != BIT_IOR_EXPR) - return NULL; - - if (TREE_CODE (rhs2) != SSA_NAME) + if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME) return NULL; rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); @@ -753,6 +750,8 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) switch (code) { case BIT_IOR_EXPR: + case 
BIT_XOR_EXPR: + case PLUS_EXPR: source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); if (!source_stmt1) @@ -1495,6 +1494,8 @@ pass_optimize_bswap::execute (function *fun) continue; /* Fall through. */ case BIT_IOR_EXPR: + case BIT_XOR_EXPR: + case PLUS_EXPR: break; case CONSTRUCTOR: { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c new file mode 100644 index 00000000000..94388b541c1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103345.c @@ -0,0 +1,53 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-bswap-details" } */ + +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +/* Four adjacent bytes combined in target byte order, via |, + and ^.  */ +uint32_t load_le_32_or(const uint8_t *ptr) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return ((uint32_t)ptr[0]) | + ((uint32_t)ptr[1] << 8) | + ((uint32_t)ptr[2] << 16) | + ((uint32_t)ptr[3] << 24); +#else + return ((uint32_t)ptr[3]) | + ((uint32_t)ptr[2] << 8) | + ((uint32_t)ptr[1] << 16) | + ((uint32_t)ptr[0] << 24); +#endif +} + +uint32_t load_le_32_add(const uint8_t *ptr) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return ((uint32_t)ptr[0]) + + ((uint32_t)ptr[1] << 8) + + ((uint32_t)ptr[2] << 16) + + ((uint32_t)ptr[3] << 24); +#else + return ((uint32_t)ptr[3]) + + ((uint32_t)ptr[2] << 8) + + ((uint32_t)ptr[1] << 16) + + ((uint32_t)ptr[0] << 24); +#endif +} + +uint32_t load_le_32_xor(const uint8_t *ptr) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return ((uint32_t)ptr[0]) ^ + ((uint32_t)ptr[1] << 8) ^ + ((uint32_t)ptr[2] << 16) ^ + ((uint32_t)ptr[3] << 24); +#else /* Not little-endian: ptr[0] is the top byte.  */ + return ((uint32_t)ptr[3]) ^ + ((uint32_t)ptr[2] << 8) ^ + ((uint32_t)ptr[1] << 16) ^ + ((uint32_t)ptr[0] << 24); +#endif +} + +/* { dg-final { scan-tree-dump-times "32 bit load in target endianness found" 3 "bswap" } } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c new file mode 100644 index 00000000000..7687dc2871d --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/tree-ssa/pr98953.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-bswap-details" } */ +/* Add two adjacent bytes in target byte order; see the dg-final scan.  */ +int foo(unsigned char *ptr) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ /* ptr[0] is the low byte.  */ + return ptr[0] + (ptr[1] << 8); +#else /* ptr[0] is the high byte.  */ + return ptr[1] + (ptr[0] << 8); +#endif +} + +/* { dg-final { scan-tree-dump "16 bit load in target endianness found" "bswap" } } */ +