re PR tree-optimization/54733 (Missing opportunity to optimize endian independent load/store)
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com> PR tree-optimization/54733 gcc/ * tree-ssa-math-opts.c (nop_stats): New "bswap_stats" structure. (CMPNOP): Define. (find_bswap_or_nop_load): New. (find_bswap_1): Renamed to ... (find_bswap_or_nop_1): This. Also add support for memory source. (find_bswap): Renamed to ... (find_bswap_or_nop): This. Also add support for memory source and detection of bitwise operations equivalent to load in host endianness. (execute_optimize_bswap): Likewise. Also move its leading comment back in place and split statement transformation into ... (bswap_replace): This. gcc/testsuite * gcc.dg/optimize-bswapdi-3.c: New test to check extension of bswap optimization to support memory sources and bitwise operations equivalent to load in host endianness. * gcc.dg/optimize-bswaphi-1.c: Likewise. * gcc.dg/optimize-bswapsi-2.c: Likewise. * gcc.c-torture/execute/bswap-2.c: Likewise. From-SVN: r210843
This commit is contained in:
parent
eaa33a6a65
commit
73984f8494
7 changed files with 679 additions and 109 deletions
|
@ -1,3 +1,18 @@
|
|||
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
|
||||
|
||||
PR tree-optimization/54733
|
||||
* tree-ssa-math-opts.c (nop_stats): New "bswap_stats" structure.
|
||||
(CMPNOP): Define.
|
||||
(find_bswap_or_nop_load): New.
|
||||
(find_bswap_1): Renamed to ...
|
||||
(find_bswap_or_nop_1): This. Also add support for memory source.
|
||||
(find_bswap): Renamed to ...
|
||||
(find_bswap_or_nop): This. Also add support for memory source and
|
||||
detection of bitwise operations equivalent to load in host endianness.
|
||||
(execute_optimize_bswap): Likewise. Also move its leading comment back
|
||||
in place and split statement transformation into ...
|
||||
(bswap_replace): This.
|
||||
|
||||
2014-05-22 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
PR rtl-optimization/61215
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
|
||||
|
||||
PR tree-optimization/54733
|
||||
* gcc.dg/optimize-bswapdi-3.c: New test to check extension of bswap
|
||||
optimization to support memory sources and bitwise operations
|
||||
equivalent to load in host endianness.
|
||||
* gcc.dg/optimize-bswaphi-1.c: Likewise.
|
||||
* gcc.dg/optimize-bswapsi-2.c: Likewise.
|
||||
* gcc.c-torture/execute/bswap-2.c: Likewise.
|
||||
|
||||
2014-05-23 Thomas Preud'homme <thomas.preudhomme@arm.com>
|
||||
|
||||
* lib/target-supports.exp: New effective targets for architectures
|
||||
|
|
90
gcc/testsuite/gcc.c-torture/execute/bswap-2.c
Normal file
90
gcc/testsuite/gcc.c-torture/execute/bswap-2.c
Normal file
|
@ -0,0 +1,90 @@
|
|||
#ifdef __UINT32_TYPE__
|
||||
typedef __UINT32_TYPE__ uint32_t;
|
||||
#else
|
||||
/* No __UINT32_TYPE__ available: fall back to plain unsigned.  main () skips
   its checks when this type is not exactly 32 bits wide.  */
typedef unsigned uint32_t;
|
||||
#endif
|
||||
|
||||
struct bitfield {
|
||||
unsigned char f0:7;
|
||||
unsigned char f1:7;
|
||||
unsigned char f2:7;
|
||||
unsigned char f3:7;
|
||||
};
|
||||
|
||||
struct ok {
|
||||
unsigned char f0;
|
||||
unsigned char f1;
|
||||
unsigned char f2;
|
||||
unsigned char f3;
|
||||
};
|
||||
|
||||
union bf_or_uint32 {
|
||||
struct ok inval;
|
||||
struct bitfield bfval;
|
||||
};
|
||||
|
||||
/* Combine the four 7-bit bitfields of IN.bfval into one value, placing f0 in
   bits 0-7, f1 in bits 8-15, f2 in bits 16-23 and f3 in bits 24-31.
   NOTE(review): every field is one bit narrower than an unsigned char, so
   this is presumably a pattern the bswap pass must NOT replace by a plain
   32-bit load of IN -- confirm.  */
__attribute__ ((noinline, noclone)) uint32_t
|
||||
partial_read_le32 (union bf_or_uint32 in)
|
||||
{
|
||||
return in.bfval.f0 | (in.bfval.f1 << 8)
|
||||
| (in.bfval.f2 << 16) | (in.bfval.f3 << 24);
|
||||
}
|
||||
|
||||
/* Combine the four 7-bit bitfields of IN.bfval into one value in the opposite
   order: f3 in bits 0-7, f2 in bits 8-15, f1 in bits 16-23 and f0 in bits
   24-31.
   NOTE(review): every field is one bit narrower than an unsigned char, so
   this is presumably a pattern the bswap pass must NOT replace by a 32-bit
   load followed by a byte swap -- confirm.  */
__attribute__ ((noinline, noclone)) uint32_t
|
||||
partial_read_be32 (union bf_or_uint32 in)
|
||||
{
|
||||
return in.bfval.f3 | (in.bfval.f2 << 8)
|
||||
| (in.bfval.f1 << 16) | (in.bfval.f0 << 24);
|
||||
}
|
||||
|
||||
/* Read X[0] to X[3] one byte at a time and combine them with X[0] as the least
   significant byte.  A store of 1 through Y happens between the loads of X[1]
   and X[2]; when Y points into X (main passes &cin[2]) the later loads see
   the stored value.
   NOTE(review): presumably this checks that the bswap pass does not merge
   loads separated by a possibly aliasing store -- confirm.  */
__attribute__ ((noinline, noclone)) uint32_t
|
||||
fake_read_le32 (char *x, char *y)
|
||||
{
|
||||
unsigned char c0, c1, c2, c3;
|
||||
|
||||
c0 = x[0];
|
||||
c1 = x[1];
|
||||
/* Store placed between the byte loads on purpose; Y may alias X.  */
*y = 1;
|
||||
c2 = x[2];
|
||||
c3 = x[3];
|
||||
return c0 | c1 << 8 | c2 << 16 | c3 << 24;
|
||||
}
|
||||
|
||||
/* Read X[0] to X[3] one byte at a time and combine them with X[0] as the most
   significant byte.  A store of 1 through Y happens between the loads of X[1]
   and X[2]; when Y points into X (main passes &cin[2]) the later loads see
   the stored value.
   NOTE(review): presumably this checks that the bswap pass does not merge
   loads separated by a possibly aliasing store -- confirm.  */
__attribute__ ((noinline, noclone)) uint32_t
|
||||
fake_read_be32 (char *x, char *y)
|
||||
{
|
||||
unsigned char c0, c1, c2, c3;
|
||||
|
||||
c0 = x[0];
|
||||
c1 = x[1];
|
||||
/* Store placed between the byte loads on purpose; Y may alias X.  */
*y = 1;
|
||||
c2 = x[2];
|
||||
c3 = x[3];
|
||||
return c3 | c2 << 8 | c1 << 16 | c0 << 24;
|
||||
}
|
||||
|
||||
/* Run the four readers on the bytes 0x83, 0x85, 0x87, 0x89 and abort if any
   result differs from the accepted values.  Returns 0 on success.  */
int
|
||||
main ()
|
||||
{
|
||||
union bf_or_uint32 bfin;
|
||||
uint32_t out;
|
||||
char cin[] = { 0x83, 0x85, 0x87, 0x89 };
|
||||
|
||||
/* The expected constants below only make sense for a 32-bit uint32_t.  */
if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
|
||||
return 0;
|
||||
bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
|
||||
out = partial_read_le32 (bfin);
|
||||
/* 0x09070503 is the low 7 bits of each input byte with f0 least significant.
   NOTE(review): 0x88868482 is each input byte with its low bit cleared,
   presumably accepted for targets with another bitfield layout -- confirm.  */
if (out != 0x09070503 && out != 0x88868482)
|
||||
__builtin_abort ();
|
||||
bfin.inval = (struct ok) { 0x83, 0x85, 0x87, 0x89 };
|
||||
out = partial_read_be32 (bfin);
|
||||
/* Same two accepted encodings as above, with the byte order reversed.  */
if (out != 0x03050709 && out != 0x82848688)
|
||||
__builtin_abort ();
|
||||
/* &cin[2] aliases x[2]: the store of 1 inside the callee happens before x[2]
   is loaded, so that byte of the result is 0x01 rather than 0x87.  */
out = fake_read_le32 (cin, &cin[2]);
|
||||
if (out != 0x89018583)
|
||||
__builtin_abort ();
|
||||
/* cin[2] already holds 1 from the previous call and is stored again here.  */
out = fake_read_be32 (cin, &cin[2]);
|
||||
if (out != 0x83850189)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
64
gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
Normal file
64
gcc/testsuite/gcc.dg/optimize-bswapdi-3.c
Normal file
|
@ -0,0 +1,64 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target bswap64 } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
unsigned char data[8];
|
||||
|
||||
struct uint64_st {
|
||||
unsigned char u0, u1, u2, u3, u4, u5, u6, u7;
|
||||
};
|
||||
|
||||
uint64_t read_le64_1 (void)
|
||||
{
|
||||
return (uint64_t) data[0] | ((uint64_t) data[1] << 8)
|
||||
| ((uint64_t) data[2] << 16) | ((uint64_t) data[3] << 24)
|
||||
| ((uint64_t) data[4] << 32) | ((uint64_t) data[5] << 40)
|
||||
| ((uint64_t) data[6] << 48) | ((uint64_t) data[7] << 56);
|
||||
}
|
||||
|
||||
uint64_t read_le64_2 (struct uint64_st data)
|
||||
{
|
||||
return (uint64_t) data.u0 | ((uint64_t) data.u1 << 8)
|
||||
| ((uint64_t) data.u2 << 16) | ((uint64_t) data.u3 << 24)
|
||||
| ((uint64_t) data.u4 << 32) | ((uint64_t) data.u5 << 40)
|
||||
| ((uint64_t) data.u6 << 48) | ((uint64_t) data.u7 << 56);
|
||||
}
|
||||
|
||||
uint64_t read_le64_3 (unsigned char *data)
|
||||
{
|
||||
return (uint64_t) *data | ((uint64_t) *(data + 1) << 8)
|
||||
| ((uint64_t) *(data + 2) << 16) | ((uint64_t) *(data + 3) << 24)
|
||||
| ((uint64_t) *(data + 4) << 32) | ((uint64_t) *(data + 5) << 40)
|
||||
| ((uint64_t) *(data + 6) << 48) | ((uint64_t) *(data + 7) << 56);
|
||||
}
|
||||
|
||||
uint64_t read_be64_1 (void)
|
||||
{
|
||||
return (uint64_t) data[7] | ((uint64_t) data[6] << 8)
|
||||
| ((uint64_t) data[5] << 16) | ((uint64_t) data[4] << 24)
|
||||
| ((uint64_t) data[3] << 32) | ((uint64_t) data[2] << 40)
|
||||
| ((uint64_t) data[1] << 48) | ((uint64_t) data[0] << 56);
|
||||
}
|
||||
|
||||
uint64_t read_be64_2 (struct uint64_st data)
|
||||
{
|
||||
return (uint64_t) data.u7 | ((uint64_t) data.u6 << 8)
|
||||
| ((uint64_t) data.u5 << 16) | ((uint64_t) data.u4 << 24)
|
||||
| ((uint64_t) data.u3 << 32) | ((uint64_t) data.u2 << 40)
|
||||
| ((uint64_t) data.u1 << 48) | ((uint64_t) data.u0 << 56);
|
||||
}
|
||||
|
||||
uint64_t read_be64_3 (unsigned char *data)
|
||||
{
|
||||
return (uint64_t) *(data + 7) | ((uint64_t) *(data + 6) << 8)
|
||||
| ((uint64_t) *(data + 5) << 16) | ((uint64_t) *(data + 4) << 24)
|
||||
| ((uint64_t) *(data + 3) << 32) | ((uint64_t) *(data + 2) << 40)
|
||||
| ((uint64_t) *(data + 1) << 48) | ((uint64_t) *data << 56);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "64 bit load in host endianness found at" 3 "bswap" } } */
|
||||
/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "bswap" } } */
|
47
gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
Normal file
47
gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target bswap16 } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap" } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
unsigned char data[2];
|
||||
|
||||
struct uint16_st {
|
||||
unsigned char u0, u1;
|
||||
};
|
||||
|
||||
uint32_t read_le16_1 (void)
|
||||
{
|
||||
return data[0] | (data[1] << 8);
|
||||
}
|
||||
|
||||
uint32_t read_le16_2 (struct uint16_st data)
|
||||
{
|
||||
return data.u0 | (data.u1 << 8);
|
||||
}
|
||||
|
||||
uint32_t read_le16_3 (unsigned char *data)
|
||||
{
|
||||
return *data | (*(data + 1) << 8);
|
||||
}
|
||||
|
||||
uint32_t read_be16_1 (void)
|
||||
{
|
||||
return data[1] | (data[0] << 8);
|
||||
}
|
||||
|
||||
uint32_t read_be16_2 (struct uint16_st data)
|
||||
{
|
||||
return data.u1 | (data.u0 << 8);
|
||||
}
|
||||
|
||||
uint32_t read_be16_3 (unsigned char *data)
|
||||
{
|
||||
return *(data + 1) | (*data << 8);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "16 bit load in host endianness found at" 3 "bswap" } } */
|
||||
/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "bswap" } } */
|
49
gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
Normal file
49
gcc/testsuite/gcc.dg/optimize-bswapsi-2.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target bswap32 } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap" } */
|
||||
/* { dg-options "-O2 -fdump-tree-bswap -march=z900" { target s390-*-* } } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
extern unsigned char data[4];
|
||||
|
||||
struct uint32_st {
|
||||
unsigned char u0, u1, u2, u3;
|
||||
};
|
||||
|
||||
uint32_t read_le32_1 (void)
|
||||
{
|
||||
return data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
|
||||
}
|
||||
|
||||
uint32_t read_le32_2 (struct uint32_st data)
|
||||
{
|
||||
return data.u0 | (data.u1 << 8) | (data.u2 << 16) | (data.u3 << 24);
|
||||
}
|
||||
|
||||
uint32_t read_le32_3 (unsigned char *data)
|
||||
{
|
||||
return *data | (*(data + 1) << 8) | (*(data + 2) << 16)
|
||||
| (*(data + 3) << 24);
|
||||
}
|
||||
|
||||
uint32_t read_be32_1 (void)
|
||||
{
|
||||
return data[3] | (data[2] << 8) | (data[1] << 16) | (data[0] << 24);
|
||||
}
|
||||
|
||||
uint32_t read_be32_2 (struct uint32_st data)
|
||||
{
|
||||
return data.u3 | (data.u2 << 8) | (data.u1 << 16) | (data.u0 << 24);
|
||||
}
|
||||
|
||||
uint32_t read_be32_3 (unsigned char *data)
|
||||
{
|
||||
return *(data + 3) | (*(data + 2) << 8) | (*(data + 1) << 16)
|
||||
| (*data << 24);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "32 bit load in host endianness found at" 3 "bswap" } } */
|
||||
/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 3 "bswap" { xfail alpha*-*-* arm*-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "bswap" } } */
|
|
@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "is-a.h"
|
||||
#include "gimple.h"
|
||||
#include "gimple-iterator.h"
|
||||
#include "gimplify.h"
|
||||
#include "gimplify-me.h"
|
||||
#include "stor-layout.h"
|
||||
#include "gimple-ssa.h"
|
||||
|
@ -170,15 +171,15 @@ static struct
|
|||
|
||||
static struct
|
||||
{
|
||||
/* Number of hand-written 16-bit bswaps found. */
|
||||
/* Number of hand-written 16-bit nop / bswaps found. */
|
||||
int found_16bit;
|
||||
|
||||
/* Number of hand-written 32-bit bswaps found. */
|
||||
/* Number of hand-written 32-bit nop / bswaps found. */
|
||||
int found_32bit;
|
||||
|
||||
/* Number of hand-written 64-bit bswaps found. */
|
||||
/* Number of hand-written 64-bit nop / bswaps found. */
|
||||
int found_64bit;
|
||||
} bswap_stats;
|
||||
} nop_stats, bswap_stats;
|
||||
|
||||
static struct
|
||||
{
|
||||
|
@ -1604,13 +1605,43 @@ make_pass_cse_sincos (gcc::context *ctxt)
|
|||
|
||||
0 - byte has the value 0
|
||||
1..size - byte contains the content of the byte
|
||||
number indexed with that value minus one */
|
||||
number indexed with that value minus one.
|
||||
|
||||
To detect permutations on memory sources (arrays and structures), a symbolic
|
||||
number is also associated with a base address (the array or structure the load is
|
||||
made from), an offset from the base address and a range which gives the
|
||||
difference between the highest and lowest accessed memory location to make
|
||||
such a symbolic number. The range is thus different from size which reflects
|
||||
the size of the type of current expression. Note that for non memory source,
|
||||
range holds the same value as size.
|
||||
|
||||
For instance, for an array char a[], (short) a[0] | (short) a[3] would have
|
||||
a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
|
||||
still have a size of 2 but this time a range of 1. */
|
||||
|
||||
struct symbolic_number {
|
||||
/* One marker per byte of the value: 0 for a zero byte, otherwise the
   1-based index of the source byte found at that position.  */
unsigned HOST_WIDEST_INT n;
|
||||
/* Size in bytes of the type of the current expression.  */
int size;
|
||||
/* Base of the memory area accessed, as set by find_bswap_or_nop_load;
   NULL_TREE when the source is not memory.  */
tree base_addr;
|
||||
/* Offset tree reported by get_inner_reference for the access, adjusted by
   find_bswap_or_nop_load for negative MEM_REF offsets; may be NULL_TREE.  */
tree offset;
|
||||
/* Constant byte position of the access from base_addr.  */
HOST_WIDE_INT bytepos;
|
||||
/* Result of reference_alias_ptr_type for the loaded reference; later used as
   the type of the zero offset operand of the MEM_REF built by
   bswap_replace.  */
tree alias_set;
|
||||
/* Virtual use (gimple_vuse) of the load statement.  */
tree vuse;
|
||||
/* Distance between the lowest and highest memory location accessed, in
   bytes while the pattern is analysed; equals size for a non memory
   source.  find_bswap_or_nop converts it to bits before returning.  */
unsigned HOST_WIDE_INT range;
|
||||
};
|
||||
|
||||
/* The number which the find_bswap_or_nop_1 result should match in
|
||||
order to have a nop. The number is masked according to the size of
|
||||
the symbolic number before using it. */
|
||||
#define CMPNOP (sizeof (HOST_WIDEST_INT) < 8 ? 0 : \
|
||||
(unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201)
|
||||
|
||||
/* The number which the find_bswap_or_nop_1 result should match in
|
||||
order to have a byte swap. The number is masked according to the
|
||||
size of the symbolic number before using it. */
|
||||
#define CMPXCHG (sizeof (HOST_WIDEST_INT) < 8 ? 0 : \
|
||||
(unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708)
|
||||
|
||||
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
|
||||
number N. Return false if the requested operation is not permitted
|
||||
on a symbolic number. */
|
||||
|
@ -1670,13 +1701,76 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* find_bswap_1 invokes itself recursively with N and tries to perform
|
||||
the operation given by the rhs of STMT on the result. If the
|
||||
operation could successfully be executed the function returns the
|
||||
tree expression of the source operand and NULL otherwise. */
|
||||
/* Check whether STMT is a non-volatile load from memory reference REF that can
|
||||
   serve as a leaf of a byte swap or host-endian load pattern, and return the
|
||||
   answer.  On success the base of the accessed memory area, the offset and
   the constant byte position of the access from that base, the alias pointer
   type of REF and the virtual use of STMT are recorded in N.  Return false
   if the access does not start on a byte boundary or does not cover a whole
   number of bytes; N->base_addr and N->offset have already been overwritten
   in that case.  */
|
||||
|
||||
bool
|
||||
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
|
||||
{
|
||||
/* Leaf node is an array or component ref. Memorize its base and
|
||||
offset from base to compare to other such leaf node. */
|
||||
HOST_WIDE_INT bitsize, bitpos;
|
||||
enum machine_mode mode;
|
||||
int unsignedp, volatilep;
|
||||
|
||||
/* Only plain, non-volatile loads qualify; N is left untouched otherwise.  */
if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
|
||||
return false;
|
||||
|
||||
/* Split REF into a base, an offset tree and a constant bit position.  */
n->base_addr = get_inner_reference (ref, &bitsize, &bitpos, &n->offset,
|
||||
&mode, &unsignedp, &volatilep, false);
|
||||
|
||||
/* For a MEM_REF base, fold its constant offset into BITPOS and use the
   pointer operand as the base instead.  */
if (TREE_CODE (n->base_addr) == MEM_REF)
|
||||
{
|
||||
offset_int bit_offset = 0;
|
||||
tree off = TREE_OPERAND (n->base_addr, 1);
|
||||
|
||||
if (!integer_zerop (off))
|
||||
{
|
||||
/* COFF is the MEM_REF offset in bytes, BOFF the same offset in bits.  */
offset_int boff, coff = mem_ref_offset (n->base_addr);
|
||||
boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
|
||||
bit_offset += boff;
|
||||
}
|
||||
|
||||
n->base_addr = TREE_OPERAND (n->base_addr, 0);
|
||||
|
||||
/* Avoid returning a negative bitpos as this may wreak havoc later. */
|
||||
if (wi::neg_p (bit_offset))
|
||||
{
|
||||
offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
|
||||
offset_int tem = bit_offset.and_not (mask);
|
||||
/* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
|
||||
Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
|
||||
bit_offset -= tem;
|
||||
tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
|
||||
if (n->offset)
|
||||
n->offset = size_binop (PLUS_EXPR, n->offset,
|
||||
wide_int_to_tree (sizetype, tem));
|
||||
else
|
||||
n->offset = wide_int_to_tree (sizetype, tem);
|
||||
}
|
||||
|
||||
bitpos += bit_offset.to_shwi ();
|
||||
}
|
||||
|
||||
/* Reject accesses that do not start on a byte boundary ...  */
if (bitpos % BITS_PER_UNIT)
|
||||
return false;
|
||||
/* ... or whose size is not a whole number of bytes.  */
if (bitsize % BITS_PER_UNIT)
|
||||
return false;
|
||||
|
||||
n->bytepos = bitpos / BITS_PER_UNIT;
|
||||
n->alias_set = reference_alias_ptr_type (ref);
|
||||
n->vuse = gimple_vuse (stmt);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
|
||||
the operation given by the rhs of STMT on the result. If the operation
|
||||
could successfully be executed the function returns the tree expression of
|
||||
the source operand and NULL otherwise. */
|
||||
|
||||
static tree
|
||||
find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
||||
find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
|
||||
{
|
||||
enum tree_code code;
|
||||
tree rhs1, rhs2 = NULL;
|
||||
|
@ -1689,6 +1783,9 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
|
||||
rhs1 = gimple_assign_rhs1 (stmt);
|
||||
|
||||
if (find_bswap_or_nop_load (stmt, rhs1, n))
|
||||
return rhs1;
|
||||
|
||||
if (TREE_CODE (rhs1) != SSA_NAME)
|
||||
return NULL_TREE;
|
||||
|
||||
|
@ -1715,11 +1812,11 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
&& code != CONVERT_EXPR)
|
||||
return NULL_TREE;
|
||||
|
||||
source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
|
||||
source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
|
||||
|
||||
/* If find_bswap_1 returned NULL STMT is a leaf node and we have
|
||||
to initialize the symbolic number. */
|
||||
if (!source_expr1)
|
||||
/* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
|
||||
we have to initialize the symbolic number. */
|
||||
if (!source_expr1 || gimple_assign_load_p (rhs1_stmt))
|
||||
{
|
||||
/* Set up the symbolic number N by setting each byte to a
|
||||
value between 1 and the byte size of rhs1. The highest
|
||||
|
@ -1729,14 +1826,18 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
if (n->size % BITS_PER_UNIT != 0)
|
||||
return NULL_TREE;
|
||||
n->size /= BITS_PER_UNIT;
|
||||
n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
|
||||
(unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
|
||||
n->range = n->size;
|
||||
n->n = CMPNOP;
|
||||
|
||||
if (n->size < (int)sizeof (HOST_WIDEST_INT))
|
||||
n->n &= ((unsigned HOST_WIDEST_INT)1 <<
|
||||
(n->size * BITS_PER_UNIT)) - 1;
|
||||
|
||||
source_expr1 = rhs1;
|
||||
if (!source_expr1)
|
||||
{
|
||||
n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
|
||||
source_expr1 = rhs1;
|
||||
}
|
||||
}
|
||||
|
||||
switch (code)
|
||||
|
@ -1777,6 +1878,8 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
|
||||
}
|
||||
n->size = type_size / BITS_PER_UNIT;
|
||||
if (!n->base_addr)
|
||||
n->range = n->size;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -1805,17 +1908,79 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
switch (code)
|
||||
{
|
||||
case BIT_IOR_EXPR:
|
||||
source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
|
||||
source_expr1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
|
||||
|
||||
if (!source_expr1)
|
||||
return NULL_TREE;
|
||||
|
||||
source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
|
||||
source_expr2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
|
||||
|
||||
if (source_expr1 != source_expr2
|
||||
|| n1.size != n2.size)
|
||||
if (n1.size != n2.size || !source_expr2)
|
||||
return NULL_TREE;
|
||||
|
||||
if (!n1.vuse != !n2.vuse ||
|
||||
(n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
|
||||
return NULL_TREE;
|
||||
|
||||
if (source_expr1 != source_expr2)
|
||||
{
|
||||
HOST_WIDEST_INT inc, mask;
|
||||
unsigned i;
|
||||
HOST_WIDE_INT off_sub;
|
||||
struct symbolic_number *n_ptr;
|
||||
|
||||
if (!n1.base_addr || !n2.base_addr
|
||||
|| !operand_equal_p (n1.base_addr, n2.base_addr, 0))
|
||||
return NULL_TREE;
|
||||
if (!n1.offset != !n2.offset ||
|
||||
(n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
|
||||
return NULL_TREE;
|
||||
|
||||
/* We swap n1 with n2 to have n1 < n2. */
|
||||
if (n2.bytepos < n1.bytepos)
|
||||
{
|
||||
struct symbolic_number tmpn;
|
||||
|
||||
tmpn = n2;
|
||||
n2 = n1;
|
||||
n1 = tmpn;
|
||||
source_expr1 = source_expr2;
|
||||
}
|
||||
|
||||
off_sub = n2.bytepos - n1.bytepos;
|
||||
|
||||
/* Check that the range of memory covered < biggest int size. */
|
||||
if (off_sub + n2.range > (int) sizeof (HOST_WIDEST_INT))
|
||||
return NULL_TREE;
|
||||
n->range = n2.range + off_sub;
|
||||
|
||||
/* Reinterpret byte marks in symbolic number holding the value of
|
||||
bigger weight according to host endianness. */
|
||||
inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
|
||||
mask = 0xFF;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
n_ptr = &n1;
|
||||
else
|
||||
n_ptr = &n2;
|
||||
for (i = 0; i < sizeof (HOST_WIDEST_INT); i++, inc <<= 8,
|
||||
mask <<= 8)
|
||||
{
|
||||
if (n_ptr->n & mask)
|
||||
n_ptr->n += inc;
|
||||
}
|
||||
}
|
||||
else
|
||||
n->range = n1.range;
|
||||
|
||||
if (!n1.alias_set
|
||||
|| alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
|
||||
n->alias_set = n1.alias_set;
|
||||
else
|
||||
n->alias_set = ptr_type_node;
|
||||
n->vuse = n1.vuse;
|
||||
n->base_addr = n1.base_addr;
|
||||
n->offset = n1.offset;
|
||||
n->bytepos = n1.bytepos;
|
||||
n->size = n1.size;
|
||||
for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT)
|
||||
{
|
||||
|
@ -1840,57 +2005,75 @@ find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
|
|||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Check if STMT completes a bswap implementation consisting of ORs,
|
||||
SHIFTs and ANDs. Return the source tree expression on which the
|
||||
byte swap is performed and NULL if no bswap was found. */
|
||||
/* Check if STMT completes a bswap implementation or a read in a given
|
||||
endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
|
||||
accordingly. It also sets N to represent the kind of operations
|
||||
performed: size of the resulting expression and whether it works on
|
||||
a memory source, and if so alias-set and vuse. At last, the
|
||||
function returns the source tree expression. */
|
||||
|
||||
static tree
|
||||
find_bswap (gimple stmt)
|
||||
find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
|
||||
{
|
||||
/* The number which the find_bswap result should match in order to
|
||||
have a full byte swap. The number is shifted to the left according
|
||||
to the size of the symbolic number before using it. */
|
||||
unsigned HOST_WIDEST_INT cmp =
|
||||
sizeof (HOST_WIDEST_INT) < 8 ? 0 :
|
||||
(unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
|
||||
/* The number which the find_bswap_or_nop_1 result should match in order
|
||||
to have a full byte swap. The number is shifted to the right
|
||||
according to the size of the symbolic number before using it. */
|
||||
unsigned HOST_WIDEST_INT cmpxchg = CMPXCHG;
|
||||
unsigned HOST_WIDEST_INT cmpnop = CMPNOP;
|
||||
|
||||
struct symbolic_number n;
|
||||
tree source_expr;
|
||||
int limit;
|
||||
|
||||
/* The last parameter determines the depth search limit. It usually
|
||||
correlates directly to the number of bytes to be touched. We
|
||||
increase that number by three here in order to also
|
||||
cover signed -> unsigned converions of the src operand as can be seen
|
||||
correlates directly to the number n of bytes to be touched. We
|
||||
increase that number by log2(n) + 1 here in order to also
|
||||
cover signed -> unsigned conversions of the src operand as can be seen
|
||||
in libgcc, and for initial shift/and operation of the src operand. */
|
||||
limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
|
||||
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
|
||||
source_expr = find_bswap_1 (stmt, &n, limit);
|
||||
source_expr = find_bswap_or_nop_1 (stmt, n, limit);
|
||||
|
||||
if (!source_expr)
|
||||
return NULL_TREE;
|
||||
|
||||
/* Zero out the extra bits of N and CMP. */
|
||||
if (n.size < (int)sizeof (HOST_WIDEST_INT))
|
||||
/* Find real size of result (highest non zero byte). */
|
||||
if (n->base_addr)
|
||||
{
|
||||
unsigned HOST_WIDEST_INT mask =
|
||||
((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
|
||||
int rsize;
|
||||
unsigned HOST_WIDEST_INT tmpn;
|
||||
|
||||
n.n &= mask;
|
||||
cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
|
||||
for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_UNIT, rsize++);
|
||||
n->range = rsize;
|
||||
}
|
||||
|
||||
/* A complete byte swap should make the symbolic number to start
|
||||
with the largest digit in the highest order byte. */
|
||||
if (cmp != n.n)
|
||||
/* Zero out the extra bits of N and CMP*. */
|
||||
if (n->range < (int)sizeof (HOST_WIDEST_INT))
|
||||
{
|
||||
unsigned HOST_WIDEST_INT mask;
|
||||
|
||||
mask = ((unsigned HOST_WIDEST_INT)1 << (n->range * BITS_PER_UNIT)) - 1;
|
||||
cmpxchg >>= (sizeof (HOST_WIDEST_INT) - n->range) * BITS_PER_UNIT;
|
||||
cmpnop &= mask;
|
||||
}
|
||||
|
||||
/* A complete byte swap should make the symbolic number to start with
|
||||
the largest digit in the highest order byte. Unchanged symbolic
|
||||
number indicates a read with same endianness as host architecture. */
|
||||
if (n->n == cmpnop)
|
||||
*bswap = false;
|
||||
else if (n->n == cmpxchg)
|
||||
*bswap = true;
|
||||
else
|
||||
return NULL_TREE;
|
||||
|
||||
/* Useless bit manipulation performed by code. */
|
||||
if (!n->base_addr && n->n == cmpnop)
|
||||
return NULL_TREE;
|
||||
|
||||
n->range *= BITS_PER_UNIT;
|
||||
return source_expr;
|
||||
}
|
||||
|
||||
/* Find manual byte swap implementations and turn them into a bswap
|
||||
builtin invokation. */
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_optimize_bswap =
|
||||
|
@ -1924,6 +2107,156 @@ public:
|
|||
|
||||
}; // class pass_optimize_bswap
|
||||
|
||||
/* Perform the bswap optimization: replace the statement STMT at GSI
|
||||
with load type, VUSE and alias-set as described by N if a memory
|
||||
source is involved (N->base_addr is non null), followed by the
|
||||
builtin bswap invocation in FNDECL if BSWAP is true. SRC gives
|
||||
the source on which STMT is operating and N->range gives the
|
||||
size of the expression involved for maintaining some statistics. */
|
||||
|
||||
static bool
|
||||
bswap_replace (gimple stmt, gimple_stmt_iterator *gsi, tree src, tree fndecl,
|
||||
tree bswap_type, tree load_type, struct symbolic_number *n,
|
||||
bool bswap)
|
||||
{
|
||||
/* Returns true when STMT has been rewritten.  The only false return is for a
   byte-swapped memory source for which SLOW_UNALIGNED_ACCESS is true at the
   known alignment; nothing has been modified in that case.  */
tree tmp, tgt;
|
||||
gimple call;
|
||||
|
||||
tgt = gimple_assign_lhs (stmt);
|
||||
|
||||
/* Need to load the value from memory first. */
|
||||
if (n->base_addr)
|
||||
{
|
||||
tree addr_expr, addr_tmp, val_expr, val_tmp;
|
||||
tree load_offset_ptr, aligned_load_type;
|
||||
gimple addr_stmt, load_stmt;
|
||||
unsigned align;
|
||||
|
||||
/* Give up before touching anything if a byte-swapped load of LOAD_TYPE's
   mode is reported slow at the alignment known for SRC.  */
align = get_object_alignment (src);
|
||||
if (bswap && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
|
||||
return false;
|
||||
|
||||
/* Compute address to load from and cast according to the size
|
||||
of the load. */
|
||||
addr_expr = build_fold_addr_expr (unshare_expr (src));
|
||||
/* An invariant address is used directly; otherwise it is first assigned to
   a new SSA name by a statement inserted before STMT.  */
if (is_gimple_min_invariant (addr_expr))
|
||||
addr_tmp = addr_expr;
|
||||
else
|
||||
{
|
||||
addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
|
||||
"load_src");
|
||||
addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
|
||||
gsi_insert_before (gsi, addr_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
/* Perform the load. */
|
||||
aligned_load_type = load_type;
|
||||
/* Load through a variant of LOAD_TYPE with reduced alignment when SRC is
   known to be less aligned than LOAD_TYPE.  */
if (align < TYPE_ALIGN (load_type))
|
||||
aligned_load_type = build_aligned_type (load_type, align);
|
||||
/* The zero offset operand carries the alias pointer type recorded in N.  */
load_offset_ptr = build_int_cst (n->alias_set, 0);
|
||||
val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
|
||||
load_offset_ptr);
|
||||
|
||||
/* Host-endian load: STMT itself becomes the load, or a conversion of a
   separately emitted load, and no bswap call is generated.  */
if (!bswap)
|
||||
{
|
||||
if (n->range == 16)
|
||||
nop_stats.found_16bit++;
|
||||
else if (n->range == 32)
|
||||
nop_stats.found_32bit++;
|
||||
else
|
||||
{
|
||||
gcc_assert (n->range == 64);
|
||||
nop_stats.found_64bit++;
|
||||
}
|
||||
|
||||
/* Convert the result of load if necessary. */
|
||||
if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
|
||||
{
|
||||
val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
|
||||
"load_dst");
|
||||
load_stmt = gimple_build_assign (val_tmp, val_expr);
|
||||
gimple_set_vuse (load_stmt, n->vuse);
|
||||
gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
|
||||
gimple_assign_set_rhs_with_ops_1 (gsi, NOP_EXPR, val_tmp,
|
||||
NULL_TREE, NULL_TREE);
|
||||
}
|
||||
else
|
||||
gimple_assign_set_rhs_with_ops_1 (gsi, MEM_REF, val_expr,
|
||||
NULL_TREE, NULL_TREE);
|
||||
update_stmt (gsi_stmt (*gsi));
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file,
|
||||
"%d bit load in host endianness found at: ",
|
||||
(int)n->range);
|
||||
print_gimple_stmt (dump_file, stmt, 0, 0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/* Byte-swapped load: emit the load into a temporary that becomes the source
   of the bswap call built below.  */
else
|
||||
{
|
||||
val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
|
||||
load_stmt = gimple_build_assign (val_tmp, val_expr);
|
||||
gimple_set_vuse (load_stmt, n->vuse);
|
||||
gsi_insert_before (gsi, load_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
src = val_tmp;
|
||||
}
|
||||
|
||||
/* From here on a bswap call is always generated; N->range is in bits.  */
if (n->range == 16)
|
||||
bswap_stats.found_16bit++;
|
||||
else if (n->range == 32)
|
||||
bswap_stats.found_32bit++;
|
||||
else
|
||||
{
|
||||
gcc_assert (n->range == 64);
|
||||
bswap_stats.found_64bit++;
|
||||
}
|
||||
|
||||
tmp = src;
|
||||
|
||||
/* Convert the src expression if necessary. */
|
||||
if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
|
||||
{
|
||||
gimple convert_stmt;
|
||||
tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
|
||||
convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src, NULL);
|
||||
gsi_insert_before (gsi, convert_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
call = gimple_build_call (fndecl, 1, tmp);
|
||||
|
||||
tmp = tgt;
|
||||
|
||||
/* Convert the result if necessary. */
|
||||
if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
|
||||
{
|
||||
gimple convert_stmt;
|
||||
tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
|
||||
convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tgt, tmp, NULL);
|
||||
gsi_insert_after (gsi, convert_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
gimple_call_set_lhs (call, tmp);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "%d bit bswap implementation found at: ",
|
||||
(int)n->range);
|
||||
print_gimple_stmt (dump_file, stmt, 0, 0);
|
||||
}
|
||||
|
||||
/* The call goes right after STMT, ahead of any result conversion inserted
   above, and STMT is then removed.  */
gsi_insert_after (gsi, call, GSI_SAME_STMT);
|
||||
gsi_remove (gsi, true);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Find manual byte swap implementations as well as load in a given
|
||||
endianness. Byte swaps are turned into a bswap builtin invocation
|
||||
while endian loads are converted to bswap builtin invocation or
|
||||
simple load according to the host endianness. */
|
||||
|
||||
unsigned int
|
||||
pass_optimize_bswap::execute (function *fun)
|
||||
{
|
||||
|
@ -1946,9 +2279,6 @@ pass_optimize_bswap::execute (function *fun)
|
|||
&& (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
|
||||
|| (bswap32_p && word_mode == SImode)));
|
||||
|
||||
if (!bswap16_p && !bswap32_p && !bswap64_p)
|
||||
return 0;
|
||||
|
||||
/* Determine the argument type of the builtins. The code later on
|
||||
assumes that the return and argument type are the same. */
|
||||
if (bswap16_p)
|
||||
|
@ -1969,6 +2299,7 @@ pass_optimize_bswap::execute (function *fun)
|
|||
bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
|
||||
}
|
||||
|
||||
memset (&nop_stats, 0, sizeof (nop_stats));
|
||||
memset (&bswap_stats, 0, sizeof (bswap_stats));
|
||||
|
||||
FOR_EACH_BB_FN (bb, fun)
|
||||
|
@ -1982,21 +2313,24 @@ pass_optimize_bswap::execute (function *fun)
|
|||
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
|
||||
{
|
||||
gimple stmt = gsi_stmt (gsi);
|
||||
tree bswap_src, bswap_type;
|
||||
tree bswap_tmp;
|
||||
tree fndecl = NULL_TREE;
|
||||
int type_size;
|
||||
gimple call;
|
||||
tree fndecl = NULL_TREE, bswap_type = NULL_TREE;
|
||||
tree src, load_type;
|
||||
struct symbolic_number n;
|
||||
bool bswap;
|
||||
|
||||
if (!is_gimple_assign (stmt)
|
||||
|| gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
|
||||
continue;
|
||||
|
||||
type_size = TYPE_PRECISION (gimple_expr_type (stmt));
|
||||
src = find_bswap_or_nop (stmt, &n, &bswap);
|
||||
|
||||
switch (type_size)
|
||||
if (!src)
|
||||
continue;
|
||||
|
||||
switch (n.range)
|
||||
{
|
||||
case 16:
|
||||
load_type = uint16_type_node;
|
||||
if (bswap16_p)
|
||||
{
|
||||
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
|
||||
|
@ -2004,6 +2338,7 @@ pass_optimize_bswap::execute (function *fun)
|
|||
}
|
||||
break;
|
||||
case 32:
|
||||
load_type = uint32_type_node;
|
||||
if (bswap32_p)
|
||||
{
|
||||
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
|
||||
|
@ -2011,6 +2346,7 @@ pass_optimize_bswap::execute (function *fun)
|
|||
}
|
||||
break;
|
||||
case 64:
|
||||
load_type = uint64_type_node;
|
||||
if (bswap64_p)
|
||||
{
|
||||
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
|
||||
|
@ -2021,62 +2357,21 @@ pass_optimize_bswap::execute (function *fun)
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!fndecl)
|
||||
if (bswap && !fndecl)
|
||||
continue;
|
||||
|
||||
bswap_src = find_bswap (stmt);
|
||||
|
||||
if (!bswap_src)
|
||||
continue;
|
||||
|
||||
changed = true;
|
||||
if (type_size == 16)
|
||||
bswap_stats.found_16bit++;
|
||||
else if (type_size == 32)
|
||||
bswap_stats.found_32bit++;
|
||||
else
|
||||
bswap_stats.found_64bit++;
|
||||
|
||||
bswap_tmp = bswap_src;
|
||||
|
||||
/* Convert the src expression if necessary. */
|
||||
if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
|
||||
{
|
||||
gimple convert_stmt;
|
||||
bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
|
||||
convert_stmt = gimple_build_assign_with_ops
|
||||
(NOP_EXPR, bswap_tmp, bswap_src, NULL);
|
||||
gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
call = gimple_build_call (fndecl, 1, bswap_tmp);
|
||||
|
||||
bswap_tmp = gimple_assign_lhs (stmt);
|
||||
|
||||
/* Convert the result if necessary. */
|
||||
if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
|
||||
{
|
||||
gimple convert_stmt;
|
||||
bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
|
||||
convert_stmt = gimple_build_assign_with_ops
|
||||
(NOP_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
|
||||
gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
|
||||
}
|
||||
|
||||
gimple_call_set_lhs (call, bswap_tmp);
|
||||
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "%d bit bswap implementation found at: ",
|
||||
(int)type_size);
|
||||
print_gimple_stmt (dump_file, stmt, 0, 0);
|
||||
}
|
||||
|
||||
gsi_insert_after (&gsi, call, GSI_SAME_STMT);
|
||||
gsi_remove (&gsi, true);
|
||||
if (bswap_replace (stmt, &gsi, src, fndecl, bswap_type, load_type,
|
||||
&n, bswap))
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
statistics_counter_event (fun, "16-bit nop implementations found",
|
||||
nop_stats.found_16bit);
|
||||
statistics_counter_event (fun, "32-bit nop implementations found",
|
||||
nop_stats.found_32bit);
|
||||
statistics_counter_event (fun, "64-bit nop implementations found",
|
||||
nop_stats.found_64bit);
|
||||
statistics_counter_event (fun, "16-bit bswap implementations found",
|
||||
bswap_stats.found_16bit);
|
||||
statistics_counter_event (fun, "32-bit bswap implementations found",
|
||||
|
|
Loading…
Add table
Reference in a new issue