sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]
The following patch is the first step towards fixing PR93582. vn_reference_lookup_3 right now punts on anything that isn't byte aligned, so to be able to look up a constant bitfield store, one needs to use the exact same COMPONENT_REF, otherwise it isn't found. This patch lifts that restriction if the bits to be loaded are covered by a single store of a constant (it keeps the restriction so far for the multiple-store case, which can be tweaked incrementally, but I think for bisection etc. it is worth doing it one step at a time). 2020-02-13 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/93582 * fold-const.h (shift_bytes_in_array_left, shift_bytes_in_array_right): Declare. * fold-const.c (shift_bytes_in_array_left, shift_bytes_in_array_right): New functions, moved from gimple-ssa-store-merging.c, no longer static. * gimple-ssa-store-merging.c (shift_bytes_in_array): Move to fold-const.c and rename to shift_bytes_in_array_left. (shift_bytes_in_array_right): Move to fold-const.c. (encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of shift_bytes_in_array. (verify_shift_bytes_in_array): Rename to ... (verify_shift_bytes_in_array_left): ... this. Use shift_bytes_in_array_left instead of shift_bytes_in_array. (store_merging_c_tests): Call verify_shift_bytes_in_array_left instead of verify_shift_bytes_in_array. * tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr / native_interpret_expr where the store covers all needed bits, punt on PDP-endian, otherwise allow all involved offsets and sizes not to be byte-aligned. * gcc.dg/tree-ssa/pr93582-1.c: New test. * gcc.dg/tree-ssa/pr93582-2.c: New test. * gcc.dg/tree-ssa/pr93582-3.c: New test.
This commit is contained in:
parent
8ea884b85e
commit
8aba425f4e
9 changed files with 222 additions and 88 deletions
|
@ -1,5 +1,26 @@
|
|||
2020-02-13 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/93582
|
||||
* fold-const.h (shift_bytes_in_array_left,
|
||||
shift_bytes_in_array_right): Declare.
|
||||
* fold-const.c (shift_bytes_in_array_left,
|
||||
shift_bytes_in_array_right): New function, moved from
|
||||
gimple-ssa-store-merging.c, no longer static.
|
||||
* gimple-ssa-store-merging.c (shift_bytes_in_array): Move
|
||||
to fold-const.c and rename to shift_bytes_in_array_left.
|
||||
(shift_bytes_in_array_right): Move to fold-const.c.
|
||||
(encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
|
||||
shift_bytes_in_array.
|
||||
(verify_shift_bytes_in_array): Rename to ...
|
||||
(verify_shift_bytes_in_array_left): ... this. Use
|
||||
shift_bytes_in_array_left instead of shift_bytes_in_array.
|
||||
(store_merging_c_tests): Call verify_shift_bytes_in_array_left
|
||||
instead of verify_shift_bytes_in_array.
|
||||
* tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
|
||||
/ native_interpret_expr where the store covers all needed bits,
|
||||
punt on PDP-endian, otherwise allow all involved offsets and sizes
|
||||
not to be byte-aligned.
|
||||
|
||||
PR target/93673
|
||||
* config/i386/sse.md (k<code><mode>): Drop mode from last operand and
|
||||
use const_0_to_255_operand predicate instead of immediate_operand.
|
||||
|
|
|
@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type)
|
|||
}
|
||||
}
|
||||
|
||||
/* Routines for manipulation of native_encode_expr encoded data if the encoded
|
||||
or extracted constant positions and/or sizes aren't byte aligned. */
|
||||
|
||||
/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
|
||||
bits between adjacent elements. AMNT should be within
|
||||
[0, BITS_PER_UNIT).
|
||||
Example, AMNT = 2:
|
||||
00011111|11100000 << 2 = 01111111|10000000
|
||||
PTR[1] | PTR[0] PTR[1] | PTR[0]. */
|
||||
|
||||
void
|
||||
shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
|
||||
unsigned int amnt)
|
||||
{
|
||||
if (amnt == 0)
|
||||
return;
|
||||
|
||||
unsigned char carry_over = 0U;
|
||||
unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
|
||||
unsigned char clear_mask = (~0U) << amnt;
|
||||
|
||||
for (unsigned int i = 0; i < sz; i++)
|
||||
{
|
||||
unsigned prev_carry_over = carry_over;
|
||||
carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
|
||||
|
||||
ptr[i] <<= amnt;
|
||||
if (i != 0)
|
||||
{
|
||||
ptr[i] &= clear_mask;
|
||||
ptr[i] |= prev_carry_over;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Like shift_bytes_in_array_left but for big-endian.
|
||||
Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
|
||||
bits between adjacent elements. AMNT should be within
|
||||
[0, BITS_PER_UNIT).
|
||||
Example, AMNT = 2:
|
||||
00011111|11100000 >> 2 = 00000111|11111000
|
||||
PTR[0] | PTR[1] PTR[0] | PTR[1]. */
|
||||
|
||||
void
|
||||
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
|
||||
unsigned int amnt)
|
||||
{
|
||||
if (amnt == 0)
|
||||
return;
|
||||
|
||||
unsigned char carry_over = 0U;
|
||||
unsigned char carry_mask = ~(~0U << amnt);
|
||||
|
||||
for (unsigned int i = 0; i < sz; i++)
|
||||
{
|
||||
unsigned prev_carry_over = carry_over;
|
||||
carry_over = ptr[i] & carry_mask;
|
||||
|
||||
carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
|
||||
ptr[i] >>= amnt;
|
||||
ptr[i] |= prev_carry_over;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
|
||||
directly on the VECTOR_CST encoding, in a way that works for variable-
|
||||
length vectors. Return the resulting VECTOR_CST on success or null
|
||||
|
|
|
@ -30,6 +30,10 @@ extern int native_encode_initializer (tree, unsigned char *, int,
|
|||
int off = -1);
|
||||
extern tree native_interpret_expr (tree, const unsigned char *, int);
|
||||
extern bool can_native_interpret_type_p (tree);
|
||||
extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
|
||||
unsigned int);
|
||||
extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
|
||||
unsigned int);
|
||||
|
||||
/* Fold constants as much as possible in an expression.
|
||||
Returns the simplified expression.
|
||||
|
|
|
@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
|
|||
fprintf (fd, "\n");
|
||||
}
|
||||
|
||||
/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */

static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  /* Shifting by zero bits is a no-op.  */
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  /* Mask selecting the AMNT most-significant bits of a byte, i.e. the bits
     that shift out of one element and into the next.  */
  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
  /* Mask clearing the AMNT least-significant bits, where the carried-over
     bits from the previous element are deposited.  */
  unsigned char clear_mask = (~0U) << amnt;

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      /* Extract the bits that will be shifted into the next element.  */
      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);

      ptr[i] <<= amnt;
      /* The first element has no predecessor, so nothing is ORed in.  */
      if (i != 0)
	{
	  ptr[i] &= clear_mask;
	  ptr[i] |= prev_carry_over;
	}
    }
}
|
||||
|
||||
/* Like shift_bytes_in_array but for big-endian.
   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */

static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
			    unsigned int amnt)
{
  /* Shifting by zero bits is a no-op.  */
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  /* Mask selecting the AMNT least-significant bits of a byte, i.e. the
     bits that shift out of one element and into the next.  */
  unsigned char carry_mask = ~(~0U << amnt);

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      /* Extract the bits that will be shifted into the next element,
	 and reposition them at the top of a byte.  */
      carry_over = ptr[i] & carry_mask;

      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      /* For i == 0, prev_carry_over is 0, so the first element just
	 gets shifted.  */
      ptr[i] |= prev_carry_over;
    }
}
|
||||
|
||||
/* Clear out LEN bits starting from bit START in the byte array
|
||||
PTR. This clears the bits to the *right* from START.
|
||||
START must be within [0, BITS_PER_UNIT) and counts starting from
|
||||
|
@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
|
|||
/* Create the shifted version of EXPR. */
|
||||
if (!BYTES_BIG_ENDIAN)
|
||||
{
|
||||
shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
|
||||
shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
|
||||
if (shift_amnt == 0)
|
||||
byte_size--;
|
||||
}
|
||||
|
@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
|
|||
}
|
||||
}
|
||||
|
||||
/* Test shift_bytes_in_array and that it carries bits across between
|
||||
/* Test shift_bytes_in_array_left and that it carries bits across between
|
||||
bytes correctly. */
|
||||
|
||||
static void
|
||||
verify_shift_bytes_in_array (void)
|
||||
verify_shift_bytes_in_array_left (void)
|
||||
{
|
||||
/* byte 1 | byte 0
|
||||
00011111 | 11100000. */
|
||||
|
@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void)
|
|||
memcpy (in, orig, sizeof orig);
|
||||
|
||||
unsigned char expected[2] = { 0x80, 0x7f };
|
||||
shift_bytes_in_array (in, sizeof (in), 2);
|
||||
shift_bytes_in_array_left (in, sizeof (in), 2);
|
||||
verify_array_eq (in, expected, sizeof (in));
|
||||
|
||||
memcpy (in, orig, sizeof orig);
|
||||
memcpy (expected, orig, sizeof orig);
|
||||
/* Check that shifting by zero doesn't change anything. */
|
||||
shift_bytes_in_array (in, sizeof (in), 0);
|
||||
shift_bytes_in_array_left (in, sizeof (in), 0);
|
||||
verify_array_eq (in, expected, sizeof (in));
|
||||
|
||||
}
|
||||
|
@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void)
|
|||
void
|
||||
store_merging_c_tests (void)
|
||||
{
|
||||
verify_shift_bytes_in_array ();
|
||||
verify_shift_bytes_in_array_left ();
|
||||
verify_shift_bytes_in_array_right ();
|
||||
verify_clear_bit_region ();
|
||||
verify_clear_bit_region_be ();
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2020-02-13 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/93582
|
||||
* gcc.dg/tree-ssa/pr93582-1.c: New test.
|
||||
* gcc.dg/tree-ssa/pr93582-2.c: New test.
|
||||
* gcc.dg/tree-ssa/pr93582-3.c: New test.
|
||||
|
||||
2020-02-13 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR testsuite/93717
|
||||
|
|
17
gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c
Normal file
17
gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */

/* The two union members overlay the same storage with differently split
   bitfields, so the read of u.t.e overlaps the constant store to u.s.b
   at a non-byte-aligned position.  */
union U {
  struct S { int a : 1, b : 4, c : 27; } s;
  struct T { int d : 2; int e : 2; int f : 28; } t;
};

int
foo (void)
{
  union U u;
  u.s.b = 10;		/* Constant store into a non-byte-aligned bitfield.  */
  return u.t.e;		/* Overlapping bitfield read; per the dump scan
			   above, FRE is expected to fold this to 1.  */
}
|
17
gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c
Normal file
17
gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */

/* Like pr93582-1.c, but with wider bitfields so the overlapping read
   spans multiple bytes of the stored constant.  */
union U {
  struct S { int a : 1, b : 14, c : 17; } s;
  struct T { int d : 2; int e : 12; int f : 18; } t;
};

int
foo (void)
{
  union U u;
  u.s.b = -7005;	/* Constant store into a non-byte-aligned bitfield.  */
  return u.t.e;		/* Overlapping bitfield read; per the dump scan
			   above, FRE is expected to fold this to 593.  */
}
|
18
gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c
Normal file
18
gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */

/* Like pr93582-2.c, but here the extracted bits land differently
   depending on the target's bitfield allocation order, so the expected
   folded constant differs between big- and little-endian (see the two
   dump scans above).  */
union U {
  struct S { int a : 1, b : 14, c : 17; } s;
  struct T { int d : 10; int e : 4; int f : 18; } t;
};

int
foo (void)
{
  union U u;
  u.s.b = -7005;	/* Constant store into a non-byte-aligned bitfield.  */
  return u.t.e;		/* Overlapping bitfield read; FRE should fold it
			   to the endian-dependent constant.  */
}
|
|
@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
|
|||
&& is_gimple_reg_type (vr->type)
|
||||
&& !contains_storage_order_barrier_p (vr->operands)
|
||||
&& gimple_assign_single_p (def_stmt)
|
||||
&& CHAR_BIT == 8 && BITS_PER_UNIT == 8
|
||||
&& CHAR_BIT == 8
|
||||
&& BITS_PER_UNIT == 8
|
||||
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
|
||||
/* native_encode and native_decode operate on arrays of bytes
|
||||
and so fundamentally need a compile-time size and offset. */
|
||||
&& maxsize.is_constant (&maxsizei)
|
||||
&& maxsizei % BITS_PER_UNIT == 0
|
||||
&& offset.is_constant (&offseti)
|
||||
&& offseti % BITS_PER_UNIT == 0
|
||||
&& (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
|
||||
|| (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
|
||||
&& is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
|
||||
|
@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
|
|||
&& !reverse
|
||||
&& !storage_order_barrier_p (lhs)
|
||||
&& known_eq (maxsize2, size2)
|
||||
&& multiple_p (size2, BITS_PER_UNIT)
|
||||
&& multiple_p (offset2, BITS_PER_UNIT)
|
||||
&& adjust_offsets_for_equal_base_address (base, &offset,
|
||||
base2, &offset2)
|
||||
&& offset.is_constant (&offseti)
|
||||
|
@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
|
|||
&& known_subrange_p (offseti, maxsizei, offset2, size2))
|
||||
{
|
||||
/* We support up to 512-bit values (for V8DFmode). */
|
||||
unsigned char buffer[64];
|
||||
unsigned char buffer[65];
|
||||
int len;
|
||||
|
||||
tree rhs = gimple_assign_rhs1 (def_stmt);
|
||||
if (TREE_CODE (rhs) == SSA_NAME)
|
||||
rhs = SSA_VAL (rhs);
|
||||
unsigned pad = 0;
|
||||
if (BYTES_BIG_ENDIAN
|
||||
&& is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
|
||||
{
|
||||
/* On big-endian the padding is at the 'front' so
|
||||
just skip the initial bytes. */
|
||||
fixed_size_mode mode
|
||||
= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
|
||||
pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
|
||||
}
|
||||
len = native_encode_expr (rhs,
|
||||
buffer, sizeof (buffer),
|
||||
((offseti - offset2i) / BITS_PER_UNIT
|
||||
+ pad));
|
||||
buffer, sizeof (buffer) - 1,
|
||||
(offseti - offset2i) / BITS_PER_UNIT);
|
||||
if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
|
||||
{
|
||||
tree type = vr->type;
|
||||
unsigned char *buf = buffer;
|
||||
unsigned int amnt = 0;
|
||||
/* Make sure to interpret in a type that has a range
|
||||
covering the whole access size. */
|
||||
if (INTEGRAL_TYPE_P (vr->type)
|
||||
&& maxsizei != TYPE_PRECISION (vr->type))
|
||||
type = build_nonstandard_integer_type (maxsizei,
|
||||
TYPE_UNSIGNED (type));
|
||||
tree val = native_interpret_expr (type, buffer,
|
||||
maxsizei / BITS_PER_UNIT);
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
/* For big-endian native_encode_expr stored the rhs
|
||||
such that the LSB of it is the LSB of buffer[len - 1].
|
||||
That bit is stored into memory at position
|
||||
offset2 + size2 - 1, i.e. in byte
|
||||
base + (offset2 + size2 - 1) / BITS_PER_UNIT.
|
||||
E.g. for offset2 1 and size2 14, rhs -1 and memory
|
||||
previously cleared that is:
|
||||
0 1
|
||||
01111111|11111110
|
||||
Now, if we want to extract offset 2 and size 12 from
|
||||
it using native_interpret_expr (which actually works
|
||||
for integral bitfield types in terms of byte size of
|
||||
the mode), the native_encode_expr stored the value
|
||||
into buffer as
|
||||
XX111111|11111111
|
||||
and returned len 2 (the X bits are outside of
|
||||
precision).
|
||||
Let sz be maxsize / BITS_PER_UNIT if not extracting
|
||||
a bitfield, and GET_MODE_SIZE otherwise.
|
||||
We need to align the LSB of the value we want to
|
||||
extract as the LSB of buf[sz - 1].
|
||||
The LSB from memory we need to read is at position
|
||||
offset + maxsize - 1. */
|
||||
HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
|
||||
if (INTEGRAL_TYPE_P (type))
|
||||
sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
|
||||
amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
|
||||
- offseti - maxsizei) % BITS_PER_UNIT;
|
||||
if (amnt)
|
||||
shift_bytes_in_array_right (buffer, len, amnt);
|
||||
amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
|
||||
- offseti - maxsizei - amnt) / BITS_PER_UNIT;
|
||||
if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
|
||||
len = 0;
|
||||
else
|
||||
{
|
||||
buf = buffer + len - sz - amnt;
|
||||
len -= (buf - buffer);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
amnt = ((unsigned HOST_WIDE_INT) offset2i
|
||||
- offseti) % BITS_PER_UNIT;
|
||||
if (amnt)
|
||||
{
|
||||
buffer[len] = 0;
|
||||
shift_bytes_in_array_left (buffer, len + 1, amnt);
|
||||
buf = buffer + 1;
|
||||
}
|
||||
}
|
||||
tree val = native_interpret_expr (type, buf, len);
|
||||
/* If we chop off bits because the types precision doesn't
|
||||
match the memory access size this is ok when optimizing
|
||||
reads but not when called from the DSE code during
|
||||
|
@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
|
|||
return data->finish (get_alias_set (lhs), val);
|
||||
}
|
||||
}
|
||||
else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
|
||||
else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
|
||||
size2i)
|
||||
&& maxsizei % BITS_PER_UNIT == 0
|
||||
&& offseti % BITS_PER_UNIT == 0
|
||||
&& size2i % BITS_PER_UNIT == 0
|
||||
&& offset2i % BITS_PER_UNIT == 0)
|
||||
{
|
||||
pd_data pd;
|
||||
tree rhs = gimple_assign_rhs1 (def_stmt);
|
||||
|
|
Loading…
Add table
Reference in a new issue