Alpha: Also use tree information to get base block alignment

We hardly ever emit code using machine instructions for aligned memory
accesses for block move and clear operations and the reason for this
appears to be that suboptimal alignment is often passed by the caller
and then we only try to find a better alignment by checking pseudo
register pointer alignment information, and from observation it's most
often only set for stack frame references.

This code originates from before Tree SSA days and we can do better
nowadays, by looking up the original tree node associated with a MEM
RTL, so implement this approach, factoring out repeating code from
`alpha_expand_block_move' and `alpha_expand_block_clear' to a new
function.

In some cases, however, tree information is not available while pointer
alignment is, such as with the case concerned with PR target/115459,
where we have:

(gdb) pr orig_src
(mem:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
        (const_int 8368 [0x20b0])) [8  S18 A8])
(gdb) pr orig_dst
(mem/j/c:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ])
        (const_int 8208 [0x2010])) [8 MEM[(struct gnat__debug_pools__print_info_stdout__internal__L_18__B1182b__S1183b___PAD *)_339].F[1 ...]{lb: 1 sz: 1}+0 S18 A128])
(gdb)

showing no tree information and the alignment of 8 only for `orig_src',
while indeed REGNO_POINTER_ALIGN returns 128 for pseudo 65.  So retain
the old approach and return the largest alignment determined and its
associated offset.

Add test cases accordingly and remove XFAILs from memclr-a2-o1-c9-ptr.c
now that aligned code does get produced for it.

	gcc/
	* config/alpha/alpha.cc
	(alpha_get_mem_rtx_alignment_and_offset): New function.
	(alpha_expand_block_move, alpha_expand_block_clear): Use it for
	alignment retrieval.

	gcc/testsuite/
	* gcc.target/alpha/memclr-a2-o1-c9-ptr.c: Remove XFAILs.
	* gcc.target/alpha/memcpy-di-aligned.c: New file.
	* gcc.target/alpha/memcpy-di-unaligned.c: New file.
	* gcc.target/alpha/memcpy-di-unaligned-dst.c: New file.
	* gcc.target/alpha/memcpy-di-unaligned-src.c: New file.
This commit is contained in:
Maciej W. Rozycki 2024-12-25 22:23:40 +00:00
parent 524fedd7f6
commit e0dae4da4c
6 changed files with 205 additions and 61 deletions

View file

@ -3771,6 +3771,78 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
emit_move_insn (st_addr_1, st_tmp_1);
}
/* Get the base alignment and offset of EXPR in A and O respectively.
   Check for any pseudo register pointer alignment and for any tree
   node information and return the largest alignment determined and
   its associated offset.

   EXPR must be a MEM.  A is expressed in bits, the same unit as
   MEM_ALIGN and REGNO_POINTER_ALIGN, and O in bytes; O is the
   displacement of the location from the base A applies to, so callers
   have to combine the two to obtain the alignment of the location
   itself.  O is zero if the alignment recorded for the MEM itself is
   the best one known.  */

static void
alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
{
  /* Validate EXPR before any MEM accessor is applied to it.  */
  gcc_assert (MEM_P (expr));

  HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0;
  int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr);

  /* Alignment recorded for the register the address is based on, with
     the constant displacement, if any, as the associated offset.  */
  rtx addr = XEXP (expr, 0);
  switch (GET_CODE (addr))
    {
    case REG:
      reg_align = REGNO_POINTER_ALIGN (REGNO (addr));
      break;

    case PLUS:
      if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
        {
          reg_offset = INTVAL (XEXP (addr, 1));
          reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0)));
        }
      break;

    default:
      break;
    }

  /* Alignment from the tree node associated with the MEM.  The offset
     has to be relative to the very object the alignment is taken from,
     so walk any chain of COMPONENT_REF nodes, accumulating the field
     offsets as we go, until the underlying MEM_REF has been reached.
     Looking at the innermost field only would yield its offset within
     the immediate containing structure, while the alignment is that of
     the outermost object, and for nested members that could claim
     a stricter alignment than the location actually has.  If a field
     offset is not a known constant or any other kind of node is met,
     then give up and leave TREE_ALIGN at zero so that the tree
     information is not used.  */
  tree mem = MEM_EXPR (expr);
  while (mem != NULL_TREE)
    switch (TREE_CODE (mem))
      {
      case MEM_REF:
        tree_offset += mem_ref_offset (mem).force_shwi ();
        tree_align = get_object_alignment (get_base_address (mem));
        mem = NULL_TREE;
        break;

      case COMPONENT_REF:
        {
          tree byte_offset = component_ref_field_offset (mem);
          tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
          poly_int64 offset;
          if (!byte_offset
              || !poly_int_tree_p (byte_offset, &offset)
              || !tree_fits_shwi_p (bit_offset))
            {
              mem = NULL_TREE;
              break;
            }
          tree_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
        }
        /* Carry on with the object containing the field.  */
        mem = TREE_OPERAND (mem, 0);
        break;

      default:
        mem = NULL_TREE;
        break;
      }

  /* Choose the strictest alignment along with its own offset; the
     alignment recorded for the MEM itself comes with no offset.  */
  if (reg_align > mem_align)
    {
      mem_offset = reg_offset;
      mem_align = reg_align;
    }
  if (tree_align > mem_align)
    {
      mem_offset = tree_offset;
      mem_align = tree_align;
    }
  o = mem_offset;
  a = mem_align;
}
/* Expand string/block move operations.
@ -3799,27 +3871,19 @@ alpha_expand_block_move (rtx operands[])
else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
return 0;
/* Look for additional alignment information from recorded register info. */
/* Look for stricter alignment. */
HOST_WIDE_INT c;
int a;
tmp = XEXP (orig_src, 0);
if (REG_P (tmp))
src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
else if (GET_CODE (tmp) == PLUS
&& REG_P (XEXP (tmp, 0))
&& CONST_INT_P (XEXP (tmp, 1)))
alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c);
if (a > src_align)
{
unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
if (a > src_align)
{
if (a >= 64 && c % 8 == 0)
src_align = 64;
else if (a >= 32 && c % 4 == 0)
src_align = 32;
else if (a >= 16 && c % 2 == 0)
src_align = 16;
}
if (a >= 64 && c % 8 == 0)
src_align = 64;
else if (a >= 32 && c % 4 == 0)
src_align = 32;
else if (a >= 16 && c % 2 == 0)
src_align = 16;
if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align)
{
@ -3828,25 +3892,15 @@ alpha_expand_block_move (rtx operands[])
}
}
tmp = XEXP (orig_dst, 0);
if (REG_P (tmp))
dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
else if (GET_CODE (tmp) == PLUS
&& REG_P (XEXP (tmp, 0))
&& CONST_INT_P (XEXP (tmp, 1)))
alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
if (a > dst_align)
{
unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
if (a > dst_align)
{
if (a >= 64 && c % 8 == 0)
dst_align = 64;
else if (a >= 32 && c % 4 == 0)
dst_align = 32;
else if (a >= 16 && c % 2 == 0)
dst_align = 16;
}
if (a >= 64 && c % 8 == 0)
dst_align = 64;
else if (a >= 32 && c % 4 == 0)
dst_align = 32;
else if (a >= 16 && c % 2 == 0)
dst_align = 16;
if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align)
{
@ -4048,7 +4102,6 @@ alpha_expand_block_clear (rtx operands[])
HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
HOST_WIDE_INT alignofs = 0;
rtx orig_dst = operands[0];
rtx tmp;
int i, words, ofs = 0;
if (orig_bytes <= 0)
@ -4057,25 +4110,18 @@ alpha_expand_block_clear (rtx operands[])
return 0;
/* Look for stricter alignment. */
tmp = XEXP (orig_dst, 0);
if (REG_P (tmp))
align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
else if (GET_CODE (tmp) == PLUS
&& REG_P (XEXP (tmp, 0))
&& CONST_INT_P (XEXP (tmp, 1)))
{
HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
HOST_WIDE_INT c;
int a;
if (a > align)
{
if (a >= 64)
align = a, alignofs = 8 - c % 8;
else if (a >= 32)
align = a, alignofs = 4 - c % 4;
else if (a >= 16)
align = a, alignofs = 2 - c % 2;
}
alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c);
if (a > align)
{
if (a >= 64)
align = a, alignofs = -c & 7;
else if (a >= 32)
align = a, alignofs = -c & 3;
else if (a >= 16)
align = a, alignofs = -c & 1;
if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align)
{

View file

@ -43,8 +43,8 @@ memclr_a2_o1_c9 (u_t *u)
that is with a byte store at offset 1 and with two unaligned load/store
pairs at offsets 2 and 9 each. */
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */
/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } } */
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */
/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
/* Source and destination buffers of nine elements each, with every
   element preset to a per-buffer fill pattern by means of the GNU range
   designator.  Both have external linkage.  */
unsigned long aligned_src_di[9] = { [0 ... 8] = 0xe6e7e8e9eaebeced };
unsigned long aligned_dst_di[9] = { [0 ... 8] = 0xdcdbdad9d8d7d6d5 };
/* Copy 56 bytes from element 1 of aligned_src_di onwards to element 1 of
   aligned_dst_di onwards.  Both addresses are formed from the arrays
   themselves, and the dg-final directives of this file expect exactly 7
   LDQ and 7 STQ instructions and no LDQ_U or STQ_U ones.  */
void
memcpy_aligned_data_di (void)
{
__builtin_memcpy (aligned_dst_di + 1, aligned_src_di + 1, 56);
}
/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
/* Source buffer of nine elements, with every element preset to a fill
   pattern by means of the GNU range designator.  It has external
   linkage.  */
unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 };
/* Copy 56 bytes from element 1 of unaligned_src_di onwards to DST.  Only
   the source address is formed from an array here; DST is an untyped
   pointer supplied by the caller.  The dg-final directives of this file
   expect 7 LDQ, 2 LDQ_U and 8 STQ_U instructions and no STQ ones.  */
void
memcpy_unaligned_dst_di (void *dst)
{
__builtin_memcpy (dst, unaligned_src_di + 1, 56);
}
/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */
/* { dg-final { scan-assembler-times "\\sstq_u\\s" 8 } } */
/* { dg-final { scan-assembler-not "\\sstq\\s" } } */

View file

@ -0,0 +1,15 @@
/* { dg-do compile } */
/* { dg-options "" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
/* Destination buffer of nine elements, with every element preset to a
   fill pattern by means of the GNU range designator.  It has external
   linkage.  */
unsigned long unaligned_dst_di[9] = { [0 ... 8] = 0xc4c5c6c7c8c9cacb };
/* Copy 56 bytes from SRC to element 1 of unaligned_dst_di onwards.  Only
   the destination address is formed from an array here; SRC is an
   untyped pointer supplied by the caller.  The dg-final directives of
   this file expect 7 STQ and 8 LDQ_U instructions and no LDQ or STQ_U
   ones.  */
void
memcpy_unaligned_src_di (const void *src)
{
__builtin_memcpy (unaligned_dst_di + 1, src, 56);
}
/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */
/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */
/* { dg-final { scan-assembler-not "\\s(?:ldq|stq_u)\\s" } } */

View file

@ -0,0 +1,51 @@
/* { dg-do run } */
/* { dg-additional-sources memcpy-di-aligned.c } */
/* { dg-additional-sources memcpy-di-unaligned-src.c } */
/* { dg-additional-sources memcpy-di-unaligned-dst.c } */
/* { dg-options "" } */
void memcpy_aligned_data_di (void);
void memcpy_unaligned_dst_di (void *);
void memcpy_unaligned_src_di (const void *);
extern unsigned long aligned_src_di[];
extern unsigned long aligned_dst_di[];
extern unsigned long unaligned_src_di[];
extern unsigned long unaligned_dst_di[];
/* Pass a recognisable pattern through all three copy routines in a chain
   and verify that it arrives intact in the final buffer and that the
   first and the last element of every buffer still hold their initial
   fill.  Return 0 on success and 1 on any mismatch.  */
int
main (void)
{
  unsigned long pattern;
  int idx;

  /* Give elements 1 to 7 of the initial source distinct values.  */
  pattern = 0x0807060504030201;
  for (idx = 1; idx < 8; idx++)
    {
      unaligned_src_di[idx] = pattern;
      pattern += 0x0808080808080808;
    }

  /* unaligned_src_di -> aligned_src_di -> aligned_dst_di
     -> unaligned_dst_di, with a compiler memory barrier around every
     step.  */
  asm ("" : : : "memory");
  memcpy_unaligned_dst_di (aligned_src_di + 1);
  asm ("" : : : "memory");
  memcpy_aligned_data_di ();
  asm ("" : : : "memory");
  memcpy_unaligned_src_di (aligned_dst_di + 1);
  asm ("" : : : "memory");

  /* The payload must have made it through all the copies.  */
  pattern = 0x0807060504030201;
  for (idx = 1; idx < 8; idx++)
    {
      if (unaligned_dst_di[idx] != pattern)
        return 1;
      pattern += 0x0808080808080808;
    }

  /* The elements on either side of each copied range must be intact.  */
  if (unaligned_src_di[0] != 0xfefdfcfbfaf9f8f7
      || unaligned_src_di[8] != 0xfefdfcfbfaf9f8f7)
    return 1;
  if (aligned_src_di[0] != 0xe6e7e8e9eaebeced
      || aligned_src_di[8] != 0xe6e7e8e9eaebeced)
    return 1;
  if (aligned_dst_di[0] != 0xdcdbdad9d8d7d6d5
      || aligned_dst_di[8] != 0xdcdbdad9d8d7d6d5)
    return 1;
  if (unaligned_dst_di[0] != 0xc4c5c6c7c8c9cacb
      || unaligned_dst_di[8] != 0xc4c5c6c7c8c9cacb)
    return 1;

  return 0;
}