From e0dae4da4c45e3959b0624551f80283c45a60446 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Wed, 25 Dec 2024 22:23:40 +0000 Subject: [PATCH] Alpha: Also use tree information to get base block alignment We hardly ever emit code using machine instructions for aligned memory accesses for block move and clear operations and the reason for this appears to be that suboptimal alignment is often passed by the caller and then we only try to find a better alignment by checking pseudo register pointer alignment information, and from observation it's most often only set for stack frame references. This code originates from before Tree SSA days and we can do better nowadays, by looking up the original tree node associated with a MEM RTL, so implement this approach, factoring out repeating code from `alpha_expand_block_move' and `alpha_expand_block_clear' to a new function. In some cases, however, tree information is not available while pointer alignment is, such as with the case concerned with PR target/115459, where we have: (gdb) pr orig_src (mem:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ]) (const_int 8368 [0x20b0])) [8 S18 A8]) (gdb) pr orig_dst (mem/j/c:BLK (plus:DI (reg/f:DI 65 virtual-stack-vars [ lock.206_2 ]) (const_int 8208 [0x2010])) [8 MEM[(struct gnat__debug_pools__print_info_stdout__internal__L_18__B1182b__S1183b___PAD *)_339].F[1 ...]{lb: 1 sz: 1}+0 S18 A128]) (gdb) showing no tree information and the alignment of 8 only for `orig_src', while indeed REGNO_POINTER_ALIGN returns 128 for pseudo 65. So retain the old approach and return the largest alignment determined and its associated offset. Add test cases accordingly and remove XFAILs from memclr-a2-o1-c9-ptr.c now that it does get aligned code produced. gcc/ * config/alpha/alpha.cc (alpha_get_mem_rtx_alignment_and_offset): New function. (alpha_expand_block_move, alpha_expand_block_clear): Use it for alignment retrieval. 
gcc/testsuite/ * gcc.target/alpha/memclr-a2-o1-c9-ptr.c: Remove XFAILs. * gcc.target/alpha/memcpy-di-aligned.c: New file. * gcc.target/alpha/memcpy-di-unaligned.c: New file. * gcc.target/alpha/memcpy-di-unaligned-dst.c: New file. * gcc.target/alpha/memcpy-di-unaligned-src.c: New file. --- gcc/config/alpha/alpha.cc | 158 +++++++++++------- .../gcc.target/alpha/memclr-a2-o1-c9-ptr.c | 10 +- .../gcc.target/alpha/memcpy-di-aligned.c | 16 ++ .../alpha/memcpy-di-unaligned-dst.c | 16 ++ .../alpha/memcpy-di-unaligned-src.c | 15 ++ .../gcc.target/alpha/memcpy-di-unaligned.c | 51 ++++++ 6 files changed, 205 insertions(+), 61 deletions(-) create mode 100644 gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c create mode 100644 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c create mode 100644 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c create mode 100644 gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 07753297c38..3b3a237a955 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -3771,6 +3771,78 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, emit_move_insn (st_addr_1, st_tmp_1); } +/* Get the base alignment and offset of EXPR in A and O respectively. + Check for any pseudo register pointer alignment and for any tree + node information and return the largest alignment determined and + its associated offset. 
*/ + +static void +alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o) +{ + HOST_WIDE_INT tree_offset = 0, reg_offset = 0, mem_offset = 0; + int tree_align = 0, reg_align = 0, mem_align = MEM_ALIGN (expr); + + gcc_assert (MEM_P (expr)); + + rtx addr = XEXP (expr, 0); + switch (GET_CODE (addr)) + { + case REG: + reg_align = REGNO_POINTER_ALIGN (REGNO (addr)); + break; + + case PLUS: + if (REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1))) + { + reg_offset = INTVAL (XEXP (addr, 1)); + reg_align = REGNO_POINTER_ALIGN (REGNO (XEXP (addr, 0))); + } + break; + + default: + break; + } + + tree mem = MEM_EXPR (expr); + if (mem != NULL_TREE) + switch (TREE_CODE (mem)) + { + case MEM_REF: + tree_offset = mem_ref_offset (mem).force_shwi (); + tree_align = get_object_alignment (get_base_address (mem)); + break; + + case COMPONENT_REF: + { + tree byte_offset = component_ref_field_offset (mem); + tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1)); + poly_int64 offset; + if (!byte_offset + || !poly_int_tree_p (byte_offset, &offset) + || !tree_fits_shwi_p (bit_offset)) + break; + tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT; + } + tree_align = get_object_alignment (get_base_address (mem)); + break; + + default: + break; + } + + if (reg_align > mem_align) + { + mem_offset = reg_offset; + mem_align = reg_align; + } + if (tree_align > mem_align) + { + mem_offset = tree_offset; + mem_align = tree_align; + } + o = mem_offset; + a = mem_align; +} /* Expand string/block move operations. @@ -3799,27 +3871,19 @@ alpha_expand_block_move (rtx operands[]) else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) return 0; - /* Look for additional alignment information from recorded register info. */ + /* Look for stricter alignment. 
*/ + HOST_WIDE_INT c; + int a; - tmp = XEXP (orig_src, 0); - if (REG_P (tmp)) - src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) + alpha_get_mem_rtx_alignment_and_offset (orig_src, a, c); + if (a > src_align) { - unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); - unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); - - if (a > src_align) - { - if (a >= 64 && c % 8 == 0) - src_align = 64; - else if (a >= 32 && c % 4 == 0) - src_align = 32; - else if (a >= 16 && c % 2 == 0) - src_align = 16; - } + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; if (MEM_P (orig_src) && MEM_ALIGN (orig_src) < src_align) { @@ -3828,25 +3892,15 @@ alpha_expand_block_move (rtx operands[]) } } - tmp = XEXP (orig_dst, 0); - if (REG_P (tmp)) - dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) + alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c); + if (a > dst_align) { - unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); - unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); - - if (a > dst_align) - { - if (a >= 64 && c % 8 == 0) - dst_align = 64; - else if (a >= 32 && c % 4 == 0) - dst_align = 32; - else if (a >= 16 && c % 2 == 0) - dst_align = 16; - } + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < dst_align) { @@ -4048,7 +4102,6 @@ alpha_expand_block_clear (rtx operands[]) HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; HOST_WIDE_INT alignofs = 0; rtx orig_dst = operands[0]; - rtx tmp; int i, words, ofs = 0; if (orig_bytes <= 0) @@ -4057,25 +4110,18 @@ alpha_expand_block_clear (rtx operands[]) 
return 0; /* Look for stricter alignment. */ - tmp = XEXP (orig_dst, 0); - if (REG_P (tmp)) - align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); - else if (GET_CODE (tmp) == PLUS - && REG_P (XEXP (tmp, 0)) - && CONST_INT_P (XEXP (tmp, 1))) - { - HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); - int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + HOST_WIDE_INT c; + int a; - if (a > align) - { - if (a >= 64) - align = a, alignofs = 8 - c % 8; - else if (a >= 32) - align = a, alignofs = 4 - c % 4; - else if (a >= 16) - align = a, alignofs = 2 - c % 2; - } + alpha_get_mem_rtx_alignment_and_offset (orig_dst, a, c); + if (a > align) + { + if (a >= 64) + align = a, alignofs = -c & 7; + else if (a >= 32) + align = a, alignofs = -c & 3; + else if (a >= 16) + align = a, alignofs = -c & 1; if (MEM_P (orig_dst) && MEM_ALIGN (orig_dst) < align) { diff --git a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c index 06d0f0beffb..3f7edc890e4 100644 --- a/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c +++ b/gcc/testsuite/gcc.target/alpha/memclr-a2-o1-c9-ptr.c @@ -43,8 +43,8 @@ memclr_a2_o1_c9 (u_t *u) that is with a byte store at offset 1 and with two unaligned load/store pairs at offsets 2 and 9 each. 
*/ -/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ -/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,2\\\(\\\$16\\\)\\s" 1 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s\\\$\[0-9\]+,9\\\(\\\$16\\\)\\s" 1 } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c new file mode 100644 index 00000000000..fd3c2b90c57 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-aligned.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long aligned_src_di[9] = { [0 ... 8] = 0xe6e7e8e9eaebeced }; +unsigned long aligned_dst_di[9] = { [0 ... 
8] = 0xdcdbdad9d8d7d6d5 }; + +void +memcpy_aligned_data_di (void) +{ + __builtin_memcpy (aligned_dst_di + 1, aligned_src_di + 1, 56); +} + +/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c new file mode 100644 index 00000000000..5e9b5c32e52 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-dst.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 }; + +void +memcpy_unaligned_dst_di (void *dst) +{ + __builtin_memcpy (dst, unaligned_src_di + 1, 56); +} + +/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 2 } } */ +/* { dg-final { scan-assembler-times "\\sstq_u\\s" 8 } } */ +/* { dg-final { scan-assembler-not "\\sstq\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c new file mode 100644 index 00000000000..912fa56dcc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned-src.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } } */ + +unsigned long unaligned_dst_di[9] = { [0 ... 
8] = 0xc4c5c6c7c8c9cacb }; + +void +memcpy_unaligned_src_di (const void *src) +{ + __builtin_memcpy (unaligned_dst_di + 1, src, 56); +} + +/* { dg-final { scan-assembler-times "\\sstq\\s" 7 } } */ +/* { dg-final { scan-assembler-times "\\sldq_u\\s" 8 } } */ +/* { dg-final { scan-assembler-not "\\s(?:ldq|stq_u)\\s" } } */ diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c new file mode 100644 index 00000000000..fe7fc9b1d17 --- /dev/null +++ b/gcc/testsuite/gcc.target/alpha/memcpy-di-unaligned.c @@ -0,0 +1,51 @@ +/* { dg-do run } */ +/* { dg-additional-sources memcpy-di-aligned.c } */ +/* { dg-additional-sources memcpy-di-unaligned-src.c } */ +/* { dg-additional-sources memcpy-di-unaligned-dst.c } */ +/* { dg-options "" } */ + +void memcpy_aligned_data_di (void); +void memcpy_unaligned_dst_di (void *); +void memcpy_unaligned_src_di (const void *); + +extern unsigned long aligned_src_di[]; +extern unsigned long aligned_dst_di[]; +extern unsigned long unaligned_src_di[]; +extern unsigned long unaligned_dst_di[]; + +int +main (void) +{ + unsigned long v; + int i; + + for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808) + unaligned_src_di[i] = v; + asm ("" : : : "memory"); + memcpy_unaligned_dst_di (aligned_src_di + 1); + asm ("" : : : "memory"); + memcpy_aligned_data_di (); + asm ("" : : : "memory"); + memcpy_unaligned_src_di (aligned_dst_di + 1); + asm ("" : : : "memory"); + for (i = 1, v = 0x0807060504030201; i < 8; i++, v += 0x0808080808080808) + if (unaligned_dst_di[i] != v) + return 1; + if (unaligned_src_di[0] != 0xfefdfcfbfaf9f8f7) + return 1; + if (unaligned_src_di[8] != 0xfefdfcfbfaf9f8f7) + return 1; + if (aligned_src_di[0] != 0xe6e7e8e9eaebeced) + return 1; + if (aligned_src_di[8] != 0xe6e7e8e9eaebeced) + return 1; + if (aligned_dst_di[0] != 0xdcdbdad9d8d7d6d5) + return 1; + if (aligned_dst_di[8] != 0xdcdbdad9d8d7d6d5) + return 1; + if (unaligned_dst_di[0] != 
0xc4c5c6c7c8c9cacb) + return 1; + if (unaligned_dst_di[8] != 0xc4c5c6c7c8c9cacb) + return 1; + return 0; +}