rs6000: load adjacent vectors with one LXVP when building a vector pair/quad

Summary of the changes carried by this patch:

* adjacent_mem_locations now returns an rtx instead of a bool: the
  argument with the lower address when MEM1 and MEM2 are adjacent,
  NULL_RTX otherwise.  It also checks MEM_P on both arguments before
  calling get_memref_parts.
* rs6000_split_multireg_move, in the loop over the source vector
  elements: when the destination register number is even and its source
  element is adjacent in memory to the next register's source element,
  a single OOmode load is emitted for both and the next register is
  skipped.  Big-endian and little-endian element orders are handled in
  separate branches.
* The two "unsigned subreg" hunks are formatting-only.
* New test gcc.target/powerpc/mma-builtin-9.c expects no lxv/stxv and
  exactly three lxvp and three stxvp instructions.

NOTE(review): leading whitespace inside the hunks follows GNU style (tabs
for each 8 columns) and is assumed to match the target file -- confirm,
or apply with whitespace differences ignored.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1d27bb8b58c..ce29b372bd0 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -18051,23 +18051,29 @@ get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
   return true;
 }
 
-/* The function returns true if the target storage location of
-   mem1 is adjacent to the target storage location of mem2 */
-/* Return 1 if memory locations are adjacent.  */
+/* If the target storage locations of arguments MEM1 and MEM2 are
+   adjacent, then return the argument that has the lower address.
+   Otherwise, return NULL_RTX.  */
 
-static bool
+static rtx
 adjacent_mem_locations (rtx mem1, rtx mem2)
 {
   rtx reg1, reg2;
   HOST_WIDE_INT off1, size1, off2, size2;
 
-  if (get_memref_parts (mem1, &reg1, &off1, &size1)
-      && get_memref_parts (mem2, &reg2, &off2, &size2))
-    return ((REGNO (reg1) == REGNO (reg2))
-	    && ((off1 + size1 == off2)
-		|| (off2 + size2 == off1)));
+  if (MEM_P (mem1)
+      && MEM_P (mem2)
+      && get_memref_parts (mem1, &reg1, &off1, &size1)
+      && get_memref_parts (mem2, &reg2, &off2, &size2)
+      && REGNO (reg1) == REGNO (reg2))
+    {
+      if (off1 + size1 == off2)
+	return mem1;
+      else if (off2 + size2 == off1)
+	return mem2;
+    }
 
-  return false;
+  return NULL_RTX;
 }
 
 /* This function returns true if it can be determined that the two MEM
@@ -26708,8 +26714,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      unsigned subreg
+		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = adjust_address (dst, reg_mode, offset);
 	      rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      offset += size;
@@ -26726,8 +26732,8 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 
 	  for (int i = 0; i < nregs; i += reg_mode_nregs)
 	    {
-	      unsigned subreg =
-		(WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i);
+	      unsigned subreg
+		= WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
 	      rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
 	      rtx src2 = adjust_address (src, reg_mode, offset);
 	      offset += size;
@@ -26752,13 +26758,53 @@ rs6000_split_multireg_move (rtx dst, rtx src)
 	  if (GET_MODE (src) == OOmode)
 	    gcc_assert (VSX_REGNO_P (REGNO (dst)));
 
-	  reg_mode = GET_MODE (XVECEXP (src, 0, 0));
 	  int nvecs = XVECLEN (src, 0);
 	  for (int i = 0; i < nvecs; i++)
 	    {
-	      int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i;
-	      rtx dst_i = gen_rtx_REG (reg_mode, reg + index);
-	      emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i)));
+	      rtx op;
+	      int regno = reg + i;
+
+	      if (WORDS_BIG_ENDIAN)
+		{
+		  op = XVECEXP (src, 0, i);
+
+		  /* If we are loading an even VSX register and the memory location
+		     is adjacent to the next register's memory location (if any),
+		     then we can load them both with one LXVP instruction.  */
+		  if ((regno & 1) == 0)
+		    {
+		      rtx op2 = XVECEXP (src, 0, i + 1);
+		      if (adjacent_mem_locations (op, op2) == op)
+			{
+			  op = adjust_address (op, OOmode, 0);
+			  /* Skip the next register, since we're going to
+			     load it together with this register.  */
+			  i++;
+			}
+		    }
+		}
+	      else
+		{
+		  op = XVECEXP (src, 0, nvecs - i - 1);
+
+		  /* If we are loading an even VSX register and the memory location
+		     is adjacent to the next register's memory location (if any),
+		     then we can load them both with one LXVP instruction.  */
+		  if ((regno & 1) == 0)
+		    {
+		      rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
+		      if (adjacent_mem_locations (op2, op) == op2)
+			{
+			  op = adjust_address (op2, OOmode, 0);
+			  /* Skip the next register, since we're going to
+			     load it together with this register.  */
+			  i++;
+			}
+		    }
+		}
+
+	      rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
+	      emit_insn (gen_rtx_SET (dst_i, op));
 	    }
 
 	  /* We are writing an accumulator register, so we have to
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c
new file mode 100644
index 00000000000..397d0f1db35
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-9.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+foo (__vector_pair *dst, vec_t *src)
+{
+  __vector_pair pair;
+  /* Adjacent loads should be combined into one lxvp instruction.  */
+  __builtin_vsx_build_pair (&pair, src[0], src[1]);
+  *dst = pair;
+}
+
+void
+bar (__vector_quad *dst, vec_t *src)
+{
+  __vector_quad quad;
+  /* Adjacent loads should be combined into two lxvp instructions.  */
+  __builtin_mma_build_acc (&quad, src[0], src[1], src[2], src[3]);
+  *dst = quad;
+}
+
+/* { dg-final { scan-assembler-not {\mlxv\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv\M} } } */
+/* { dg-final { scan-assembler-times {\mlxvp\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M} 3 } } */