diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index de11de5e079..1d27bb8b58c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16690,382 +16690,6 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val, emit_move_insn (orig_after, after); } -/* Emit instructions to move SRC to DST. Called by splitters for - multi-register moves. It will emit at most one instruction for - each register that is accessed; that is, it won't emit li/lis pairs - (or equivalent for 64-bit code). One of SRC or DST must be a hard - register. */ - -void -rs6000_split_multireg_move (rtx dst, rtx src) -{ - /* The register number of the first register being moved. */ - int reg; - /* The mode that is to be moved. */ - machine_mode mode; - /* The mode that the move is being done in, and its size. */ - machine_mode reg_mode; - int reg_mode_size; - /* The number of registers that will be moved. */ - int nregs; - - reg = REG_P (dst) ? REGNO (dst) : REGNO (src); - mode = GET_MODE (dst); - nregs = hard_regno_nregs (reg, mode); - - /* If we have a vector quad register for MMA, and this is a load or store, - see if we can use vector paired load/stores. */ - if (mode == XOmode && TARGET_MMA - && (MEM_P (dst) || MEM_P (src))) - { - reg_mode = OOmode; - nregs /= 2; - } - /* If we have a vector pair/quad mode, split it into two/four separate - vectors. */ - else if (mode == OOmode || mode == XOmode) - reg_mode = V1TImode; - else if (FP_REGNO_P (reg)) - reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : - (TARGET_HARD_FLOAT ? DFmode : SFmode); - else if (ALTIVEC_REGNO_P (reg)) - reg_mode = V16QImode; - else - reg_mode = word_mode; - reg_mode_size = GET_MODE_SIZE (reg_mode); - - gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); - - /* TDmode residing in FP registers is special, since the ISA requires that - the lower-numbered word of a register pair is always the most significant - word, even in little-endian mode. 
This does not match the usual subreg - semantics, so we cannnot use simplify_gen_subreg in those cases. Access - the appropriate constituent registers "by hand" in little-endian mode. - - Note we do not need to check for destructive overlap here since TDmode - can only reside in even/odd register pairs. */ - if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) - { - rtx p_src, p_dst; - int i; - - for (i = 0; i < nregs; i++) - { - if (REG_P (src) && FP_REGNO_P (REGNO (src))) - p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); - else - p_src = simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size); - - if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) - p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); - else - p_dst = simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size); - - emit_insn (gen_rtx_SET (p_dst, p_src)); - } - - return; - } - - /* The __vector_pair and __vector_quad modes are multi-register - modes, so if we have to load or store the registers, we have to be - careful to properly swap them if we're in little endian mode - below. This means the last register gets the first memory - location. We also need to be careful of using the right register - numbers if we are splitting XO to OO. */ - if (mode == OOmode || mode == XOmode) - { - nregs = hard_regno_nregs (reg, mode); - int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); - if (MEM_P (dst)) - { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); - - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? 
i : (nregs - reg_mode_nregs - i); - rtx dst2 = adjust_address (dst, reg_mode, offset); - rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } - - return; - } - - if (MEM_P (src)) - { - unsigned offset = 0; - unsigned size = GET_MODE_SIZE (reg_mode); - - for (int i = 0; i < nregs; i += reg_mode_nregs) - { - unsigned subreg = - (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); - rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); - rtx src2 = adjust_address (src, reg_mode, offset); - offset += size; - emit_insn (gen_rtx_SET (dst2, src2)); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - return; - } - - if (GET_CODE (src) == UNSPEC) - { - gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); - gcc_assert (REG_P (dst)); - if (GET_MODE (src) == XOmode) - gcc_assert (FP_REGNO_P (REGNO (dst))); - if (GET_MODE (src) == OOmode) - gcc_assert (VSX_REGNO_P (REGNO (dst))); - - reg_mode = GET_MODE (XVECEXP (src, 0, 0)); - int nvecs = XVECLEN (src, 0); - for (int i = 0; i < nvecs; i++) - { - int index = WORDS_BIG_ENDIAN ? i : nvecs - 1 - i; - rtx dst_i = gen_rtx_REG (reg_mode, reg + index); - emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); - } - - /* We are writing an accumulator register, so we have to - prime it after we've written it. */ - if (GET_MODE (src) == XOmode) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - return; - } - - /* Register -> register moves can use common code. */ - } - - if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) - { - /* If we are reading an accumulator register, we have to - deprime it before we can access it. 
*/ - if (TARGET_MMA - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - /* Move register range backwards, if we might have destructive - overlap. */ - int i; - /* XO/OO are opaque so cannot use subregs. */ - if (mode == OOmode || mode == XOmode ) - { - for (i = nregs - 1; i >= 0; i--) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - } - else - { - for (i = nregs - 1; i >= 0; i--) - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - i * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - i * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - } - else - { - int i; - int j = -1; - bool used_update = false; - rtx restore_basereg = NULL_RTX; - - if (MEM_P (src) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (src, 0)) == PRE_INC - || GET_CODE (XEXP (src, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (src, 0), 0); - delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (src))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - src = replace_equiv_address (src, breg); - } - else if (! 
rs6000_offsettable_memref_p (src, reg_mode, true)) - { - if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (src, 0), 0); - if (TARGET_UPDATE) - { - rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); - emit_insn (gen_rtx_SET (ndst, - gen_rtx_MEM (reg_mode, - XEXP (src, 0)))); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (src, 0), 1))); - src = replace_equiv_address (src, basereg); - } - else - { - rtx basereg = gen_rtx_REG (Pmode, reg); - emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); - src = replace_equiv_address (src, basereg); - } - } - - breg = XEXP (src, 0); - if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) - breg = XEXP (breg, 0); - - /* If the base register we are using to address memory is - also a destination reg, then change that register last. */ - if (REG_P (breg) - && REGNO (breg) >= REGNO (dst) - && REGNO (breg) < REGNO (dst) + nregs) - j = REGNO (breg) - REGNO (dst); - } - else if (MEM_P (dst) && INT_REGNO_P (reg)) - { - rtx breg; - - if (GET_CODE (XEXP (dst, 0)) == PRE_INC - || GET_CODE (XEXP (dst, 0)) == PRE_DEC) - { - rtx delta_rtx; - breg = XEXP (XEXP (dst, 0), 0); - delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC - ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) - : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); - - /* We have to update the breg before doing the store. - Use store with update, if available. */ - - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (TARGET_32BIT - ? (TARGET_POWERPC64 - ? 
gen_movdi_si_update (breg, breg, delta_rtx, nsrc) - : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) - : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); - used_update = true; - } - else - emit_insn (gen_add3_insn (breg, breg, delta_rtx)); - dst = replace_equiv_address (dst, breg); - } - else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) - && GET_CODE (XEXP (dst, 0)) != LO_SUM) - { - if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - if (TARGET_UPDATE) - { - rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); - emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, - XEXP (dst, 0)), - nsrc)); - used_update = true; - } - else - emit_insn (gen_rtx_SET (basereg, - XEXP (XEXP (dst, 0), 1))); - dst = replace_equiv_address (dst, basereg); - } - else - { - rtx basereg = XEXP (XEXP (dst, 0), 0); - rtx offsetreg = XEXP (XEXP (dst, 0), 1); - gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS - && REG_P (basereg) - && REG_P (offsetreg) - && REGNO (basereg) != REGNO (offsetreg)); - if (REGNO (basereg) == 0) - { - rtx tmp = offsetreg; - offsetreg = basereg; - basereg = tmp; - } - emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); - restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); - dst = replace_equiv_address (dst, basereg); - } - } - else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) - gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); - } - - /* If we are reading an accumulator register, we have to - deprime it before we can access it. */ - if (TARGET_MMA && REG_P (src) - && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) - emit_insn (gen_mma_xxmfacc (src, src)); - - for (i = 0; i < nregs; i++) - { - /* Calculate index to next subword. */ - ++j; - if (j == nregs) - j = 0; - - /* If compiler already emitted move of first word by - store with update, no need to do anything. */ - if (j == 0 && used_update) - continue; - - /* XO/OO are opaque so cannot use subregs. 
*/ - if (mode == OOmode || mode == XOmode ) - { - rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); - rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); - emit_insn (gen_rtx_SET (dst_i, src_i)); - } - else - emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, - j * reg_mode_size), - simplify_gen_subreg (reg_mode, src, mode, - j * reg_mode_size))); - } - - /* If we are writing an accumulator register, we have to - prime it after we've written it. */ - if (TARGET_MMA && REG_P (dst) - && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) - emit_insn (gen_mma_xxmtacc (dst, dst)); - - if (restore_basereg != NULL_RTX) - emit_insn (restore_basereg); - } -} - static GTY(()) alias_set_type TOC_alias_set = -1; alias_set_type @@ -26982,6 +26606,381 @@ rs6000_split_logical (rtx operands[3], return; } +/* Emit instructions to move SRC to DST. Called by splitters for + multi-register moves. It will emit at most one instruction for + each register that is accessed; that is, it won't emit li/lis pairs + (or equivalent for 64-bit code). One of SRC or DST must be a hard + register. */ + +void +rs6000_split_multireg_move (rtx dst, rtx src) +{ + /* The register number of the first register being moved. */ + int reg; + /* The mode that is to be moved. */ + machine_mode mode; + /* The mode that the move is being done in, and its size. */ + machine_mode reg_mode; + int reg_mode_size; + /* The number of registers that will be moved. */ + int nregs; + + reg = REG_P (dst) ? REGNO (dst) : REGNO (src); + mode = GET_MODE (dst); + nregs = hard_regno_nregs (reg, mode); + + /* If we have a vector quad register for MMA, and this is a load or store, + see if we can use vector paired load/stores. */ + if (mode == XOmode && TARGET_MMA + && (MEM_P (dst) || MEM_P (src))) + { + reg_mode = OOmode; + nregs /= 2; + } + /* If we have a vector pair/quad mode, split it into two/four separate + vectors. 
*/ + else if (mode == OOmode || mode == XOmode) + reg_mode = V1TImode; + else if (FP_REGNO_P (reg)) + reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : + (TARGET_HARD_FLOAT ? DFmode : SFmode); + else if (ALTIVEC_REGNO_P (reg)) + reg_mode = V16QImode; + else + reg_mode = word_mode; + reg_mode_size = GET_MODE_SIZE (reg_mode); + + gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode)); + + /* TDmode residing in FP registers is special, since the ISA requires that + the lower-numbered word of a register pair is always the most significant + word, even in little-endian mode. This does not match the usual subreg + semantics, so we cannot use simplify_gen_subreg in those cases. Access + the appropriate constituent registers "by hand" in little-endian mode. + + Note we do not need to check for destructive overlap here since TDmode + can only reside in even/odd register pairs. */ + if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN) + { + rtx p_src, p_dst; + int i; + + for (i = 0; i < nregs; i++) + { + if (REG_P (src) && FP_REGNO_P (REGNO (src))) + p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i); + else + p_src = simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size); + + if (REG_P (dst) && FP_REGNO_P (REGNO (dst))) + p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i); + else + p_dst = simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size); + + emit_insn (gen_rtx_SET (p_dst, p_src)); + } + + return; + } + + /* The __vector_pair and __vector_quad modes are multi-register + modes, so if we have to load or store the registers, we have to be + careful to properly swap them if we're in little endian mode + below. This means the last register gets the first memory + location. We also need to be careful of using the right register + numbers if we are splitting XO to OO. 
*/ + if (mode == OOmode || mode == XOmode) + { + nregs = hard_regno_nregs (reg, mode); + int reg_mode_nregs = hard_regno_nregs (reg, reg_mode); + if (MEM_P (dst)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); + + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); + + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = adjust_address (dst, reg_mode, offset); + rtx src2 = gen_rtx_REG (reg_mode, reg + subreg); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } + + return; + } + + if (MEM_P (src)) + { + unsigned offset = 0; + unsigned size = GET_MODE_SIZE (reg_mode); + + for (int i = 0; i < nregs; i += reg_mode_nregs) + { + unsigned subreg = + (WORDS_BIG_ENDIAN) ? i : (nregs - reg_mode_nregs - i); + rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg); + rtx src2 = adjust_address (src, reg_mode, offset); + offset += size; + emit_insn (gen_rtx_SET (dst2, src2)); + } + + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); + + return; + } + + if (GET_CODE (src) == UNSPEC) + { + gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); + gcc_assert (REG_P (dst)); + if (GET_MODE (src) == XOmode) + gcc_assert (FP_REGNO_P (REGNO (dst))); + if (GET_MODE (src) == OOmode) + gcc_assert (VSX_REGNO_P (REGNO (dst))); + + reg_mode = GET_MODE (XVECEXP (src, 0, 0)); + int nvecs = XVECLEN (src, 0); + for (int i = 0; i < nvecs; i++) + { + int index = WORDS_BIG_ENDIAN ? 
i : nvecs - 1 - i; + rtx dst_i = gen_rtx_REG (reg_mode, reg + index); + emit_insn (gen_rtx_SET (dst_i, XVECEXP (src, 0, i))); + } + + /* We are writing an accumulator register, so we have to + prime it after we've written it. */ + if (GET_MODE (src) == XOmode) + emit_insn (gen_mma_xxmtacc (dst, dst)); + + return; + } + + /* Register -> register moves can use common code. */ + } + + if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst))) + { + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); + + /* Move register range backwards, if we might have destructive + overlap. */ + int i; + /* XO/OO are opaque so cannot use subregs. */ + if (mode == OOmode || mode == XOmode ) + { + for (i = nregs - 1; i >= 0; i--) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i); + emit_insn (gen_rtx_SET (dst_i, src_i)); + } + } + else + { + for (i = nregs - 1; i >= 0; i--) + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + i * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + i * reg_mode_size))); + } + + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); + } + else + { + int i; + int j = -1; + bool used_update = false; + rtx restore_basereg = NULL_RTX; + + if (MEM_P (src) && INT_REGNO_P (reg)) + { + rtx breg; + + if (GET_CODE (XEXP (src, 0)) == PRE_INC + || GET_CODE (XEXP (src, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (src, 0), 0); + delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC + ? 
GEN_INT (GET_MODE_SIZE (GET_MODE (src))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (src)))); + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + src = replace_equiv_address (src, breg); + } + else if (! rs6000_offsettable_memref_p (src, reg_mode, true)) + { + if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (src, 0), 0); + if (TARGET_UPDATE) + { + rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0); + emit_insn (gen_rtx_SET (ndst, + gen_rtx_MEM (reg_mode, + XEXP (src, 0)))); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (src, 0), 1))); + src = replace_equiv_address (src, basereg); + } + else + { + rtx basereg = gen_rtx_REG (Pmode, reg); + emit_insn (gen_rtx_SET (basereg, XEXP (src, 0))); + src = replace_equiv_address (src, basereg); + } + } + + breg = XEXP (src, 0); + if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM) + breg = XEXP (breg, 0); + + /* If the base register we are using to address memory is + also a destination reg, then change that register last. */ + if (REG_P (breg) + && REGNO (breg) >= REGNO (dst) + && REGNO (breg) < REGNO (dst) + nregs) + j = REGNO (breg) - REGNO (dst); + } + else if (MEM_P (dst) && INT_REGNO_P (reg)) + { + rtx breg; + + if (GET_CODE (XEXP (dst, 0)) == PRE_INC + || GET_CODE (XEXP (dst, 0)) == PRE_DEC) + { + rtx delta_rtx; + breg = XEXP (XEXP (dst, 0), 0); + delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC + ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst))) + : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst)))); + + /* We have to update the breg before doing the store. + Use store with update, if available. */ + + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (TARGET_32BIT + ? (TARGET_POWERPC64 + ? 
gen_movdi_si_update (breg, breg, delta_rtx, nsrc) + : gen_movsi_si_update (breg, breg, delta_rtx, nsrc)) + : gen_movdi_di_update (breg, breg, delta_rtx, nsrc)); + used_update = true; + } + else + emit_insn (gen_add3_insn (breg, breg, delta_rtx)); + dst = replace_equiv_address (dst, breg); + } + else if (!rs6000_offsettable_memref_p (dst, reg_mode, true) + && GET_CODE (XEXP (dst, 0)) != LO_SUM) + { + if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY) + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + if (TARGET_UPDATE) + { + rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0); + emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode, + XEXP (dst, 0)), + nsrc)); + used_update = true; + } + else + emit_insn (gen_rtx_SET (basereg, + XEXP (XEXP (dst, 0), 1))); + dst = replace_equiv_address (dst, basereg); + } + else + { + rtx basereg = XEXP (XEXP (dst, 0), 0); + rtx offsetreg = XEXP (XEXP (dst, 0), 1); + gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS + && REG_P (basereg) + && REG_P (offsetreg) + && REGNO (basereg) != REGNO (offsetreg)); + if (REGNO (basereg) == 0) + { + rtx tmp = offsetreg; + offsetreg = basereg; + basereg = tmp; + } + emit_insn (gen_add3_insn (basereg, basereg, offsetreg)); + restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg); + dst = replace_equiv_address (dst, basereg); + } + } + else if (GET_CODE (XEXP (dst, 0)) != LO_SUM) + gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true)); + } + + /* If we are reading an accumulator register, we have to + deprime it before we can access it. */ + if (TARGET_MMA && REG_P (src) + && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src))) + emit_insn (gen_mma_xxmfacc (src, src)); + + for (i = 0; i < nregs; i++) + { + /* Calculate index to next subword. */ + ++j; + if (j == nregs) + j = 0; + + /* If compiler already emitted move of first word by + store with update, no need to do anything. */ + if (j == 0 && used_update) + continue; + + /* XO/OO are opaque so cannot use subregs. 
*/ + if (mode == OOmode || mode == XOmode ) + { + rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j); + rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j); + emit_insn (gen_rtx_SET (dst_i, src_i)); + } + else + emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode, + j * reg_mode_size), + simplify_gen_subreg (reg_mode, src, mode, + j * reg_mode_size))); + } + + /* If we are writing an accumulator register, we have to + prime it after we've written it. */ + if (TARGET_MMA && REG_P (dst) + && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst))) + emit_insn (gen_mma_xxmtacc (dst, dst)); + + if (restore_basereg != NULL_RTX) + emit_insn (restore_basereg); + } +} /* Return true if the peephole2 can combine a load involving a combination of an addis instruction and a load with an offset that can be fused together on