Improve stack adjustment by reusing a temporary move immediate from the epilog...
Improve stack adjustment by reusing a temporary move immediate from the epilog if the register is still valid in the epilog. This generates smaller code for leaf functions with a stack size of more than 4KB. gcc/ * config/aarch64/aarch64.c (aarch64_add_constant_internal): Add extra argument to allow emitting the move immediate. Use add/sub with positive immediate. (aarch64_add_constant): Add inline function. (aarch64_add_sp): Likewise. (aarch64_sub_sp): Likewise. (aarch64_expand_prologue): Call aarch64_sub_sp. (aarch64_expand_epilogue): Call aarch64_add_sp. Decide when to leave out move. (aarch64_output_mi_thunk): Call aarch64_add_constant. testsuite/ * gcc.target/aarch64/test_frame_17.c: New test. From-SVN: r241420
This commit is contained in:
parent
4b0685d915
commit
5be6b295d0
3 changed files with 67 additions and 24 deletions
|
@ -1,3 +1,16 @@
|
|||
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_add_constant_internal):
|
||||
Add extra argument to allow emitting the move immediate.
|
||||
Use add/sub with positive immediate.
|
||||
(aarch64_add_constant): Add inline function.
|
||||
(aarch64_add_sp): Likewise.
|
||||
(aarch64_sub_sp): Likewise.
|
||||
(aarch64_expand_prologue): Call aarch64_sub_sp.
|
||||
(aarch64_expand_epilogue): Call aarch64_add_sp.
|
||||
Decide when to leave out move.
|
||||
(aarch64_output_mi_thunk): Call aarch64_add_constant.
|
||||
|
||||
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* config/aarch64/aarch64.c (aarch64_layout_frame):
|
||||
|
|
|
@ -1954,26 +1954,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
|||
aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
|
||||
}
|
||||
|
||||
/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to held
|
||||
intermediate value if necessary.
|
||||
/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
|
||||
temporary value if necessary. FRAME_RELATED_P should be true if
|
||||
the RTX_FRAME_RELATED flag should be set and CFA adjustments added
|
||||
to the generated instructions. If SCRATCHREG is known to hold
|
||||
abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
|
||||
immediate again.
|
||||
|
||||
This function is sometimes used to adjust the stack pointer, so we must
|
||||
ensure that it can never cause transient stack deallocation by writing an
|
||||
invalid value into REGNUM. */
|
||||
Since this function may be used to adjust the stack pointer, we must
|
||||
ensure that it cannot cause transient stack deallocation (for example
|
||||
by first incrementing SP and then decrementing when adjusting by a
|
||||
large immediate). */
|
||||
|
||||
static void
|
||||
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
|
||||
HOST_WIDE_INT delta, bool frame_related_p)
|
||||
aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
|
||||
HOST_WIDE_INT delta, bool frame_related_p,
|
||||
bool emit_move_imm)
|
||||
{
|
||||
HOST_WIDE_INT mdelta = abs_hwi (delta);
|
||||
rtx this_rtx = gen_rtx_REG (mode, regnum);
|
||||
rtx_insn *insn;
|
||||
|
||||
/* Do nothing if mdelta is zero. */
|
||||
if (!mdelta)
|
||||
return;
|
||||
|
||||
/* We only need single instruction if the offset fit into add/sub. */
|
||||
/* Single instruction adjustment. */
|
||||
if (aarch64_uimm12_shift (mdelta))
|
||||
{
|
||||
insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
|
||||
|
@ -1981,11 +1986,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
|
|||
return;
|
||||
}
|
||||
|
||||
/* We need two add/sub instructions, each one performing part of the
|
||||
calculation. Don't do this if the addend can be loaded into register with
|
||||
a single instruction, in that case we prefer a move to a scratch register
|
||||
following by an addition. */
|
||||
if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
|
||||
/* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
|
||||
Only do this if mdelta is not a 16-bit move as adjusting using a move
|
||||
is better. */
|
||||
if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
|
||||
{
|
||||
HOST_WIDE_INT low_off = mdelta & 0xfff;
|
||||
|
||||
|
@ -1997,10 +2001,12 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Otherwise use generic function to handle all other situations. */
|
||||
/* Emit a move immediate if required and an addition/subtraction. */
|
||||
rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
|
||||
aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
|
||||
insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
|
||||
if (emit_move_imm)
|
||||
aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
|
||||
insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
|
||||
: gen_add2_insn (this_rtx, scratch_rtx));
|
||||
if (frame_related_p)
|
||||
{
|
||||
RTX_FRAME_RELATED_P (insn) = frame_related_p;
|
||||
|
@ -2009,6 +2015,27 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
|
|||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
|
||||
HOST_WIDE_INT delta)
|
||||
{
|
||||
aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
|
||||
}
|
||||
|
||||
static inline void
|
||||
aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
|
||||
{
|
||||
aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
|
||||
true, emit_move_imm);
|
||||
}
|
||||
|
||||
static inline void
|
||||
aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
|
||||
{
|
||||
aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
|
||||
frame_related_p, true);
|
||||
}
|
||||
|
||||
static bool
|
||||
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
|
||||
tree exp ATTRIBUTE_UNUSED)
|
||||
|
@ -3230,7 +3257,7 @@ aarch64_expand_prologue (void)
|
|||
aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
|
||||
}
|
||||
|
||||
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, -initial_adjust, true);
|
||||
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
|
||||
|
||||
if (callee_adjust != 0)
|
||||
aarch64_push_regs (reg1, reg2, callee_adjust);
|
||||
|
@ -3251,8 +3278,7 @@ aarch64_expand_prologue (void)
|
|||
callee_adjust != 0 || frame_pointer_needed);
|
||||
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
|
||||
callee_adjust != 0 || frame_pointer_needed);
|
||||
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, -final_adjust,
|
||||
!frame_pointer_needed);
|
||||
aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
|
||||
}
|
||||
|
||||
/* Return TRUE if we can use a simple_return insn.
|
||||
|
@ -3317,7 +3343,7 @@ aarch64_expand_epilogue (bool for_sibcall)
|
|||
RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
|
||||
}
|
||||
else
|
||||
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
|
||||
aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
|
||||
|
||||
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
|
||||
callee_adjust != 0, &cfi_ops);
|
||||
|
@ -3340,7 +3366,7 @@ aarch64_expand_epilogue (bool for_sibcall)
|
|||
cfi_ops = NULL;
|
||||
}
|
||||
|
||||
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
|
||||
aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
|
||||
|
||||
if (cfi_ops)
|
||||
{
|
||||
|
@ -3435,7 +3461,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
|
|||
emit_note (NOTE_INSN_PROLOGUE_END);
|
||||
|
||||
if (vcall_offset == 0)
|
||||
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false);
|
||||
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
|
||||
else
|
||||
{
|
||||
gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
|
||||
|
@ -3451,7 +3477,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
|
|||
addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
|
||||
plus_constant (Pmode, this_rtx, delta));
|
||||
else
|
||||
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false);
|
||||
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
|
||||
}
|
||||
|
||||
if (Pmode == ptr_mode)
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
|
||||
|
||||
* gcc.target/aarch64/test_frame_17.c: New test.
|
||||
|
||||
2016-10-21 Andris Pavenis <andris.pavenis@iki.fi>
|
||||
|
||||
PR preprocessor/71681
|
||||
|
|
Loading…
Add table
Reference in a new issue