Improve stack adjustment by reusing a temporary move immediate from the prolog...

Improve stack adjustment by reusing a temporary move immediate from the prolog
if the register is still valid in the epilog.  This generates smaller code for
leaf functions with a stack size of more than 4KB.

gcc/
	* config/aarch64/aarch64.c (aarch64_add_constant_internal):
	Add extra argument to allow emitting the move immediate.
	Use add/sub with positive immediate.
	(aarch64_add_constant): Add inline function.
	(aarch64_add_sp): Likewise.
	(aarch64_sub_sp): Likewise.
	(aarch64_expand_prologue): Call aarch64_sub_sp.
	(aarch64_expand_epilogue): Call aarch64_add_sp.
	Decide when to leave out move.
	(aarch64_output_mi_thunk): Call aarch64_add_constant.

testsuite/
	* gcc.target/aarch64/test_frame_17.c: New test.

From-SVN: r241420
This commit is contained in:
Wilco Dijkstra 2016-10-21 17:15:22 +00:00 committed by Wilco Dijkstra
parent 4b0685d915
commit 5be6b295d0
3 changed files with 67 additions and 24 deletions

View file

@ -1,3 +1,16 @@
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_add_constant_internal):
Add extra argument to allow emitting the move immediate.
Use add/sub with positive immediate.
(aarch64_add_constant): Add inline function.
(aarch64_add_sp): Likewise.
(aarch64_sub_sp): Likewise.
(aarch64_expand_prologue): Call aarch64_sub_sp.
(aarch64_expand_epilogue): Call aarch64_add_sp.
Decide when to leave out move.
(aarch64_output_mi_thunk): Call aarch64_add_constant.
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_layout_frame):

View file

@ -1954,26 +1954,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}
/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to held
intermediate value if necessary.
/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
temporary value if necessary. FRAME_RELATED_P should be true if
the RTX_FRAME_RELATED flag should be set and CFA adjustments added
to the generated instructions. If SCRATCHREG is known to hold
abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
immediate again.
This function is sometimes used to adjust the stack pointer, so we must
ensure that it can never cause transient stack deallocation by writing an
invalid value into REGNUM. */
Since this function may be used to adjust the stack pointer, we must
ensure that it cannot cause transient stack deallocation (for example
by first incrementing SP and then decrementing when adjusting by a
large immediate). */
static void
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
HOST_WIDE_INT delta, bool frame_related_p)
aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
HOST_WIDE_INT delta, bool frame_related_p,
bool emit_move_imm)
{
HOST_WIDE_INT mdelta = abs_hwi (delta);
rtx this_rtx = gen_rtx_REG (mode, regnum);
rtx_insn *insn;
/* Do nothing if mdelta is zero. */
if (!mdelta)
return;
/* We only need single instruction if the offset fit into add/sub. */
/* Single instruction adjustment. */
if (aarch64_uimm12_shift (mdelta))
{
insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
@ -1981,11 +1986,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
return;
}
/* We need two add/sub instructions, each one performing part of the
calculation. Don't do this if the addend can be loaded into register with
a single instruction, in that case we prefer a move to a scratch register
following by an addition. */
if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
/* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
Only do this if mdelta is not a 16-bit move as adjusting using a move
is better. */
if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
{
HOST_WIDE_INT low_off = mdelta & 0xfff;
@ -1997,10 +2001,12 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
return;
}
/* Otherwise use generic function to handle all other situations. */
/* Emit a move immediate if required and an addition/subtraction. */
rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
if (emit_move_imm)
aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
: gen_add2_insn (this_rtx, scratch_rtx));
if (frame_related_p)
{
RTX_FRAME_RELATED_P (insn) = frame_related_p;
@ -2009,6 +2015,27 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
}
}
/* Add DELTA to REGNUM in mode MODE, using SCRATCHREG for a temporary value
   if one is needed.  This is the general-purpose entry point: the emitted
   instructions are not flagged as frame related, and the move immediate
   into SCRATCHREG is always emitted when the adjustment requires it.  */
static inline void
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
		      HOST_WIDE_INT delta)
{
  /* Not a frame adjustment, and SCRATCHREG is not assumed to already hold
     the immediate.  */
  const bool frame_related_p = false;
  const bool emit_move_imm = true;

  aarch64_add_constant_internal (mode, regnum, scratchreg, delta,
				 frame_related_p, emit_move_imm);
}
/* Add DELTA to the stack pointer, using SCRATCHREG for a temporary value if
   one is needed.  The adjustment is always requested as frame related.
   EMIT_MOVE_IMM is forwarded unchanged: pass false when SCRATCHREG is known
   to already hold the magnitude of DELTA so that the move immediate is not
   emitted again.  */
static inline void
aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
{
  /* Stack pointer increments made through this helper are always frame
     related.  */
  const bool frame_related_p = true;

  aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
				 frame_related_p, emit_move_imm);
}
/* Subtract DELTA from the stack pointer, using SCRATCHREG for a temporary
   value if one is needed.  FRAME_RELATED_P is forwarded unchanged and
   selects whether the adjustment is requested as frame related.  The move
   immediate into SCRATCHREG is always emitted when the adjustment requires
   it.  */
static inline void
aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
{
  /* SCRATCHREG is not assumed to hold the immediate yet, so always allow
     the move to be emitted.  */
  const bool emit_move_imm = true;

  /* A subtraction is expressed as adding the negated amount.  */
  aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
				 frame_related_p, emit_move_imm);
}
static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
tree exp ATTRIBUTE_UNUSED)
@ -3230,7 +3257,7 @@ aarch64_expand_prologue (void)
aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
}
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, -initial_adjust, true);
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
if (callee_adjust != 0)
aarch64_push_regs (reg1, reg2, callee_adjust);
@ -3251,8 +3278,7 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || frame_pointer_needed);
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || frame_pointer_needed);
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, -final_adjust,
!frame_pointer_needed);
aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
}
/* Return TRUE if we can use a simple_return insn.
@ -3317,7 +3343,7 @@ aarch64_expand_epilogue (bool for_sibcall)
RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
}
else
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0, &cfi_ops);
@ -3340,7 +3366,7 @@ aarch64_expand_epilogue (bool for_sibcall)
cfi_ops = NULL;
}
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
if (cfi_ops)
{
@ -3435,7 +3461,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
emit_note (NOTE_INSN_PROLOGUE_END);
if (vcall_offset == 0)
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false);
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
else
{
gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
@ -3451,7 +3477,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
plus_constant (Pmode, this_rtx, delta));
else
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false);
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
}
if (Pmode == ptr_mode)

View file

@ -1,3 +1,7 @@
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.target/aarch64/test_frame_17.c: New test.
2016-10-21 Andris Pavenis <andris.pavenis@iki.fi>
PR preprocessor/71681