xtensa: Eliminate unused stack frame allocation/freeing

In the example below, 'x' is once placed on the stack frame and then read
into registers as the argument value of bar():

    /* example */
    struct foo {
      int a, b;
    };
    extern struct foo bar(struct foo);
    struct foo test(void) {
      struct foo x = { 0, 1 };
      return bar(x);
    }

Thanks to the dead store elimination, the initialization of 'x' turns into
merely loading the immediates to registers, but corresponding stack frame
growth is not rolled back.  As a result:

    ;; prereq: the CALL0 ABI
    ;; before
    test:
	addi	sp, sp, -16	// unused stack frame allocation/freeing
	movi.n	a2, 0
	movi.n	a3, 1
	addi	sp, sp, 16	// because no instructions that refer to
	j.l	bar, a9		// the stack pointer between the two

This patch eliminates such unused stack frame allocation/freeing:

    ;; after
    test:
	movi.n	a2, 0
	movi.n	a3, 1
	j.l	bar, a9

gcc/ChangeLog:

	* config/xtensa/xtensa.cc (machine_function): New boolean member as
	a flag that controls whether to emit the insns for stack pointer
	adjustment inside of the pro/epilogue.
	(xtensa_emit_adjust_stack_ptr): New function to share the common
	codes and to emit insns if not inhibited.
	(xtensa_expand_epilogue): Change to use the function mentioned
	above when using the CALL0 ABI.
	(xtensa_expand_prologue): Ditto.
	And also change to set the inhibit flag used by
	xtensa_emit_adjust_stack_ptr() to true if the stack pointer is only
	used for its own adjustment.
This commit is contained in:
Takayuki 'January June' Suwa 2022-09-09 06:16:31 +09:00 committed by Max Filippov
parent c81b60b8c6
commit 06c2756e82

View file

@ -102,6 +102,7 @@ struct GTY(()) machine_function
int callee_save_size;
bool frame_laid_out;
bool epilogue_done;
bool inhibit_logues_a1_adjusts;
};
/* Vector, indexed by hard register number, which contains 1 for a
@ -3048,7 +3049,7 @@ xtensa_output_literal (FILE *file, rtx x, machine_mode mode, int labelno)
}
static bool
xtensa_call_save_reg(int regno)
xtensa_call_save_reg (int regno)
{
if (TARGET_WINDOWED_ABI)
return false;
@ -3084,7 +3085,7 @@ compute_frame_size (poly_int64 size)
cfun->machine->callee_save_size = 0;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
{
if (xtensa_call_save_reg(regno))
if (xtensa_call_save_reg (regno))
cfun->machine->callee_save_size += UNITS_PER_WORD;
}
@ -3139,6 +3140,49 @@ xtensa_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
return offset;
}
#define ADJUST_SP_NONE 0x0
#define ADJUST_SP_NEED_NOTE 0x1
#define ADJUST_SP_FRAME_PTR 0x2
static void
xtensa_emit_adjust_stack_ptr (HOST_WIDE_INT offset, int flags)
{
rtx_insn *insn;
rtx ptr = (flags & ADJUST_SP_FRAME_PTR) ? hard_frame_pointer_rtx
: stack_pointer_rtx;
if (cfun->machine->inhibit_logues_a1_adjusts)
return;
if (xtensa_simm8 (offset)
|| xtensa_simm8x256 (offset))
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, GEN_INT (offset)));
else
{
rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
if (offset < 0)
{
emit_move_insn (tmp_reg, GEN_INT (-offset));
insn = emit_insn (gen_subsi3 (stack_pointer_rtx, ptr, tmp_reg));
}
else
{
emit_move_insn (tmp_reg, GEN_INT (offset));
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, ptr, tmp_reg));
}
}
if (flags & ADJUST_SP_NEED_NOTE)
{
rtx note_rtx = gen_rtx_SET (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
offset));
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
}
}
/* minimum frame = reg save area (4 words) plus static chain (1 word)
and the total number of words must be a multiple of 128 bits. */
#define MIN_FRAME_SIZE (8 * UNITS_PER_WORD)
@ -3174,17 +3218,30 @@ xtensa_expand_prologue (void)
int regno;
HOST_WIDE_INT offset = 0;
int callee_save_size = cfun->machine->callee_save_size;
df_ref ref;
bool stack_pointer_needed = frame_pointer_needed
|| crtl->calls_eh_return;
/* Check if the function body really needs the stack pointer. */
if (!stack_pointer_needed)
for (ref = DF_REG_USE_CHAIN (A1_REG);
ref; ref = DF_REF_NEXT_REG (ref))
if (DF_REF_CLASS (ref) == DF_REF_REGULAR
&& NONJUMP_INSN_P (DF_REF_INSN (ref)))
stack_pointer_needed = true;
/* Check if callee-saved registers really need saving to the stack. */
if (!stack_pointer_needed)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
if (xtensa_call_save_reg (regno))
stack_pointer_needed = true;
cfun->machine->inhibit_logues_a1_adjusts = !stack_pointer_needed;
/* -128 is a limit of single addi instruction. */
if (IN_RANGE (total_size, 1, 128))
{
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-total_size)));
RTX_FRAME_RELATED_P (insn) = 1;
note_rtx = gen_rtx_SET (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-total_size));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
xtensa_emit_adjust_stack_ptr (-total_size,
ADJUST_SP_NEED_NOTE);
offset = total_size - UNITS_PER_WORD;
}
else if (callee_save_size)
@ -3194,33 +3251,14 @@ xtensa_expand_prologue (void)
* move it to its final location. */
if (total_size > 1024)
{
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-callee_save_size)));
RTX_FRAME_RELATED_P (insn) = 1;
note_rtx = gen_rtx_SET (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-callee_save_size));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
xtensa_emit_adjust_stack_ptr (-callee_save_size,
ADJUST_SP_NEED_NOTE);
offset = callee_save_size - UNITS_PER_WORD;
}
else
{
if (xtensa_simm8x256 (-total_size))
insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (-total_size)));
else
{
rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
emit_move_insn (tmp_reg, GEN_INT (total_size));
insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
stack_pointer_rtx, tmp_reg));
}
RTX_FRAME_RELATED_P (insn) = 1;
note_rtx = gen_rtx_SET (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-total_size));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
xtensa_emit_adjust_stack_ptr (-total_size,
ADJUST_SP_NEED_NOTE);
offset = total_size - UNITS_PER_WORD;
}
}
@ -3242,27 +3280,8 @@ xtensa_expand_prologue (void)
}
if (total_size > 1024
|| (!callee_save_size && total_size > 128))
{
if (xtensa_simm8x256 (callee_save_size - total_size))
insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (callee_save_size -
total_size)));
else
{
rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
emit_move_insn (tmp_reg, GEN_INT (total_size -
callee_save_size));
insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
stack_pointer_rtx, tmp_reg));
}
RTX_FRAME_RELATED_P (insn) = 1;
note_rtx = gen_rtx_SET (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
callee_save_size -
total_size));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, note_rtx);
}
xtensa_emit_adjust_stack_ptr (callee_save_size - total_size,
ADJUST_SP_NEED_NOTE);
}
if (frame_pointer_needed)
@ -3329,21 +3348,11 @@ xtensa_expand_epilogue (bool sibcall_p)
if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024))
{
if (xtensa_simm8x256 (cfun->machine->current_frame_size -
cfun->machine->callee_save_size))
emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
hard_frame_pointer_rtx : stack_pointer_rtx,
GEN_INT (cfun->machine->current_frame_size -
cfun->machine->callee_save_size)));
else
{
rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size -
cfun->machine->callee_save_size));
emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ?
hard_frame_pointer_rtx : stack_pointer_rtx,
tmp_reg));
}
xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size -
cfun->machine->callee_save_size,
frame_pointer_needed
? ADJUST_SP_FRAME_PTR
: ADJUST_SP_NONE);
offset = cfun->machine->callee_save_size - UNITS_PER_WORD;
}
else
@ -3384,24 +3393,11 @@ xtensa_expand_epilogue (bool sibcall_p)
else
offset = cfun->machine->callee_save_size;
if (offset)
emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (offset)));
xtensa_emit_adjust_stack_ptr (offset, ADJUST_SP_NONE);
}
else
{
if (xtensa_simm8x256 (cfun->machine->current_frame_size))
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (cfun->machine->current_frame_size)));
else
{
rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG);
emit_move_insn (tmp_reg,
GEN_INT (cfun->machine->current_frame_size));
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
tmp_reg));
}
}
xtensa_emit_adjust_stack_ptr (cfun->machine->current_frame_size,
ADJUST_SP_NONE);
}
if (crtl->calls_eh_return)