aarch64: Add GCS support for nonlocal stack save

Nonlocal stack save and restore has to also save and restore the GCS
pointer. This is used in __builtin_setjmp/longjmp and nonlocal goto.

The GCS specific code is only emitted if GCS branch-protection is
enabled and the code always checks at runtime if GCS is enabled.

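For reference, the new code path is only reached when GCS branch
protection is requested at compile time, e.g. (illustrative command
line, not part of the patch):

  gcc -O2 -mbranch-protection=gcs file.c
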
The new -mbranch-protection=gcs and old -mbranch-protection=none code
are ABI compatible: the jmpbuf for __builtin_setjmp has space for 5
pointers; the layout is

  old layout: fp, pc, sp, unused, unused
  new layout: fp, pc, sp, gcsp, unused

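As an illustration of the ABI in question, a minimal
__builtin_setjmp/__builtin_longjmp user looks like this (hypothetical
example, not part of the patch); the 5-slot buffer is the jmpbuf whose
layout is shown above:

  /* Illustrative only: the builtin jmpbuf is 5 pointer-sized slots,
     whichever -mbranch-protection setting the code was built with.  */
  void *jmpbuf[5];

  void callee (void);

  int caller (void)
  {
    if (__builtin_setjmp (jmpbuf))
      return 1;   /* Reached via __builtin_longjmp (jmpbuf, 1).  */
    callee ();    /* May eventually do the longjmp.  */
    return 0;
  }
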
Note: the ILP32 code generation is wrong: it saves the pointers with
Pmode (i.e. 8 bytes per pointer), but the user-supplied buffer only has
room for 5 pointers of 4 bytes each; this is not fixed here (PR84150).

The nonlocal goto has no ABI compatibility issues as the goto and its
destination are in the same translation unit.

We use CDImode for the nonlocal save area so it has room for the GCS
pointer as well: it is 16 bytes in size but, unlike TImode, does not
impose 16-byte alignment.

gcc/ChangeLog:

	* config/aarch64/aarch64.h (STACK_SAVEAREA_MODE): Make space for gcs.
	* config/aarch64/aarch64.md (save_stack_nonlocal): New.
	(restore_stack_nonlocal): New.
	* tree-nested.cc (get_nl_goto_field): Updated.
Author:     Szabolcs Nagy
Committer:  Richard Sandiford
Date:       2024-11-14 16:15:08 +00:00
Commit:     41479351d6 (parent bca0fc1410)

3 changed files with 91 additions and 2 deletions

gcc/config/aarch64/aarch64.h

@@ -1308,6 +1308,13 @@ typedef struct
 #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 
+/* Have space for both SP and GCSPR in the NONLOCAL case in
+   emit_stack_save as well as in __builtin_setjmp, __builtin_longjmp
+   and __builtin_nonlocal_goto.
+   Note: On ILP32 the documented buf size is not enough PR84150.  */
+#define STACK_SAVEAREA_MODE(LEVEL) \
+  ((LEVEL) == SAVE_NONLOCAL ? E_CDImode : Pmode)
+
 #define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, LR_REGNUM)
 
 #define RETURN_ADDR_RTX aarch64_return_addr

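With the NONLOCAL save area widened to CDImode, the resulting
__builtin_setjmp buffer on LP64 can be pictured roughly as below (a
sketch only; the struct and field names are hypothetical, the offsets
follow the layout given in the commit message):

  /* Sketch of the 5-slot jmpbuf on LP64 with this patch applied.  */
  struct jmpbuf_layout_sketch
  {
    void *fp;      /* frame pointer                          */
    void *pc;      /* receiver (label) address               */
    void *sp;      /* stack pointer, save area offset 0      */
    void *gcsp;    /* GCS pointer,   save area offset 8      */
    void *unused;  /* still present for ABI compatibility    */
  };
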
gcc/config/aarch64/aarch64.md

@@ -1199,6 +1199,88 @@
 	  (const_int 1)))]
 )
 
+(define_expand "save_stack_nonlocal"
+  [(set (match_operand 0 "memory_operand")
+	(match_operand 1 "register_operand"))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[0], Pmode, 0);
+  emit_move_insn (stack_slot, operands[1]);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Save GCS with code like
+	   mov	x16, 1
+	   chkfeat	x16
+	   tbnz	x16, 0, .L_done
+	   mrs	tmp, gcspr_el0
+	   str	tmp, [%0, 8]
+	 .L_done:  */
+
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs));
+      emit_move_insn (gcs_slot, gcs);
+      emit_label (done_label);
+    }
+  DONE;
+})
+
+(define_expand "restore_stack_nonlocal"
+  [(set (match_operand 0 "register_operand" "")
+	(match_operand 1 "memory_operand" ""))]
+  ""
+{
+  rtx stack_slot = adjust_address (operands[1], Pmode, 0);
+  emit_move_insn (operands[0], stack_slot);
+
+  if (aarch64_gcs_enabled ())
+    {
+      /* Restore GCS with code like
+	   mov	x16, 1
+	   chkfeat	x16
+	   tbnz	x16, 0, .L_done
+	   ldr	tmp1, [%1, 8]
+	   mrs	tmp2, gcspr_el0
+	   subs	tmp2, tmp1, tmp2
+	   b.eq	.L_done
+	 .L_loop:
+	   gcspopm
+	   subs	tmp2, tmp2, 8
+	   b.ne	.L_loop
+	 .L_done:  */
+
+      rtx loop_label = gen_label_rtx ();
+      rtx done_label = gen_label_rtx ();
+      rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
+      emit_move_insn (r16, const1_rtx);
+      emit_insn (gen_aarch64_chkfeat ());
+      emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+      rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
+      rtx gcs_old = gen_reg_rtx (Pmode);
+      emit_move_insn (gcs_old, gcs_slot);
+      rtx gcs_now = gen_reg_rtx (Pmode);
+      emit_insn (gen_aarch64_load_gcspr (gcs_now));
+      emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
+      rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+      emit_label (loop_label);
+      emit_insn (gen_aarch64_gcspopm_xzr ());
+      emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
+      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+      cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+      emit_label (done_label);
+    }
+  DONE;
+})
+
 ;; -------------------------------------------------------------------
 ;; Subroutine calls and sibcalls
 ;; -------------------------------------------------------------------

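For completeness, a nonlocal goto that exercises restore_stack_nonlocal
can be written with GNU C nested functions; this is an illustrative
test case, not part of the patch:

  /* Taking the nonlocal goto below unwinds through
     restore_stack_nonlocal, so with GCS enabled the saved gcspr_el0
     value is reached again by popping entries with gcspopm.  */
  int f (int n)
  {
    __label__ out;

    void nested (void)
    {
      if (n)
	goto out;   /* Nonlocal goto out of the nested function.  */
    }

    nested ();
    return 0;

   out:
    return 1;
  }
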
gcc/tree-nested.cc

@@ -783,8 +783,8 @@ get_nl_goto_field (struct nesting_info *info)
       else
 	type = lang_hooks.types.type_for_mode (Pmode, 1);
 
-      scalar_int_mode mode
-	= as_a <scalar_int_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
+      fixed_size_mode mode
+	= as_a <fixed_size_mode> (STACK_SAVEAREA_MODE (SAVE_NONLOCAL));
       size = GET_MODE_SIZE (mode);
       size = size / GET_MODE_SIZE (Pmode);
       size = size + 1;
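
To see what changes for the nonlocal goto save area field, a quick
worked computation on LP64 (assuming GET_MODE_SIZE (CDImode) == 16 and
GET_MODE_SIZE (Pmode) == 8):

  size = GET_MODE_SIZE (mode) / GET_MODE_SIZE (Pmode) + 1
       = 16 / 8 + 1
       = 3 pointer-sized slots   (previously 8 / 8 + 1 = 2)

so the per-frame field grows by one slot to make room for the saved
GCS pointer.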