arm: Use deltas for Arm switch tables

When optimizing normally for the Arm state, gcc emits an uncompressed
table of jump targets. This table sits in the middle of the text segment
and is far larger than necessary, especially at -Os.
This patch compresses the table to use deltas in a similar manner to
Thumb code generation.
Similar code is also used for -fpic where we currently generate a jump
to a jump. In this format the jumps are too dense for the hardware branch
predictor to handle accurately, so execution is likely to be very expensive.

Changes to switch statements for arm include a new function to handle the
assembly generation for different machine modes. This allows for more
optimisation to be performed in aout.h where arm has switched from using
ASM_OUTPUT_ADDR_VEC_ELT to using ASM_OUTPUT_ADDR_DIFF_ELT.
In ASM_OUTPUT_ADDR_DIFF_ELT new assembly generation options have been
added to utilise the different machine modes. Additional changes
made to the casesi expand and insn, CASE_VECTOR_PC_RELATIVE,
CASE_VECTOR_SHORTEN_MODE and LABEL_ALIGN_AFTER_BARRIER are all
to accommodate this new approach to switch statement generation.

New tests have been added, and there are no regressions on arm-none-eabi.

gcc/ChangeLog:

	* config/arm/aout.h (ASM_OUTPUT_ADDR_DIFF_ELT): Add table output
	for different machine modes for arm.
	* config/arm/arm-protos.h (arm_output_casesi): New prototype.
	* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Make arm use
	ASM_OUTPUT_ADDR_DIFF_ELT.
	(CASE_VECTOR_SHORTEN_MODE): Change table size calculation for
	TARGET_ARM.
	(LABEL_ALIGN_AFTER_BARRIER): Change to accommodate .p2align 2
	for TARGET_ARM.
	* config/arm/arm.cc (arm_output_casesi): New function.
	* config/arm/arm.md (arm_casesi_internal): Change casesi expand
	and insn for arm to use new function arm_output_casesi.

gcc/testsuite/ChangeLog:

	* gcc.target/arm/arm-switchstatement.c: New test.
This commit is contained in:
Richard Ball 2023-10-26 16:18:50 +01:00
parent 2ae00adb32
commit 7006e5d2d7
6 changed files with 242 additions and 12 deletions

View file

@ -183,7 +183,28 @@
do \
{ \
if (TARGET_ARM) \
asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \
{ \
switch (GET_MODE (body)) \
{ \
case E_QImode: \
asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n", \
VALUE, REL); \
break; \
case E_HImode: \
asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \
VALUE, REL); \
break; \
case E_SImode: \
if (flag_pic) \
asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n", \
VALUE, REL); \
else \
asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \
break; \
default: \
gcc_unreachable (); \
} \
} \
else if (TARGET_THUMB1) \
{ \
if (flag_pic || optimize_size) \

View file

@ -261,6 +261,7 @@ extern void thumb_expand_cpymemqi (rtx *);
extern rtx arm_return_addr (int, rtx);
extern void thumb_reload_out_hi (rtx *);
extern void thumb_set_return_address (rtx, rtx);
extern const char *arm_output_casesi (rtx *);
extern const char *thumb1_output_casesi (rtx *);
extern const char *thumb2_output_casesi (rtx *);
#endif

View file

@ -30464,6 +30464,51 @@ arm_output_iwmmxt_tinsr (rtx *operands)
return "";
}
/* Emit the dispatch sequence for a switch statement in Arm state and
   return the template of its last instruction.  Used by the
   arm_casesi_internal insn.

   OPERANDS[0] is the index and OPERANDS[1] the value it is compared
   against; control goes to label OPERANDS[3] when the index is
   unsigned-higher.  OPERANDS[2] is the label that must be immediately
   followed by the ADDR_DIFF_VEC, and OPERANDS[4] is a register that
   receives the table address and then the loaded entry.  The mode of
   the ADDR_DIFF_VEC selects how an entry is loaded.  */
const char *
arm_output_casesi (rtx *operands)
{
  rtx table = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  gcc_assert (GET_CODE (table) == ADDR_DIFF_VEC);

  /* Range check: out-of-range indices leave through label 3.  */
  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);

  switch (GET_MODE (table))
    {
    case E_SImode:
      output_asm_insn ("adr\t%4, %l2", operands);
      /* Without PIC the loaded word goes straight into pc.  */
      if (!flag_pic)
	return "ldr\t%|pc, [%4, %0, lsl #2]";
      /* With PIC the loaded word is added to pc instead.  */
      output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands);
      return "add\t%|pc, %|pc, %4";

    case E_HImode:
      output_asm_insn ("adr\t%4, %l2", operands);
      /* Adding the index here and using it again as the load offset
	 addresses the table at twice the index.  */
      output_asm_insn ("add\t%4, %4, %0", operands);
      output_asm_insn (ADDR_DIFF_VEC_FLAGS (table).offset_unsigned
		       ? "ldrh\t%4, [%4, %0]"
		       : "ldrsh\t%4, [%4, %0]",
		       operands);
      break;

    case E_QImode:
      output_asm_insn ("adr\t%4, %l2", operands);
      output_asm_insn (ADDR_DIFF_VEC_FLAGS (table).offset_unsigned
		       ? "ldrb\t%4, [%4, %0]"
		       : "ldrsb\t%4, [%4, %0]",
		       operands);
      break;

    default:
      gcc_unreachable ();
    }

  /* Byte and halfword entries are shifted left by two before being
     added to pc.  */
  return "add\t%|pc, %|pc, %4, lsl #2";
}
/* Output a Thumb-1 casesi dispatch sequence. */
const char *
thumb1_output_casesi (rtx *operands)

View file

@ -2092,7 +2092,7 @@ enum arm_auto_incmodes
for the index in the tablejump instruction. */
#define CASE_VECTOR_MODE Pmode
#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2 \
#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2 \
|| (TARGET_THUMB1 \
&& (optimize_size || flag_pic))) \
&& (!target_pure_code))
@ -2109,9 +2109,19 @@ enum arm_auto_incmodes
: min >= -4096 && max < 4096 \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \
: SImode) \
: ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \
: (max >= 0x200) ? HImode \
: QImode))
: (TARGET_THUMB2 \
? ((min > 0 && max < 0x200) ? QImode \
: (min > 0 && max <= 0x20000) ? HImode \
: SImode) \
: ((min >= 0 && max < 1024) \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \
: (min >= -512 && max <= 508) \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \
:(min >= 0 && max < 262144) \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \
: (min >= -131072 && max <=131068) \
? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \
: SImode)))
/* signed 'char' is most compatible, but RISC OS wants it unsigned.
unsigned is probably best, but may break some code. */
@ -2301,7 +2311,7 @@ extern int making_const_table;
#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
(GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \
? 1 : 0)
? (TARGET_ARM ? 2 : 1) : 0)
#define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \
arm_declare_function_name ((STREAM), (NAME), (DECL));

View file

@ -9556,6 +9556,8 @@
(match_dup 4)
(label_ref:SI (match_operand 3 ""))))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:SI 5))
(clobber (match_scratch:SI 6))
(use (label_ref:SI (match_operand 2 "")))])]
"TARGET_ARM"
{
@ -9576,15 +9578,15 @@
(label_ref:SI (match_operand 2 "" ""))))
(label_ref:SI (match_operand 3 "" ""))))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:SI 4 "=&r"))
(clobber (match_scratch:SI 5 "=r"))
(use (label_ref:SI (match_dup 2)))])]
"TARGET_ARM"
"*
if (flag_pic)
return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\";
return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\";
"
{
return arm_output_casesi (operands);
}
[(set_attr "conds" "clob")
(set_attr "length" "12")
(set_attr "length" "24")
(set_attr "type" "multiple")]
)

View file

@ -0,0 +1,151 @@
/* { dg-do compile } */
/* { dg-options "-O2 --param case-values-threshold=1 -fno-reorder-blocks -fno-tree-dce" } */
/* { dg-require-effective-target arm_nothumb } */
/* { dg-final { check-function-bodies "**" "" "" } } */
/* String-literal building blocks for inline asm: NOP is a single "nop;"
   and each NOP<n> concatenates two copies of the previous macro, so
   NOP<n> expands to <n> consecutive "nop;" statements.  */
#define NOP "nop;"
#define NOP2 NOP NOP
#define NOP4 NOP2 NOP2
#define NOP8 NOP4 NOP4
#define NOP16 NOP8 NOP8
#define NOP32 NOP16 NOP16
#define NOP64 NOP32 NOP32
#define NOP128 NOP64 NOP64
#define NOP256 NOP128 NOP128
#define NOP512 NOP256 NOP256
#define NOP1024 NOP512 NOP512
#define NOP2048 NOP1024 NOP1024
#define NOP4096 NOP2048 NOP2048
#define NOP8192 NOP4096 NOP4096
#define NOP16384 NOP8192 NOP8192
#define NOP32768 NOP16384 NOP16384
#define NOP65536 NOP32768 NOP32768
#define NOP131072 NOP65536 NOP65536
/* Selector type for the switch statements below.  The enumerators take
   the values 1 to 5 and 7; the value 6 has no enumerator.  */
enum z
{
a = 1,
b,
c,
d,
e,
f = 7,
};
/* Emit 32 nop instructions through a volatile asm statement.  FLAG is
   not used.
   NOTE(review): the nop count is presumably chosen so that the switch in
   QImode_test gets byte-sized table entries once this is inlined --
   confirm against CASE_VECTOR_SHORTEN_MODE.  */
inline void QIFunction (const char* flag)
{
asm volatile (NOP32);
return;
}
/* Emit 512 nop instructions through a volatile asm statement.  FLAG is
   not used.
   NOTE(review): the nop count is presumably chosen so that the switch in
   HImode_test needs halfword table entries once this is inlined --
   confirm against CASE_VECTOR_SHORTEN_MODE.  */
inline void HIFunction (const char* flag)
{
asm volatile (NOP512);
return;
}
/* Emit 131072 nop instructions through a volatile asm statement.  FLAG
   is not used.
   NOTE(review): the nop count is presumably chosen so that the switch in
   SImode_test needs word-sized table entries once this is inlined --
   confirm against CASE_VECTOR_SHORTEN_MODE.  */
inline void SIFunction (const char* flag)
{
asm volatile (NOP131072);
return;
}
/*
**QImode_test:
** ...
** adr (r[0-9]+), .L[0-9]+
** ldrb \1, \[\1, r[0-9]+\]
** add pc, pc, \1, lsl #2
** ...
*/
/* Switch on X with cases d and f plus a default.  The d case calls
   QIFunction (32 nops) before returning.  The expected-assembly comment
   preceding this function requires an ldrb load from the table followed
   by an add to pc with the entry shifted left by two.  */
__attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x)
{
switch (x)
{
case d:
QIFunction("QItest");
return "InlineASM";
case f:
return "TEST";
default:
return "Default";
}
}
/* { dg-final { scan-assembler ".byte" } } */
/*
**HImode_test:
** ...
** adr (r[0-9]+), .L[0-9]+
** add \1, \1, (r[0-9]+)
** ldrh \1, \[\1, \2\]
** add pc, pc, \1, lsl #2
** ...
*/
/* Switch on X with cases d and f plus a default.  The d case calls
   HIFunction (512 nops) before returning.  The expected-assembly comment
   preceding this function requires the index to be added to the table
   address, an ldrh load, and an add to pc with the entry shifted left
   by two.  */
__attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x)
{
switch (x)
{
case d:
HIFunction("HItest");
return "InlineASM";
case f:
return "TEST";
default:
return "Default";
}
}
/* { dg-final { scan-assembler ".2byte" } } */
/*
**SImode_test:
** ...
** adr (r[0-9]+), .L[0-9]+
** ldr pc, \[\1, r[0-9]+, lsl #2\]
** ...
*/
/* Switch on X with cases d and f plus a default.  The d case calls
   SIFunction (131072 nops) before returning.  The expected-assembly
   comment preceding this function requires an ldr that loads pc
   directly from the table, indexed with the index shifted left by
   two.  */
__attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enum z x)
{
switch (x)
{
case d:
SIFunction("SItest");
return "InlineASM";
case f:
return "TEST";
default:
return "Default";
}
}
/* { dg-final { scan-assembler ".word" } } */
/*
**backwards_branch_test:
** ...
** adr (r[0-9]+), .L[0-9]+
** add \1, \1, (r[0-9]+)
** ldrsh \1, \[\1, \2\]
** add pc, pc, \1, lsl #2
** ...
*/
/* Switch on X whose d case jumps to the label `backwards', which is
   placed before the switch and is followed by 512 nops.  When FLAG is 5
   that block is entered directly and the switch is never reached.  The
   expected-assembly comment preceding this function requires an ldrsh
   (sign-extending) load from the table.
   NOTE(review): presumably the target lying before the table is what
   makes the entry negative and forces the signed load -- confirm.  */
__attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)
{
if (flag == 5)
{
backwards:
asm volatile (NOP512);
return "ASM";
}
switch (x)
{
case d:
goto backwards;
case f:
return "TEST";
default:
return "Default";
}
}