AArch64: Switch off early scheduling

The early scheduler takes up ~33% of the total build time; however, it doesn't
provide a meaningful performance gain.  This is partly because modern OoO cores
need far less scheduling, and partly because the scheduler tends to create many
unnecessary spills by increasing register pressure.  Building applications
56% faster is far more useful than a ~0.1% improvement on SPEC, so switch off
early scheduling on AArch64.  Code size reduces by ~0.2%.

Fix various tests that depend on scheduling by explicitly adding
-fschedule-insns, and adjust or remove the tests whose expected output changes.

gcc:
	* common/config/aarch64/aarch64-common.cc
	(aarch_option_optimization_table): Switch off fschedule_insns.

gcc/testsuite:
	* gcc.dg/guality/pr36728-3.c: Remove XFAIL.
	* gcc.dg/guality/pr68860-1.c: Likewise.
	* gcc.dg/guality/pr68860-2.c: Likewise.
	* gcc.target/aarch64/ldp_aligned.c: Remove the LDP_TEST_ADJUST_* tests and
	update the expected ldp counts.
	* gcc.target/aarch64/ldp_always.c: Likewise.
	* gcc.target/aarch64/ldp_stp_10.c: Add -fschedule-insns.
	* gcc.target/aarch64/ldp_stp_12.c: Likewise.
	* gcc.target/aarch64/ldp_stp_13.c: Remove test.
	* gcc.target/aarch64/ldp_stp_21.c: Add -fschedule-insns.
	* gcc.target/aarch64/ldp_stp_8.c: Likewise.
	* gcc.target/aarch64/ldp_vec_v2sf.c: Likewise.
	* gcc.target/aarch64/ldp_vec_v2si.c: Likewise.
	* gcc.target/aarch64/test_frame_16.c: Fix test by passing more arguments
	to outgoing.
	* gcc.target/aarch64/sve/vcond_12.c: Add -fschedule-insns.
	* gcc.target/aarch64/sve/acle/general/ldff1_3.c: Likewise.
This commit is contained in:
Wilco Dijkstra 2024-11-01 14:40:26 +00:00
parent 45d306a835
commit c5db3f50bd
16 changed files with 18 additions and 77 deletions

View file

@ -53,6 +53,8 @@ static const struct default_options aarch_option_optimization_table[] =
{ OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 0 },
/* Enable -fsched-pressure by default when optimizing. */
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
/* Disable early scheduling due to high compile-time overheads. */
{ OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
/* Enable redundant extension instructions removal at -O2 and higher. */
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL },

View file

@ -30,7 +30,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7)
/* { dg-final { gdb-test 14 "arg5" "5" } } */
/* { dg-final { gdb-test 14 "arg6" "6" } } */
/* { dg-final { gdb-test 14 "arg7" "30" } } */
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */
/* { dg-final { gdb-test 14 "y" "2" } } */
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg3" "3" } } */

View file

@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a
/* { dg-final { gdb-test 14 "arg6" "6" } } */
/* { dg-final { gdb-test 14 "arg7" "30" } } */
/* { dg-final { gdb-test 14 "arg8" "7" } } */
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { { any-opts "-O2" "-O3" } && { no-opts "-flto" } } } } } } */
/* { dg-final { gdb-test 14 "y" "2" } } */
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg3" "3" } } */

View file

@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a
/* { dg-final { gdb-test 14 "arg6" "6" } } */
/* { dg-final { gdb-test 14 "arg7" "30" } } */
/* { dg-final { gdb-test 14 "arg8" "7" } } */
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */
/* { dg-final { gdb-test 14 "y" "2" } } */
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
/* { dg-final { gdb-test 16 "arg3" "3" } } */

View file

@ -14,25 +14,11 @@ TYPE ldp_aligned_##TYPE(char* ptr){ \
return a_0 + a_1; \
}
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \
TYPE a_0, a_1, a_2, a_3, a_4; \
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
a_0 = arr[100]; \
a_1 = arr[101]; \
a_2 = arr[102]; \
a_3 = arr[103]; \
a_4 = arr[110]; \
return a_0 + a_1 + a_2 + a_3 + a_4; \
}
LDP_TEST_ALIGNED(int32_t);
LDP_TEST_ALIGNED(int64_t);
LDP_TEST_ALIGNED(v4si);
LDP_TEST_ADJUST_ALIGNED(int32_t);
LDP_TEST_ADJUST_ALIGNED(int64_t);
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 1 } } */
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 1 } } */

View file

@ -24,43 +24,14 @@ TYPE ldp_unaligned_##TYPE(char* ptr){ \
return a_0 + a_1; \
}
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \
TYPE a_0, a_1, a_2, a_3, a_4; \
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
a_0 = arr[100]; \
a_1 = arr[101]; \
a_2 = arr[102]; \
a_3 = arr[103]; \
a_4 = arr[110]; \
return a_0 + a_1 + a_2 + a_3 + a_4; \
}
#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_adjust_##TYPE(char* ptr){ \
TYPE a_0, a_1, a_2, a_3, a_4; \
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
TYPE *a = arr+1; \
a_0 = a[100]; \
a_1 = a[101]; \
a_2 = a[102]; \
a_3 = a[103]; \
a_4 = a[110]; \
return a_0 + a_1 + a_2 + a_3 + a_4; \
}
LDP_TEST_ALIGNED(int32_t);
LDP_TEST_ALIGNED(int64_t);
LDP_TEST_ALIGNED(v4si);
LDP_TEST_UNALIGNED(int32_t);
LDP_TEST_UNALIGNED(int64_t);
LDP_TEST_UNALIGNED(v4si);
LDP_TEST_ADJUST_ALIGNED(int32_t);
LDP_TEST_ADJUST_ALIGNED(int64_t);
LDP_TEST_ADJUST_UNALIGNED(int32_t);
LDP_TEST_ADJUST_UNALIGNED(int64_t);
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 2 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 2 } } */
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 2 } } */

View file

@ -1,4 +1,4 @@
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
int
load (int *arr)

View file

@ -1,4 +1,4 @@
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
void
store_offset (int *array, int x, int y)

View file

@ -1,18 +0,0 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mabi=ilp32" } */
long long
load_long (long long int *arr)
{
return arr[400] << 1 + arr[401] << 1 + arr[403] << 1 + arr[404] << 1;
}
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]+, " 2 } } */
int
load (int *arr)
{
return arr[527] << 1 + arr[400] << 1 + arr[401] << 1 + arr[528] << 1;
}
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]+, " 2 } } */

View file

@ -1,4 +1,4 @@
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
#pragma GCC target "+nosimd+fp"

View file

@ -1,4 +1,4 @@
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
typedef float __attribute__ ((vector_size (8))) fvec;
typedef int __attribute__ ((vector_size (8))) ivec;

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
typedef float __attribute__((vector_size(8))) vec;

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
typedef int __attribute__((vector_size(8))) vec;

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-options "-O2 -fschedule-insns" } */
#include <arm_sve.h>

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
/* { dg-options "-O2 -ftree-vectorize -ffast-math -fschedule-insns" } */
#include <stdint.h>

View file

@ -17,7 +17,7 @@ double vararg_outgoing (int x1, ...)
double a1 = x1, a2 = x1 * 2, a3 = x1 * 3, a4 = x1 * 4, a5 = x1 * 5, a6 = x1 * 6;
__builtin_va_list vl;
__builtin_va_start (vl, x1);
outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1));
outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1), REP8 (1));
__builtin_va_end (vl);
return a1 + a2 + a3 + a4 + a5 + a6;
}