From c5db3f50bdf34ea96fd193a2a66d686401053bd2 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Fri, 1 Nov 2024 14:40:26 +0000 Subject: [PATCH] AArch64: Switch off early scheduling The early scheduler takes up ~33% of the total build time; however, it doesn't provide a meaningful performance gain. This is partly because modern OoO cores need far less scheduling, and partly because the scheduler tends to create many unnecessary spills by increasing register pressure. Building applications 56% faster is far more useful than a ~0.1% improvement on SPEC, so switch off early scheduling on AArch64. Code size is reduced by ~0.2%. Fix various tests that depend on scheduling by explicitly adding -fschedule-insns. gcc: * common/config/aarch64/aarch64-common.cc: Switch off fschedule_insns. gcc/testsuite: * gcc.dg/guality/pr36728-3.c: Remove XFAIL. * gcc.dg/guality/pr68860-1.c: Likewise. * gcc.dg/guality/pr68860-2.c: Likewise. * gcc.target/aarch64/ldp_aligned.c: Fix test. * gcc.target/aarch64/ldp_always.c: Likewise. * gcc.target/aarch64/ldp_stp_10.c: Add -fschedule-insns. * gcc.target/aarch64/ldp_stp_12.c: Likewise. * gcc.target/aarch64/ldp_stp_13.c: Remove test. * gcc.target/aarch64/ldp_stp_21.c: Add -fschedule-insns. * gcc.target/aarch64/ldp_stp_8.c: Likewise. * gcc.target/aarch64/ldp_vec_v2sf.c: Likewise. * gcc.target/aarch64/ldp_vec_v2si.c: Likewise. * gcc.target/aarch64/test_frame_16.c: Fix test. * gcc.target/aarch64/sve/vcond_12.c: Add -fschedule-insns. * gcc.target/aarch64/sve/acle/general/ldff1_3.c: Likewise. 
--- gcc/common/config/aarch64/aarch64-common.cc | 2 ++ gcc/testsuite/gcc.dg/guality/pr36728-3.c | 2 +- gcc/testsuite/gcc.dg/guality/pr68860-1.c | 2 +- gcc/testsuite/gcc.dg/guality/pr68860-2.c | 2 +- .../gcc.target/aarch64/ldp_aligned.c | 18 ++-------- gcc/testsuite/gcc.target/aarch64/ldp_always.c | 33 ++----------------- gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c | 18 ---------- gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 2 +- gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c | 2 +- .../gcc.target/aarch64/ldp_vec_v2sf.c | 2 +- .../gcc.target/aarch64/ldp_vec_v2si.c | 2 +- .../aarch64/sve/acle/general/ldff1_3.c | 2 +- .../gcc.target/aarch64/sve/vcond_12.c | 2 +- .../gcc.target/aarch64/test_frame_16.c | 2 +- 16 files changed, 18 insertions(+), 77 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index dd941347504..92df8b61930 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -53,6 +53,8 @@ static const struct default_options aarch_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 0 }, /* Enable -fsched-pressure by default when optimizing. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, + /* Disable early scheduling due to high compile-time overheads. */ + { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, /* Enable redundant extension instructions removal at -O2 and higher. 
*/ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL }, diff --git a/gcc/testsuite/gcc.dg/guality/pr36728-3.c b/gcc/testsuite/gcc.dg/guality/pr36728-3.c index 589009b2e82..4700d50f8bc 100644 --- a/gcc/testsuite/gcc.dg/guality/pr36728-3.c +++ b/gcc/testsuite/gcc.dg/guality/pr36728-3.c @@ -30,7 +30,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7) /* { dg-final { gdb-test 14 "arg5" "5" } } */ /* { dg-final { gdb-test 14 "arg6" "6" } } */ /* { dg-final { gdb-test 14 "arg7" "30" } } */ -/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */ +/* { dg-final { gdb-test 14 "y" "2" } } */ /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg3" "3" } } */ diff --git a/gcc/testsuite/gcc.dg/guality/pr68860-1.c b/gcc/testsuite/gcc.dg/guality/pr68860-1.c index bbd9d6e6032..8c8d8354587 100644 --- a/gcc/testsuite/gcc.dg/guality/pr68860-1.c +++ b/gcc/testsuite/gcc.dg/guality/pr68860-1.c @@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a /* { dg-final { gdb-test 14 "arg6" "6" } } */ /* { dg-final { gdb-test 14 "arg7" "30" } } */ /* { dg-final { gdb-test 14 "arg8" "7" } } */ -/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { { any-opts "-O2" "-O3" } && { no-opts "-flto" } } } } } } */ +/* { dg-final { gdb-test 14 "y" "2" } } */ /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg2" "2" { target { ! 
"s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg3" "3" } } */ diff --git a/gcc/testsuite/gcc.dg/guality/pr68860-2.c b/gcc/testsuite/gcc.dg/guality/pr68860-2.c index a18a04e60b2..070efbcd10c 100644 --- a/gcc/testsuite/gcc.dg/guality/pr68860-2.c +++ b/gcc/testsuite/gcc.dg/guality/pr68860-2.c @@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a /* { dg-final { gdb-test 14 "arg6" "6" } } */ /* { dg-final { gdb-test 14 "arg7" "30" } } */ /* { dg-final { gdb-test 14 "arg8" "7" } } */ -/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */ +/* { dg-final { gdb-test 14 "y" "2" } } */ /* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */ /* { dg-final { gdb-test 16 "arg3" "3" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_aligned.c b/gcc/testsuite/gcc.target/aarch64/ldp_aligned.c index 75495d71df2..8ec2b0392b8 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_aligned.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_aligned.c @@ -14,25 +14,11 @@ TYPE ldp_aligned_##TYPE(char* ptr){ \ return a_0 + a_1; \ } -#define LDP_TEST_ADJUST_ALIGNED(TYPE) \ -TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \ - TYPE a_0, a_1, a_2, a_3, a_4; \ - TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \ - a_0 = arr[100]; \ - a_1 = arr[101]; \ - a_2 = arr[102]; \ - a_3 = arr[103]; \ - a_4 = arr[110]; \ - return a_0 + a_1 + a_2 + a_3 + a_4; \ -} - LDP_TEST_ALIGNED(int32_t); LDP_TEST_ALIGNED(int64_t); LDP_TEST_ALIGNED(v4si); -LDP_TEST_ADJUST_ALIGNED(int32_t); -LDP_TEST_ADJUST_ALIGNED(int64_t); -/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 3 } } */ -/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 1 } } */ /* { dg-final 
{ scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_always.c b/gcc/testsuite/gcc.target/aarch64/ldp_always.c index 9cada57db89..5ffb98a886e 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_always.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_always.c @@ -24,43 +24,14 @@ TYPE ldp_unaligned_##TYPE(char* ptr){ \ return a_0 + a_1; \ } -#define LDP_TEST_ADJUST_ALIGNED(TYPE) \ -TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \ - TYPE a_0, a_1, a_2, a_3, a_4; \ - TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \ - a_0 = arr[100]; \ - a_1 = arr[101]; \ - a_2 = arr[102]; \ - a_3 = arr[103]; \ - a_4 = arr[110]; \ - return a_0 + a_1 + a_2 + a_3 + a_4; \ -} - -#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \ -TYPE ldp_unaligned_adjust_##TYPE(char* ptr){ \ - TYPE a_0, a_1, a_2, a_3, a_4; \ - TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \ - TYPE *a = arr+1; \ - a_0 = a[100]; \ - a_1 = a[101]; \ - a_2 = a[102]; \ - a_3 = a[103]; \ - a_4 = a[110]; \ - return a_0 + a_1 + a_2 + a_3 + a_4; \ -} - LDP_TEST_ALIGNED(int32_t); LDP_TEST_ALIGNED(int64_t); LDP_TEST_ALIGNED(v4si); LDP_TEST_UNALIGNED(int32_t); LDP_TEST_UNALIGNED(int64_t); LDP_TEST_UNALIGNED(v4si); -LDP_TEST_ADJUST_ALIGNED(int32_t); -LDP_TEST_ADJUST_ALIGNED(int64_t); -LDP_TEST_ADJUST_UNALIGNED(int32_t); -LDP_TEST_ADJUST_UNALIGNED(int64_t); -/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 6 } } */ -/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 6 } } */ +/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 2 } } */ +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 2 } } */ /* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c index 31f392901d2..ac4828af761 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c +++ 
b/gcc/testsuite/gcc.target/aarch64/ldp_stp_10.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ int load (int *arr) diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c index 718e82b53f0..495e199270a 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_12.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ void store_offset (int *array, int x, int y) diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c deleted file mode 100644 index 9cc3942f153..00000000000 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_13.c +++ /dev/null @@ -1,18 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -mabi=ilp32" } */ - -long long -load_long (long long int *arr) -{ - return arr[400] << 1 + arr[401] << 1 + arr[403] << 1 + arr[404] << 1; -} - -/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]+, " 2 } } */ - -int -load (int *arr) -{ - return arr[527] << 1 + arr[400] << 1 + arr[401] << 1 + arr[528] << 1; -} - -/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]+, " 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c index d54c322ce86..ac7bc164840 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ #pragma GCC target "+nosimd+fp" diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c index b25678323b8..2adf151491b 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_8.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ typedef float __attribute__ ((vector_size (8))) fvec; typedef 
int __attribute__ ((vector_size (8))) ivec; diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c index fbdae1c6cff..7a87fe7dd0a 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2sf.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ typedef float __attribute__((vector_size(8))) vec; diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2si.c b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2si.c index 7714cd6cd9e..068f53e28ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2si.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_vec_v2si.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ typedef int __attribute__((vector_size(8))) vec; diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c index 41ad0bcea00..14eacce09c0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-O2 -fschedule-insns" } */ #include diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c index de650bf39e2..59dcc0abecf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -fschedule-insns" } */ #include diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_16.c b/gcc/testsuite/gcc.target/aarch64/test_frame_16.c index 28f3826adad..0f67458f718 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_16.c +++ 
b/gcc/testsuite/gcc.target/aarch64/test_frame_16.c @@ -17,7 +17,7 @@ double vararg_outgoing (int x1, ...) double a1 = x1, a2 = x1 * 2, a3 = x1 * 3, a4 = x1 * 4, a5 = x1 * 5, a6 = x1 * 6; __builtin_va_list vl; __builtin_va_start (vl, x1); - outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1)); + outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1), REP8 (1)); __builtin_va_end (vl); return a1 + a2 + a3 + a4 + a5 + a6; }