AArch64: Switch off early scheduling
The early scheduler takes up ~33% of the total build time; however, it doesn't provide a meaningful performance gain. This is partly because modern OoO cores need far less scheduling, and partly because the scheduler tends to create many unnecessary spills by increasing register pressure.

Building applications 56% faster is far more useful than a ~0.1% improvement on SPEC, so switch off early scheduling on AArch64. Code size reduces by ~0.2%.

Fix various tests that depend on scheduling by explicitly adding -fschedule-insns.

gcc:
	* common/config/aarch64/aarch64-common.cc: Switch off fschedule_insns.

gcc/testsuite:
	* gcc.dg/guality/pr36728-3.c: Remove XFAIL.
	* gcc.dg/guality/pr68860-1.c: Likewise.
	* gcc.dg/guality/pr68860-2.c: Likewise.
	* gcc.target/aarch64/ldp_aligned.c: Fix test.
	* gcc.target/aarch64/ldp_always.c: Likewise.
	* gcc.target/aarch64/ldp_stp_10.c: Add -fschedule-insns.
	* gcc.target/aarch64/ldp_stp_12.c: Likewise.
	* gcc.target/aarch64/ldp_stp_13.c: Remove test.
	* gcc.target/aarch64/ldp_stp_21.c: Add -fschedule-insns.
	* gcc.target/aarch64/ldp_stp_8.c: Likewise.
	* gcc.target/aarch64/ldp_vec_v2sf.c: Likewise.
	* gcc.target/aarch64/ldp_vec_v2si.c: Likewise.
	* gcc.target/aarch64/test_frame_16.c: Fix test.
	* gcc.target/aarch64/sve/vcond_12.c: Add -fschedule-insns.
	* gcc.target/aarch64/sve/acle/general/ldff1_3.c: Likewise.
This commit is contained in:
parent
45d306a835
commit
c5db3f50bd
16 changed files with 18 additions and 77 deletions
|
@ -53,6 +53,8 @@ static const struct default_options aarch_option_optimization_table[] =
|
|||
{ OPT_LEVELS_ALL, OPT_fomit_frame_pointer, NULL, 0 },
|
||||
/* Enable -fsched-pressure by default when optimizing. */
|
||||
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
|
||||
/* Disable early scheduling due to high compile-time overheads. */
|
||||
{ OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
|
||||
/* Enable redundant extension instructions removal at -O2 and higher. */
|
||||
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL },
|
||||
|
|
|
@ -30,7 +30,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7)
|
|||
/* { dg-final { gdb-test 14 "arg5" "5" } } */
|
||||
/* { dg-final { gdb-test 14 "arg6" "6" } } */
|
||||
/* { dg-final { gdb-test 14 "arg7" "30" } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" } } */
|
||||
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg3" "3" } } */
|
||||
|
|
|
@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a
|
|||
/* { dg-final { gdb-test 14 "arg6" "6" } } */
|
||||
/* { dg-final { gdb-test 14 "arg7" "30" } } */
|
||||
/* { dg-final { gdb-test 14 "arg8" "7" } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { { any-opts "-O2" "-O3" } && { no-opts "-flto" } } } } } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" } } */
|
||||
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg3" "3" } } */
|
||||
|
|
|
@ -31,7 +31,7 @@ foo (int arg1, int arg2, int arg3, int arg4, int arg5, int arg6, int arg7, int a
|
|||
/* { dg-final { gdb-test 14 "arg6" "6" } } */
|
||||
/* { dg-final { gdb-test 14 "arg7" "30" } } */
|
||||
/* { dg-final { gdb-test 14 "arg8" "7" } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" { xfail { aarch64*-*-* && { any-opts "-O3" } } } } } */
|
||||
/* { dg-final { gdb-test 14 "y" "2" } } */
|
||||
/* { dg-final { gdb-test 16 "arg1" "1" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg2" "2" { target { ! "s390*-*-*" } } } } */
|
||||
/* { dg-final { gdb-test 16 "arg3" "3" } } */
|
||||
|
|
|
@ -14,25 +14,11 @@ TYPE ldp_aligned_##TYPE(char* ptr){ \
|
|||
return a_0 + a_1; \
|
||||
}
|
||||
|
||||
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
|
||||
TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \
|
||||
TYPE a_0, a_1, a_2, a_3, a_4; \
|
||||
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
|
||||
a_0 = arr[100]; \
|
||||
a_1 = arr[101]; \
|
||||
a_2 = arr[102]; \
|
||||
a_3 = arr[103]; \
|
||||
a_4 = arr[110]; \
|
||||
return a_0 + a_1 + a_2 + a_3 + a_4; \
|
||||
}
|
||||
|
||||
LDP_TEST_ALIGNED(int32_t);
|
||||
LDP_TEST_ALIGNED(int64_t);
|
||||
LDP_TEST_ALIGNED(v4si);
|
||||
LDP_TEST_ADJUST_ALIGNED(int32_t);
|
||||
LDP_TEST_ADJUST_ALIGNED(int64_t);
|
||||
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 3 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 1 } } */
|
||||
|
||||
|
|
|
@ -24,43 +24,14 @@ TYPE ldp_unaligned_##TYPE(char* ptr){ \
|
|||
return a_0 + a_1; \
|
||||
}
|
||||
|
||||
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
|
||||
TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \
|
||||
TYPE a_0, a_1, a_2, a_3, a_4; \
|
||||
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
|
||||
a_0 = arr[100]; \
|
||||
a_1 = arr[101]; \
|
||||
a_2 = arr[102]; \
|
||||
a_3 = arr[103]; \
|
||||
a_4 = arr[110]; \
|
||||
return a_0 + a_1 + a_2 + a_3 + a_4; \
|
||||
}
|
||||
|
||||
#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \
|
||||
TYPE ldp_unaligned_adjust_##TYPE(char* ptr){ \
|
||||
TYPE a_0, a_1, a_2, a_3, a_4; \
|
||||
TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
|
||||
TYPE *a = arr+1; \
|
||||
a_0 = a[100]; \
|
||||
a_1 = a[101]; \
|
||||
a_2 = a[102]; \
|
||||
a_3 = a[103]; \
|
||||
a_4 = a[110]; \
|
||||
return a_0 + a_1 + a_2 + a_3 + a_4; \
|
||||
}
|
||||
|
||||
LDP_TEST_ALIGNED(int32_t);
|
||||
LDP_TEST_ALIGNED(int64_t);
|
||||
LDP_TEST_ALIGNED(v4si);
|
||||
LDP_TEST_UNALIGNED(int32_t);
|
||||
LDP_TEST_UNALIGNED(int64_t);
|
||||
LDP_TEST_UNALIGNED(v4si);
|
||||
LDP_TEST_ADJUST_ALIGNED(int32_t);
|
||||
LDP_TEST_ADJUST_ALIGNED(int64_t);
|
||||
LDP_TEST_ADJUST_UNALIGNED(int32_t);
|
||||
LDP_TEST_ADJUST_UNALIGNED(int64_t);
|
||||
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 2 } } */
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
int
|
||||
load (int *arr)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
void
|
||||
store_offset (int *array, int x, int y)
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mabi=ilp32" } */
|
||||
|
||||
long long
|
||||
load_long (long long int *arr)
|
||||
{
|
||||
return arr[400] << 1 + arr[401] << 1 + arr[403] << 1 + arr[404] << 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]+, " 2 } } */
|
||||
|
||||
int
|
||||
load (int *arr)
|
||||
{
|
||||
return arr[527] << 1 + arr[400] << 1 + arr[401] << 1 + arr[528] << 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]+, " 2 } } */
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
#pragma GCC target "+nosimd+fp"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
typedef float __attribute__ ((vector_size (8))) fvec;
|
||||
typedef int __attribute__ ((vector_size (8))) ivec;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
typedef float __attribute__((vector_size(8))) vec;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
typedef int __attribute__((vector_size(8))) vec;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -fschedule-insns" } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math -fschedule-insns" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ double vararg_outgoing (int x1, ...)
|
|||
double a1 = x1, a2 = x1 * 2, a3 = x1 * 3, a4 = x1 * 4, a5 = x1 * 5, a6 = x1 * 6;
|
||||
__builtin_va_list vl;
|
||||
__builtin_va_start (vl, x1);
|
||||
outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1));
|
||||
outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1), REP8 (1));
|
||||
__builtin_va_end (vl);
|
||||
return a1 + a2 + a3 + a4 + a5 + a6;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue