AArch64: Remove AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS

Remove the tune AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS since it is only
used by an old core and doesn't properly support -Os.  SPECINT_2017
shows that removing it has no performance difference, while codesize
is reduced by 0.07%.

gcc/ChangeLog:
	* config/aarch64/aarch64.cc (aarch64_mode_valid_for_sched_fusion_p):
	Remove check for AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS.
	(aarch64_advsimd_ldp_stp_p): Likewise.
	(aarch64_stp_sequence_cost): Likewise.
	(aarch64_expand_cpymem): Likewise.
	(aarch64_expand_setmem): Likewise.
	* config/aarch64/aarch64-ldp-fusion.cc (ldp_operand_mode_ok_p):
	Likewise.
	* config/aarch64/aarch64-ldpstp.md: Likewise.
	* config/aarch64/aarch64-tuning-flags.def: Remove NO_LDP_STP_QREGS.
	* config/aarch64/tuning_models/emag.h: Likewise.
	* config/aarch64/tuning_models/xgene1.h: Likewise.

gcc/testsuite:
	* gcc.target/aarch64/ldp_stp_q_disable.c: Remove test.

This commit is contained in:
parent
5716f8daf3
commit
768fbb56b3
7 changed files with 8 additions and 63 deletions
|
@ -315,17 +315,9 @@ any_post_modify_p (rtx x)
|
|||
static bool
|
||||
ldp_operand_mode_ok_p (machine_mode mode)
|
||||
{
|
||||
const bool allow_qregs
|
||||
= !(aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
|
||||
|
||||
if (!aarch64_ldpstp_operand_mode_p (mode))
|
||||
return false;
|
||||
|
||||
const auto size = GET_MODE_SIZE (mode).to_constant ();
|
||||
if (size == 16 && !allow_qregs)
|
||||
return false;
|
||||
|
||||
// We don't pair up TImode accesses before RA because TImode is
|
||||
// special in that it can be allocated to a pair of GPRs or a single
|
||||
// FPR, and the RA is best placed to make that decision.
|
||||
|
|
|
@ -96,9 +96,7 @@
|
|||
(set (match_operand:VQ2 2 "register_operand" "")
|
||||
(match_operand:VQ2 3 "memory_operand" ""))]
|
||||
"TARGET_FLOAT
|
||||
&& aarch64_operands_ok_for_ldpstp (operands, true)
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0"
|
||||
&& aarch64_operands_ok_for_ldpstp (operands, true)"
|
||||
[(const_int 0)]
|
||||
{
|
||||
aarch64_finish_ldpstp_peephole (operands, true);
|
||||
|
@ -111,9 +109,7 @@
|
|||
(set (match_operand:VQ2 2 "memory_operand" "")
|
||||
(match_operand:VQ2 3 "register_operand" ""))]
|
||||
"TARGET_FLOAT
|
||||
&& aarch64_operands_ok_for_ldpstp (operands, false)
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0"
|
||||
&& aarch64_operands_ok_for_ldpstp (operands, false)"
|
||||
[(const_int 0)]
|
||||
{
|
||||
aarch64_finish_ldpstp_peephole (operands, false);
|
||||
|
|
|
@ -36,9 +36,6 @@ AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
|
|||
are not considered cheap. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
|
||||
|
||||
/* Disallow load/store pair instructions on Q-registers. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS)
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
|
||||
|
|
|
@ -10400,9 +10400,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
|
|||
|| mode == SDmode || mode == DDmode
|
||||
|| (aarch64_vector_mode_supported_p (mode)
|
||||
&& (known_eq (GET_MODE_SIZE (mode), 8)
|
||||
|| (known_eq (GET_MODE_SIZE (mode), 16)
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0)));
|
||||
|| known_eq (GET_MODE_SIZE (mode), 16)));
|
||||
}
|
||||
|
||||
/* Return true if REGNO is a virtual pointer register, or an eliminable
|
||||
|
@ -16519,10 +16517,6 @@ aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
|
||||
return false;
|
||||
|
||||
return is_gimple_assign (stmt_info->stmt);
|
||||
}
|
||||
|
||||
|
@ -17170,9 +17164,6 @@ aarch64_stp_sequence_cost (unsigned int count, vect_cost_for_stmt kind,
|
|||
/* Count 1 insn per vector if we can't form STP Q pairs. */
|
||||
if (aarch64_sve_mode_p (TYPE_MODE (vectype)))
|
||||
return count * 2;
|
||||
if (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
|
||||
return count * 2;
|
||||
|
||||
if (stmt_info)
|
||||
{
|
||||
|
@ -26625,11 +26616,9 @@ aarch64_expand_cpymem (rtx *operands, bool is_memmove)
|
|||
return aarch64_expand_cpymem_mops (operands, is_memmove);
|
||||
|
||||
unsigned HOST_WIDE_INT size = UINTVAL (operands[2]);
|
||||
bool use_ldpq = TARGET_SIMD && !(aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
|
||||
|
||||
/* Set inline limits for memmove/memcpy. MOPS has a separate threshold. */
|
||||
unsigned max_copy_size = use_ldpq ? 256 : 128;
|
||||
unsigned max_copy_size = TARGET_SIMD ? 256 : 128;
|
||||
unsigned mops_threshold = is_memmove ? aarch64_mops_memmove_size_threshold
|
||||
: aarch64_mops_memcpy_size_threshold;
|
||||
|
||||
|
@ -26834,11 +26823,8 @@ aarch64_expand_setmem (rtx *operands)
|
|||
/* Convert len to bits to make the rest of the code simpler. */
|
||||
n = len * BITS_PER_UNIT;
|
||||
|
||||
/* Maximum amount to copy in one go. We allow 256-bit chunks based on the
|
||||
AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter. */
|
||||
const int copy_limit = (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
|
||||
? GET_MODE_BITSIZE (TImode) : 256;
|
||||
/* Maximum amount to copy in one go. We allow 256-bit chunks. */
|
||||
const int copy_limit = 256;
|
||||
|
||||
while (n > 0)
|
||||
{
|
||||
|
|
|
@ -51,7 +51,7 @@ static const struct tune_params emag_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
17, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&xgene1_prefetch_tune,
|
||||
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
|
|
|
@ -136,7 +136,7 @@ static const struct tune_params xgene1_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
17, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&xgene1_prefetch_tune,
|
||||
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
/* { dg-options "-O2 -moverride=tune=no_ldp_stp_qregs" } */
|
||||
|
||||
typedef float float32x4_t __attribute__ ((__vector_size__ ((16))));
|
||||
|
||||
float32x4_t arr[4][4];
|
||||
|
||||
void
|
||||
foo (float32x4_t x, float32x4_t y)
|
||||
{
|
||||
arr[0][1] = x;
|
||||
arr[1][0] = y;
|
||||
arr[2][0] = x;
|
||||
arr[1][1] = y;
|
||||
arr[0][2] = x;
|
||||
arr[0][3] = y;
|
||||
arr[1][2] = x;
|
||||
arr[2][1] = y;
|
||||
arr[3][0] = x;
|
||||
arr[3][1] = y;
|
||||
arr[2][2] = x;
|
||||
arr[1][3] = y;
|
||||
arr[2][3] = x;
|
||||
arr[3][2] = y;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-not "stp\tq\[0-9\]+, q\[0-9\]" } } */
|
Loading…
Add table
Reference in a new issue