aarch64: Fine-grained policies to control ldp-stp formation
This patch implements the following TODO in gcc/config/aarch64/aarch64.cc to provide the requested behaviour for handling ldp and stp: /* Allow the tuning structure to disable LDP instruction formation from combining instructions (e.g., in peephole2). TODO: Implement fine-grained tuning control for LDP and STP: 1. control policies for load and store separately; 2. support the following policies: - default (use what is in the tuning structure) - always - never - aligned (only if the compiler can prove that the load will be aligned to 2 * element_size) */ It provides two new and concrete target-specific command-line parameters --param=aarch64-ldp-policy= and --param=aarch64-stp-policy= to give the ability to control load and store policies separately as stated in part 1 of the TODO. The accepted values for both parameters are: * default: Use the policy of the tuning structure (default). * always: Emit ldp/stp regardless of alignment. * never: Do not emit ldp/stp. * aligned: In order to emit ldp/stp, first check if the load/store will be aligned to 2 * element_size. Bootstrapped and regtested on aarch64-linux. gcc/ChangeLog: * config/aarch64/aarch64-opts.h (enum aarch64_ldp_policy): New enum type. (enum aarch64_stp_policy): New enum type. * config/aarch64/aarch64-protos.h (struct tune_params): Add appropriate enums for the policies. (aarch64_mem_ok_with_ldpstp_policy_model): New declaration. * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION): Remove superseded tuning options. * config/aarch64/aarch64.cc (aarch64_parse_ldp_policy): New function to parse ldp-policy parameter. (aarch64_parse_stp_policy): New function to parse stp-policy parameter. (aarch64_override_options_internal): Call parsing functions. (aarch64_mem_ok_with_ldpstp_policy_model): New function. (aarch64_operands_ok_for_ldpstp): Add call to aarch64_mem_ok_with_ldpstp_policy_model for parameter-value check and alignment check and remove superseded ones. 
(aarch64_operands_adjust_ok_for_ldpstp): Add call to aarch64_mem_ok_with_ldpstp_policy_model for parameter-value check and alignment check and remove superseded ones. * config/aarch64/aarch64.opt (aarch64-ldp-policy): New param. (aarch64-stp-policy): New param. * doc/invoke.texi: Document the parameters accordingly. gcc/testsuite/ChangeLog: * gcc.target/aarch64/ampere1-no_ldp_combine.c: Removed. * gcc.target/aarch64/ldp_aligned.c: New test. * gcc.target/aarch64/ldp_always.c: New test. * gcc.target/aarch64/ldp_never.c: New test. * gcc.target/aarch64/stp_aligned.c: New test. * gcc.target/aarch64/stp_always.c: New test. * gcc.target/aarch64/stp_never.c: New test. Signed-off-by: Manos Anagnostakis <manos.anagnostakis@vrull.eu> Co-Authored-By: Philipp Tomsich <philipp.tomsich@vrull.eu> Co-Authored-By: Manolis Tsamis <manolis.tsamis@vrull.eu>
This commit is contained in:
parent
b31218bc93
commit
834fc2bf05
13 changed files with 632 additions and 76 deletions
|
@ -108,4 +108,20 @@ enum aarch64_key_type {
|
|||
AARCH64_KEY_B
|
||||
};
|
||||
|
||||
/* Load pair policy, as selected by --param=aarch64-ldp-policy=.
   LDP_POLICY_DEFAULT is deliberately the zero value: the option handling
   code only overrides the tuning structure when the parameter is
   non-zero.  */
enum aarch64_ldp_policy {
  /* Use the policy of the tuning structure.  */
  LDP_POLICY_DEFAULT = 0,
  /* Emit ldp regardless of alignment.  */
  LDP_POLICY_ALWAYS,
  /* Do not emit ldp.  */
  LDP_POLICY_NEVER,
  /* Emit ldp only when the alignment check succeeds.  */
  LDP_POLICY_ALIGNED
};
|
||||
|
||||
/* Store pair policy, as selected by --param=aarch64-stp-policy=.
   STP_POLICY_DEFAULT is deliberately the zero value: the option handling
   code only overrides the tuning structure when the parameter is
   non-zero.  */
enum aarch64_stp_policy {
  /* Use the policy of the tuning structure.  */
  STP_POLICY_DEFAULT = 0,
  /* Emit stp regardless of alignment.  */
  STP_POLICY_ALWAYS,
  /* Do not emit stp.  */
  STP_POLICY_NEVER,
  /* Emit stp only when the alignment check succeeds.  */
  STP_POLICY_ALIGNED
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -568,6 +568,30 @@ struct tune_params
|
|||
/* Place prefetch struct pointer at the end to enable type checking
|
||||
errors when tune_params misses elements (e.g., from erroneous merges). */
|
||||
const struct cpu_prefetch_tune *prefetch;
|
||||
/* An enum specifying how to handle load pairs using a fine-grained policy:
|
||||
- LDP_POLICY_ALIGNED: Emit ldp if the source pointer is aligned
|
||||
to at least double the alignment of the type.
|
||||
- LDP_POLICY_ALWAYS: Emit ldp regardless of alignment.
|
||||
- LDP_POLICY_NEVER: Do not emit ldp. */
|
||||
|
||||
enum aarch64_ldp_policy_model
|
||||
{
|
||||
LDP_POLICY_ALIGNED,
|
||||
LDP_POLICY_ALWAYS,
|
||||
LDP_POLICY_NEVER
|
||||
} ldp_policy_model;
|
||||
/* An enum specifying how to handle store pairs using a fine-grained policy:
|
||||
- STP_POLICY_ALIGNED: Emit stp if the destination pointer is aligned
|
||||
to at least double the alignment of the type.
|
||||
- STP_POLICY_ALWAYS: Emit stp regardless of alignment.
|
||||
- STP_POLICY_NEVER: Do not emit stp. */
|
||||
|
||||
enum aarch64_stp_policy_model
|
||||
{
|
||||
STP_POLICY_ALIGNED,
|
||||
STP_POLICY_ALWAYS,
|
||||
STP_POLICY_NEVER
|
||||
} stp_policy_model;
|
||||
};
|
||||
|
||||
/* Classifies an address.
|
||||
|
@ -1015,6 +1039,7 @@ bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
|
|||
bool aarch64_mergeable_load_pair_p (machine_mode, rtx, rtx);
|
||||
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
|
||||
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
|
||||
bool aarch64_mem_ok_with_ldpstp_policy_model (rtx, bool, machine_mode);
|
||||
void aarch64_swap_ldrstr_operands (rtx *, bool);
|
||||
|
||||
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
|
||||
|
|
|
@ -30,11 +30,6 @@
|
|||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
|
||||
|
||||
/* Don't create non-8 byte aligned load/store pair. That is if the
|
||||
two load/stores are not at least 8 byte aligned don't create load/store
|
||||
pairs. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
|
||||
|
||||
/* Some of the optional shift to some arithmetic instructions are
|
||||
considered cheap. Logical shift left <=4 with or without a
|
||||
zero extend are considered cheap. Sign extend; non logical shift left
|
||||
|
@ -44,9 +39,6 @@ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
|
|||
/* Disallow load/store pair instructions on Q-registers. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS)
|
||||
|
||||
/* Disallow load-pair instructions to be formed in combine/peephole. */
|
||||
AARCH64_EXTRA_TUNING_OPTION ("no_ldp_combine", NO_LDP_COMBINE)
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
|
||||
|
||||
AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
|
||||
|
|
|
@ -1357,7 +1357,9 @@ static const struct tune_params generic_tunings =
|
|||
Neoverse V1. It does not have a noticeable effect on A64FX and should
|
||||
have at most a very minor effect on SVE2 cores. */
|
||||
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa35_tunings =
|
||||
|
@ -1391,7 +1393,9 @@ static const struct tune_params cortexa35_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa53_tunings =
|
||||
|
@ -1425,7 +1429,9 @@ static const struct tune_params cortexa53_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa57_tunings =
|
||||
|
@ -1459,7 +1465,9 @@ static const struct tune_params cortexa57_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa72_tunings =
|
||||
|
@ -1493,7 +1501,9 @@ static const struct tune_params cortexa72_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params cortexa73_tunings =
|
||||
|
@ -1527,7 +1537,9 @@ static const struct tune_params cortexa73_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
|
||||
|
@ -1562,7 +1574,9 @@ static const struct tune_params exynosm1_tunings =
|
|||
48, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&exynosm1_prefetch_tune
|
||||
&exynosm1_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params thunderxt88_tunings =
|
||||
|
@ -1594,8 +1608,10 @@ static const struct tune_params thunderxt88_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
|
||||
&thunderxt88_prefetch_tune
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&thunderxt88_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALIGNED, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALIGNED /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx_tunings =
|
||||
|
@ -1627,9 +1643,10 @@ static const struct tune_params thunderx_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
|
||||
| AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
|
||||
&thunderx_prefetch_tune
|
||||
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
|
||||
&thunderx_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALIGNED, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALIGNED /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params tsv110_tunings =
|
||||
|
@ -1663,7 +1680,9 @@ static const struct tune_params tsv110_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&tsv110_prefetch_tune
|
||||
&tsv110_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params xgene1_tunings =
|
||||
|
@ -1696,7 +1715,9 @@ static const struct tune_params xgene1_tunings =
|
|||
17, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
|
||||
&xgene1_prefetch_tune
|
||||
&xgene1_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params emag_tunings =
|
||||
|
@ -1729,7 +1750,9 @@ static const struct tune_params emag_tunings =
|
|||
17, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS), /* tune_flags. */
|
||||
&xgene1_prefetch_tune
|
||||
&xgene1_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params qdf24xx_tunings =
|
||||
|
@ -1763,7 +1786,9 @@ static const struct tune_params qdf24xx_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */
|
||||
&qdf24xx_prefetch_tune
|
||||
&qdf24xx_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
/* Tuning structure for the Qualcomm Saphira core. Default to falkor values
|
||||
|
@ -1799,7 +1824,9 @@ static const struct tune_params saphira_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx2t99_tunings =
|
||||
|
@ -1833,7 +1860,9 @@ static const struct tune_params thunderx2t99_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&thunderx2t99_prefetch_tune
|
||||
&thunderx2t99_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params thunderx3t110_tunings =
|
||||
|
@ -1867,7 +1896,9 @@ static const struct tune_params thunderx3t110_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&thunderx3t110_prefetch_tune
|
||||
&thunderx3t110_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params neoversen1_tunings =
|
||||
|
@ -1900,7 +1931,9 @@ static const struct tune_params neoversen1_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params ampere1_tunings =
|
||||
|
@ -1936,8 +1969,10 @@ static const struct tune_params ampere1_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_COMBINE), /* tune_flags. */
|
||||
&ere1_prefetch_tune
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&ere1_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALIGNED, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALIGNED /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params ampere1a_tunings =
|
||||
|
@ -1974,8 +2009,10 @@ static const struct tune_params ampere1a_tunings =
|
|||
2, /* min_div_recip_mul_df. */
|
||||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NO_LDP_COMBINE), /* tune_flags. */
|
||||
&ere1_prefetch_tune
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&ere1_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALIGNED, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALIGNED /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const advsimd_vec_cost neoversev1_advsimd_vector_cost =
|
||||
|
@ -2156,7 +2193,9 @@ static const struct tune_params neoversev1_tunings =
|
|||
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
|
||||
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
|
||||
| AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const sve_vec_cost neoverse512tvb_sve_vector_cost =
|
||||
|
@ -2293,7 +2332,9 @@ static const struct tune_params neoverse512tvb_tunings =
|
|||
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
|
||||
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
|
||||
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const advsimd_vec_cost neoversen2_advsimd_vector_cost =
|
||||
|
@ -2483,7 +2524,9 @@ static const struct tune_params neoversen2_tunings =
|
|||
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
|
||||
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
|
||||
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const advsimd_vec_cost neoversev2_advsimd_vector_cost =
|
||||
|
@ -2673,7 +2716,9 @@ static const struct tune_params neoversev2_tunings =
|
|||
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
|
||||
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
|
||||
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
|
||||
&generic_prefetch_tune
|
||||
&generic_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
static const struct tune_params a64fx_tunings =
|
||||
|
@ -2706,7 +2751,9 @@ static const struct tune_params a64fx_tunings =
|
|||
0, /* max_case_values. */
|
||||
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
|
||||
&a64fx_prefetch_tune
|
||||
&a64fx_prefetch_tune,
|
||||
tune_params::LDP_POLICY_ALWAYS, /* ldp_policy_model. */
|
||||
tune_params::STP_POLICY_ALWAYS /* stp_policy_model. */
|
||||
};
|
||||
|
||||
/* Support for fine-grained override of the tuning structures. */
|
||||
|
@ -17819,6 +17866,36 @@ aarch64_parse_tune (const char *to_parse, const struct processor **res)
|
|||
return AARCH_PARSE_INVALID_ARG;
|
||||
}
|
||||
|
||||
/* Parse a command-line -param=aarch64-ldp-policy= parameter. VALUE is
|
||||
the value of the parameter. */
|
||||
|
||||
static void
|
||||
aarch64_parse_ldp_policy (enum aarch64_ldp_policy value,
|
||||
struct tune_params* tune)
|
||||
{
|
||||
if (value == LDP_POLICY_ALWAYS)
|
||||
tune->ldp_policy_model = tune_params::LDP_POLICY_ALWAYS;
|
||||
else if (value == LDP_POLICY_NEVER)
|
||||
tune->ldp_policy_model = tune_params::LDP_POLICY_NEVER;
|
||||
else if (value == LDP_POLICY_ALIGNED)
|
||||
tune->ldp_policy_model = tune_params::LDP_POLICY_ALIGNED;
|
||||
}
|
||||
|
||||
/* Parse a command-line -param=aarch64-stp-policy= parameter. VALUE is
|
||||
the value of the parameter. */
|
||||
|
||||
static void
|
||||
aarch64_parse_stp_policy (enum aarch64_stp_policy value,
|
||||
struct tune_params* tune)
|
||||
{
|
||||
if (value == STP_POLICY_ALWAYS)
|
||||
tune->stp_policy_model = tune_params::STP_POLICY_ALWAYS;
|
||||
else if (value == STP_POLICY_NEVER)
|
||||
tune->stp_policy_model = tune_params::STP_POLICY_NEVER;
|
||||
else if (value == STP_POLICY_ALIGNED)
|
||||
tune->stp_policy_model = tune_params::STP_POLICY_ALIGNED;
|
||||
}
|
||||
|
||||
/* Parse TOKEN, which has length LENGTH to see if it is an option
|
||||
described in FLAG. If it is, return the index bit for that fusion type.
|
||||
If not, error (printing OPTION_NAME) and return zero. */
|
||||
|
@ -18167,6 +18244,14 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
|||
aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
|
||||
&aarch64_tune_params);
|
||||
|
||||
if (opts->x_aarch64_ldp_policy_param)
|
||||
aarch64_parse_ldp_policy (opts->x_aarch64_ldp_policy_param,
|
||||
&aarch64_tune_params);
|
||||
|
||||
if (opts->x_aarch64_stp_policy_param)
|
||||
aarch64_parse_stp_policy (opts->x_aarch64_stp_policy_param,
|
||||
&aarch64_tune_params);
|
||||
|
||||
/* This target defaults to strict volatile bitfields. */
|
||||
if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
|
||||
opts->x_flag_strict_volatile_bitfields = 1;
|
||||
|
@ -26457,6 +26542,43 @@ aarch64_mergeable_load_pair_p (machine_mode mode, rtx mem1, rtx mem2)
|
|||
return aarch64_check_consecutive_mems (&mem1, &mem2, nullptr);
|
||||
}
|
||||
|
||||
/* Return true if MEM agrees with the ldp-stp policy model.
|
||||
Otherwise, false. */
|
||||
|
||||
bool
|
||||
aarch64_mem_ok_with_ldpstp_policy_model (rtx mem, bool load, machine_mode mode)
|
||||
{
|
||||
/* If we have LDP_POLICY_NEVER, reject the load pair. */
|
||||
if (load
|
||||
&& aarch64_tune_params.ldp_policy_model == tune_params::LDP_POLICY_NEVER)
|
||||
return false;
|
||||
|
||||
/* If we have STP_POLICY_NEVER, reject the store pair. */
|
||||
if (!load
|
||||
&& aarch64_tune_params.stp_policy_model == tune_params::STP_POLICY_NEVER)
|
||||
return false;
|
||||
|
||||
/* If we have LDP_POLICY_ALIGNED,
|
||||
do not emit the load pair unless the alignment is checked to be
|
||||
at least double the alignment of the type. */
|
||||
if (load
|
||||
&& aarch64_tune_params.ldp_policy_model == tune_params::LDP_POLICY_ALIGNED
|
||||
&& !optimize_function_for_size_p (cfun)
|
||||
&& MEM_ALIGN (mem) < 2 * GET_MODE_ALIGNMENT (mode))
|
||||
return false;
|
||||
|
||||
/* If we have STP_POLICY_ALIGNED,
|
||||
do not emit the store pair unless the alignment is checked to be
|
||||
at least double the alignment of the type. */
|
||||
if (!load
|
||||
&& aarch64_tune_params.stp_policy_model == tune_params::STP_POLICY_ALIGNED
|
||||
&& !optimize_function_for_size_p (cfun)
|
||||
&& MEM_ALIGN (mem) < 2 * GET_MODE_ALIGNMENT (mode))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Given OPERANDS of consecutive load/store, check if we can merge
|
||||
them into ldp/stp. LOAD is true if they are load instructions.
|
||||
MODE is the mode of memory operands. */
|
||||
|
@ -26468,20 +26590,6 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
|
|||
enum reg_class rclass_1, rclass_2;
|
||||
rtx mem_1, mem_2, reg_1, reg_2;
|
||||
|
||||
/* Allow the tuning structure to disable LDP instruction formation
|
||||
from combining instructions (e.g., in peephole2).
|
||||
TODO: Implement fine-grained tuning control for LDP and STP:
|
||||
1. control policies for load and store separately;
|
||||
2. support the following policies:
|
||||
- default (use what is in the tuning structure)
|
||||
- always
|
||||
- never
|
||||
- aligned (only if the compiler can prove that the
|
||||
load will be aligned to 2 * element_size) */
|
||||
if (load && (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_NO_LDP_COMBINE))
|
||||
return false;
|
||||
|
||||
if (load)
|
||||
{
|
||||
mem_1 = operands[1];
|
||||
|
@ -26506,13 +26614,8 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
|
|||
if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
|
||||
return false;
|
||||
|
||||
/* If we have SImode and slow unaligned ldp,
|
||||
check the alignment to be at least 8 byte. */
|
||||
if (mode == SImode
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
|
||||
&& !optimize_size
|
||||
&& MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
|
||||
/* Check if mem_1 is ok with the ldp-stp policy model. */
|
||||
if (!aarch64_mem_ok_with_ldpstp_policy_model (mem_1, load, mode))
|
||||
return false;
|
||||
|
||||
/* Check if the addresses are in the form of [base+offset]. */
|
||||
|
@ -26729,13 +26832,8 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
|
|||
if (offvals[0] % msize != offvals[2] % msize)
|
||||
return false;
|
||||
|
||||
/* If we have SImode and slow unaligned ldp,
|
||||
check the alignment to be at least 8 byte. */
|
||||
if (mode == SImode
|
||||
&& (aarch64_tune_params.extra_tuning_flags
|
||||
& AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
|
||||
&& !optimize_size
|
||||
&& MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
|
||||
/* Check if mem[0] is ok with the ldp-stp policy model. */
|
||||
if (!aarch64_mem_ok_with_ldpstp_policy_model (mem[0], load, mode))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
@ -337,3 +337,41 @@ Constant memset size in bytes from which to start using MOPS sequence.
|
|||
-param=aarch64-vect-unroll-limit=
|
||||
Target Joined UInteger Var(aarch64_vect_unroll_limit) Init(4) Param
|
||||
Limit how much the autovectorizer may unroll a loop.
|
||||
|
||||
-param=aarch64-ldp-policy=
|
||||
Target Joined Var(aarch64_ldp_policy_param) Enum(aarch64_ldp_policy) Init(LDP_POLICY_DEFAULT) Param
|
||||
--param=aarch64-ldp-policy=[default|always|never|aligned] Fine-grained policy for load pairs.
|
||||
|
||||
Enum
|
||||
Name(aarch64_ldp_policy) Type(enum aarch64_ldp_policy) UnknownError(unknown aarch64_ldp_policy mode %qs)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_ldp_policy) String(default) Value(LDP_POLICY_DEFAULT)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_ldp_policy) String(always) Value(LDP_POLICY_ALWAYS)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_ldp_policy) String(never) Value(LDP_POLICY_NEVER)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_ldp_policy) String(aligned) Value(LDP_POLICY_ALIGNED)
|
||||
|
||||
-param=aarch64-stp-policy=
|
||||
Target Joined Var(aarch64_stp_policy_param) Enum(aarch64_stp_policy) Init(STP_POLICY_DEFAULT) Param
|
||||
--param=aarch64-stp-policy=[default|always|never|aligned] Fine-grained policy for store pairs.
|
||||
|
||||
Enum
|
||||
Name(aarch64_stp_policy) Type(enum aarch64_stp_policy) UnknownError(unknown aarch64_stp_policy mode %qs)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_stp_policy) String(default) Value(STP_POLICY_DEFAULT)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_stp_policy) String(always) Value(STP_POLICY_ALWAYS)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_stp_policy) String(never) Value(STP_POLICY_NEVER)
|
||||
|
||||
EnumValue
|
||||
Enum(aarch64_stp_policy) String(aligned) Value(STP_POLICY_ALIGNED)
|
||||
|
|
|
@ -16512,6 +16512,26 @@ Use both Advanced SIMD and SVE. Prefer SVE when the costs are deemed equal.
|
|||
@end table
|
||||
The default value is 0.
|
||||
|
||||
@item aarch64-ldp-policy
|
||||
Fine-grained policy for load pairs.
|
||||
With @option{--param=aarch64-ldp-policy=default}, use the policy of the
|
||||
tuning structure. This is the current default.
|
||||
With @option{--param=aarch64-ldp-policy=always}, emit ldp regardless
|
||||
of alignment.
|
||||
With @option{--param=aarch64-ldp-policy=never}, do not emit ldp.
|
||||
With @option{--param=aarch64-ldp-policy=aligned}, emit ldp only if the
|
||||
source pointer is aligned to at least double the alignment of the type.
|
||||
|
||||
@item aarch64-stp-policy
|
||||
Fine-grained policy for store pairs.
|
||||
With @option{--param=aarch64-stp-policy=default}, use the policy of the
|
||||
tuning structure. This is the current default.
|
||||
With @option{--param=aarch64-stp-policy=always}, emit stp regardless
|
||||
of alignment.
|
||||
With @option{--param=aarch64-stp-policy=never}, do not emit stp.
|
||||
With @option{--param=aarch64-stp-policy=aligned}, emit stp only if the
|
||||
destination pointer is aligned to at least double the alignment of the type.
|
||||
|
||||
@item aarch64-loop-vect-issue-rate-niters
|
||||
The tuning for some AArch64 CPUs tries to take both latencies and issue
|
||||
rates into account when deciding whether a loop should be vectorized
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
/* { dg-options "-O3 -mtune=ampere1" } */
|
||||
|
||||
/* Return the sum of the first two elements of A.  The two loads are
   adjacent, which makes them a candidate for a load pair.  */
long
foo (long a[])
{
  long first = a[0];
  long second = a[1];

  return first + second;
}
|
||||
|
||||
/* We should see two ldrs instead of one ldp. */
|
||||
/* { dg-final { scan-assembler {\tldr\t} } } */
|
||||
/* { dg-final { scan-assembler-not {\tldp\t} } } */
|
66
gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
Normal file
66
gcc/testsuite/gcc.target/aarch64/ldp_aligned.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* { dg-options "-O2 --param=aarch64-ldp-policy=aligned -mcpu=generic" } */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* View PTR as a TYPE array after rounding its address down to a multiple
   of 2 * 8 * _Alignof (TYPE) bytes.  */
#define ALIGNED_BASE(TYPE, PTR) \
  ((TYPE *) ((uintptr_t) (PTR) & ~(2 * 8 * _Alignof (TYPE) - 1)))

/* Sum two adjacent elements starting at the rounded-down base.  */
#define LDP_TEST_ALIGNED(TYPE) \
TYPE ldp_aligned_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE first = base[0]; \
  TYPE second = base[1]; \
  return first + second; \
}

/* Sum two adjacent elements starting one element past the rounded-down
   base.  */
#define LDP_TEST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE *shifted = base + 1; \
  TYPE first = shifted[0]; \
  TYPE second = shifted[1]; \
  return first + second; \
}

/* Sum elements 100..103 and 110 of the rounded-down base.  */
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE ldp_aligned_adjust_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE e0 = base[100]; \
  TYPE e1 = base[101]; \
  TYPE e2 = base[102]; \
  TYPE e3 = base[103]; \
  TYPE e4 = base[110]; \
  return e0 + e1 + e2 + e3 + e4; \
}

/* Sum elements 100..103 and 110 counted from one element past the
   rounded-down base.  */
#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_adjust_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE *shifted = base + 1; \
  TYPE e0 = shifted[100]; \
  TYPE e1 = shifted[101]; \
  TYPE e2 = shifted[102]; \
  TYPE e3 = shifted[103]; \
  TYPE e4 = shifted[110]; \
  return e0 + e1 + e2 + e3 + e4; \
}

LDP_TEST_ALIGNED(int32_t);
LDP_TEST_ALIGNED(int64_t);
LDP_TEST_ALIGNED(v4si);
LDP_TEST_UNALIGNED(int32_t);
LDP_TEST_UNALIGNED(int64_t);
LDP_TEST_UNALIGNED(v4si);
LDP_TEST_ADJUST_ALIGNED(int32_t);
LDP_TEST_ADJUST_ALIGNED(int64_t);
LDP_TEST_ADJUST_UNALIGNED(int32_t);
LDP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of load pairs per register class (w, x, q).  */
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 1 } } */
|
||||
|
66
gcc/testsuite/gcc.target/aarch64/ldp_always.c
Normal file
66
gcc/testsuite/gcc.target/aarch64/ldp_always.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* { dg-options "-O2 --param=aarch64-ldp-policy=always -mcpu=generic" } */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* View PTR as a TYPE array after rounding its address down to a multiple
   of 2 * 8 * _Alignof (TYPE) bytes.  */
#define ALIGNED_BASE(TYPE, PTR) \
  ((TYPE *) ((uintptr_t) (PTR) & ~(2 * 8 * _Alignof (TYPE) - 1)))

/* Sum two adjacent elements starting at the rounded-down base.  */
#define LDP_TEST_ALIGNED(TYPE) \
TYPE ldp_aligned_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE first = base[0]; \
  TYPE second = base[1]; \
  return first + second; \
}

/* Sum two adjacent elements starting one element past the rounded-down
   base.  */
#define LDP_TEST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE *shifted = base + 1; \
  TYPE first = shifted[0]; \
  TYPE second = shifted[1]; \
  return first + second; \
}

/* Sum elements 100..103 and 110 of the rounded-down base.  */
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE ldp_aligned_adjust_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE e0 = base[100]; \
  TYPE e1 = base[101]; \
  TYPE e2 = base[102]; \
  TYPE e3 = base[103]; \
  TYPE e4 = base[110]; \
  return e0 + e1 + e2 + e3 + e4; \
}

/* Sum elements 100..103 and 110 counted from one element past the
   rounded-down base.  */
#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_adjust_##TYPE (char *ptr) \
{ \
  TYPE *base = ALIGNED_BASE (TYPE, ptr); \
  TYPE *shifted = base + 1; \
  TYPE e0 = shifted[100]; \
  TYPE e1 = shifted[101]; \
  TYPE e2 = shifted[102]; \
  TYPE e3 = shifted[103]; \
  TYPE e4 = shifted[110]; \
  return e0 + e1 + e2 + e3 + e4; \
}

LDP_TEST_ALIGNED(int32_t);
LDP_TEST_ALIGNED(int64_t);
LDP_TEST_ALIGNED(v4si);
LDP_TEST_UNALIGNED(int32_t);
LDP_TEST_UNALIGNED(int64_t);
LDP_TEST_UNALIGNED(v4si);
LDP_TEST_ADJUST_ALIGNED(int32_t);
LDP_TEST_ADJUST_ALIGNED(int64_t);
LDP_TEST_ADJUST_UNALIGNED(int32_t);
LDP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of load pairs per register class (w, x, q).  */
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 2 } } */
|
||||
|
66
gcc/testsuite/gcc.target/aarch64/ldp_never.c
Normal file
66
gcc/testsuite/gcc.target/aarch64/ldp_never.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* { dg-options "-O2 --param=aarch64-ldp-policy=never -mcpu=generic" } */

/* Check that no load-pair instruction is emitted when the ldp policy
   parameter is "never".  Every function below rounds PTR down to a
   multiple of 2 * 8 * _Alignof (TYPE) bytes; the "unaligned" variants
   then step one element past that boundary before loading.  */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* Sum elements 0 and 1 at the rounded-down address.  */
#define LDP_TEST_ALIGNED(TYPE) \
TYPE ldp_aligned_##TYPE(char* ptr){ \
    TYPE a_0, a_1; \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    a_0 = arr[0]; \
    a_1 = arr[1]; \
    return a_0 + a_1; \
}

/* Sum elements 0 and 1 starting one element past the rounded-down
   address.  */
#define LDP_TEST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_##TYPE(char* ptr){ \
    TYPE a_0, a_1; \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a_0 = a[0]; \
    a_1 = a[1]; \
    return a_0 + a_1; \
}

/* Sum elements 100..103 and 110 at the rounded-down address.  */
#define LDP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE ldp_aligned_adjust_##TYPE(char* ptr){ \
    TYPE a_0, a_1, a_2, a_3, a_4; \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    a_0 = arr[100]; \
    a_1 = arr[101]; \
    a_2 = arr[102]; \
    a_3 = arr[103]; \
    a_4 = arr[110]; \
    return a_0 + a_1 + a_2 + a_3 + a_4; \
}

/* Sum elements 100..103 and 110 starting one element past the
   rounded-down address.  */
#define LDP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE ldp_unaligned_adjust_##TYPE(char* ptr){ \
    TYPE a_0, a_1, a_2, a_3, a_4; \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a_0 = a[100]; \
    a_1 = a[101]; \
    a_2 = a[102]; \
    a_3 = a[103]; \
    a_4 = a[110]; \
    return a_0 + a_1 + a_2 + a_3 + a_4; \
}

LDP_TEST_ALIGNED(int32_t);
LDP_TEST_ALIGNED(int64_t);
LDP_TEST_ALIGNED(v4si);
LDP_TEST_UNALIGNED(int32_t);
LDP_TEST_UNALIGNED(int64_t);
LDP_TEST_UNALIGNED(v4si);
LDP_TEST_ADJUST_ALIGNED(int32_t);
LDP_TEST_ADJUST_ALIGNED(int64_t);
LDP_TEST_ADJUST_UNALIGNED(int32_t);
LDP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of load-pair instructions per register class.  */
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 0 } } */
/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\]" 0 } } */
/* { dg-final { scan-assembler-times "ldp\tq\[0-9\]+, q\[0-9\]" 0 } } */
|
||||
|
60
gcc/testsuite/gcc.target/aarch64/stp_aligned.c
Normal file
60
gcc/testsuite/gcc.target/aarch64/stp_aligned.c
Normal file
|
@ -0,0 +1,60 @@
|
|||
/* { dg-options "-O2 --param=aarch64-stp-policy=aligned -mcpu=generic" } */

/* Check store-pair formation when the stp policy parameter is "aligned".
   Every function below rounds PTR down to a multiple of
   2 * 8 * _Alignof (TYPE) bytes; the "unaligned" variants then step one
   element past that boundary before storing.  */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* Store X to elements 0 and 1 at the rounded-down address and return
   that address.  */
#define STP_TEST_ALIGNED(TYPE) \
TYPE *stp_aligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[0] = x; \
    arr[1] = x; \
    return arr; \
}

/* Store X to elements 0 and 1 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[0] = x; \
    a[1] = x; \
    return a; \
}

/* Store X to elements 100..103 at the rounded-down address and return
   that address.  */
#define STP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE *stp_aligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[100] = x; \
    arr[101] = x; \
    arr[102] = x; \
    arr[103] = x; \
    return arr; \
}

/* Store X to elements 100..103 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[100] = x; \
    a[101] = x; \
    a[102] = x; \
    a[103] = x; \
    return a; \
}

STP_TEST_ALIGNED(int32_t);
STP_TEST_ALIGNED(int64_t);
STP_TEST_ALIGNED(v4si);
STP_TEST_UNALIGNED(int32_t);
STP_TEST_UNALIGNED(int64_t);
STP_TEST_UNALIGNED(v4si);
STP_TEST_ADJUST_ALIGNED(int32_t);
STP_TEST_ADJUST_ALIGNED(int64_t);
STP_TEST_ADJUST_UNALIGNED(int32_t);
STP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of store-pair instructions per register class.  */
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 3 } } */
/* { dg-final { scan-assembler-times "stp\tq\[0-9\]+, q\[0-9\]" 1 } } */
|
||||
|
60
gcc/testsuite/gcc.target/aarch64/stp_always.c
Normal file
60
gcc/testsuite/gcc.target/aarch64/stp_always.c
Normal file
|
@ -0,0 +1,60 @@
|
|||
/* { dg-options "-O2 --param=aarch64-stp-policy=always -mcpu=generic" } */

/* Check store-pair formation when the stp policy parameter is "always".
   Every function below rounds PTR down to a multiple of
   2 * 8 * _Alignof (TYPE) bytes; the "unaligned" variants then step one
   element past that boundary before storing.  */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* Store X to elements 0 and 1 at the rounded-down address and return
   that address.  */
#define STP_TEST_ALIGNED(TYPE) \
TYPE *stp_aligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[0] = x; \
    arr[1] = x; \
    return arr; \
}

/* Store X to elements 0 and 1 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[0] = x; \
    a[1] = x; \
    return a; \
}

/* Store X to elements 100..103 at the rounded-down address and return
   that address.  */
#define STP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE *stp_aligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[100] = x; \
    arr[101] = x; \
    arr[102] = x; \
    arr[103] = x; \
    return arr; \
}

/* Store X to elements 100..103 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[100] = x; \
    a[101] = x; \
    a[102] = x; \
    a[103] = x; \
    return a; \
}

STP_TEST_ALIGNED(int32_t);
STP_TEST_ALIGNED(int64_t);
STP_TEST_ALIGNED(v4si);
STP_TEST_UNALIGNED(int32_t);
STP_TEST_UNALIGNED(int64_t);
STP_TEST_UNALIGNED(v4si);
STP_TEST_ADJUST_ALIGNED(int32_t);
STP_TEST_ADJUST_ALIGNED(int64_t);
STP_TEST_ADJUST_UNALIGNED(int32_t);
STP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of store-pair instructions per register class.  */
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 6 } } */
/* { dg-final { scan-assembler-times "stp\tq\[0-9\]+, q\[0-9\]" 2 } } */
|
||||
|
60
gcc/testsuite/gcc.target/aarch64/stp_never.c
Normal file
60
gcc/testsuite/gcc.target/aarch64/stp_never.c
Normal file
|
@ -0,0 +1,60 @@
|
|||
/* { dg-options "-O2 --param=aarch64-stp-policy=never -mcpu=generic" } */

/* Check that no store-pair instruction is emitted when the stp policy
   parameter is "never".  Every function below rounds PTR down to a
   multiple of 2 * 8 * _Alignof (TYPE) bytes; the "unaligned" variants
   then step one element past that boundary before storing.  */

#include <stdlib.h>
#include <stdint.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* Store X to elements 0 and 1 at the rounded-down address and return
   that address.  */
#define STP_TEST_ALIGNED(TYPE) \
TYPE *stp_aligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[0] = x; \
    arr[1] = x; \
    return arr; \
}

/* Store X to elements 0 and 1 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[0] = x; \
    a[1] = x; \
    return a; \
}

/* Store X to elements 100..103 at the rounded-down address and return
   that address.  */
#define STP_TEST_ADJUST_ALIGNED(TYPE) \
TYPE *stp_aligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    arr[100] = x; \
    arr[101] = x; \
    arr[102] = x; \
    arr[103] = x; \
    return arr; \
}

/* Store X to elements 100..103 starting one element past the
   rounded-down address and return that stepped address.  */
#define STP_TEST_ADJUST_UNALIGNED(TYPE) \
TYPE *stp_unaligned_adjust_##TYPE(char* ptr, TYPE x){ \
    TYPE *arr = (TYPE*) ((uintptr_t)ptr & ~(2 * 8 * _Alignof(TYPE) - 1)); \
    TYPE *a = arr+1; \
    a[100] = x; \
    a[101] = x; \
    a[102] = x; \
    a[103] = x; \
    return a; \
}

STP_TEST_ALIGNED(int32_t);
STP_TEST_ALIGNED(int64_t);
STP_TEST_ALIGNED(v4si);
STP_TEST_UNALIGNED(int32_t);
STP_TEST_UNALIGNED(int64_t);
STP_TEST_UNALIGNED(v4si);
STP_TEST_ADJUST_ALIGNED(int32_t);
STP_TEST_ADJUST_ALIGNED(int64_t);
STP_TEST_ADJUST_UNALIGNED(int32_t);
STP_TEST_ADJUST_UNALIGNED(int64_t);

/* Expected number of store-pair instructions per register class.  */
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 0 } } */
/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]" 0 } } */
/* { dg-final { scan-assembler-times "stp\tq\[0-9\]+, q\[0-9\]" 0 } } */
|
||||
|
Loading…
Add table
Reference in a new issue