From 6b8b0c8e243bdc122a9ddd42030275494b1148ff Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 26 Mar 2021 16:08:38 +0000 Subject: [PATCH] aarch64: Add costs for LD[34] and ST[34] postincrements Most postincrements are cheap on Neoverse V1, but it's generally better to avoid them on LD[34] and ST[34] instructions. This patch adds separate address cost fields for these cases. Other CPUs continue to use the same costs for all postincrements. gcc/ * config/aarch64/aarch64-protos.h (cpu_addrcost_table::post_modify_ld3_st3): New member variable. (cpu_addrcost_table::post_modify_ld4_st4): Likewise. * config/aarch64/aarch64.c (generic_addrcost_table): Update accordingly, using the same costs as for post_modify. (exynosm1_addrcost_table, xgene1_addrcost_table): Likewise. (thunderx2t99_addrcost_table, thunderx3t110_addrcost_table) (tsv110_addrcost_table, qdf24xx_addrcost_table): Likewise. (a64fx_addrcost_table): Likewise. (neoversev1_addrcost_table): New. (neoversev1_tunings): Use neoversev1_addrcost_table. (aarch64_address_cost): Use the new post_modify costs for CImode and XImode. 
--- gcc/config/aarch64/aarch64-protos.h | 2 ++ gcc/config/aarch64/aarch64.c | 45 +++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index ca1ed9e8758..d5d5417370e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -177,6 +177,8 @@ struct cpu_addrcost_table const struct scale_addr_mode_cost addr_scale_costs; const int pre_modify; const int post_modify; + const int post_modify_ld3_st3; + const int post_modify_ld4_st4; const int register_offset; const int register_sextend; const int register_zextend; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 6d961bea5dc..a573850b3fd 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -364,6 +364,8 @@ static const struct cpu_addrcost_table generic_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 0, /* register_offset */ 0, /* register_sextend */ 0, /* register_zextend */ @@ -380,6 +382,8 @@ static const struct cpu_addrcost_table exynosm1_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 1, /* register_offset */ 1, /* register_sextend */ 2, /* register_zextend */ @@ -396,6 +400,8 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = }, 1, /* pre_modify */ 1, /* post_modify */ + 1, /* post_modify_ld3_st3 */ + 1, /* post_modify_ld4_st4 */ 0, /* register_offset */ 1, /* register_sextend */ 1, /* register_zextend */ @@ -412,6 +418,8 @@ static const struct cpu_addrcost_table thunderx2t99_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 2, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ @@ -428,6 +436,8 @@ static const struct cpu_addrcost_table 
thunderx3t110_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 2, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ @@ -444,6 +454,8 @@ static const struct cpu_addrcost_table tsv110_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 0, /* register_offset */ 1, /* register_sextend */ 1, /* register_zextend */ @@ -460,6 +472,8 @@ static const struct cpu_addrcost_table qdf24xx_addrcost_table = }, 1, /* pre_modify */ 1, /* post_modify */ + 1, /* post_modify_ld3_st3 */ + 1, /* post_modify_ld4_st4 */ 3, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ @@ -476,12 +490,32 @@ static const struct cpu_addrcost_table a64fx_addrcost_table = }, 0, /* pre_modify */ 0, /* post_modify */ + 0, /* post_modify_ld3_st3 */ + 0, /* post_modify_ld4_st4 */ 2, /* register_offset */ 3, /* register_sextend */ 3, /* register_zextend */ 0, /* imm_offset */ }; +static const struct cpu_addrcost_table neoversev1_addrcost_table = +{ + { + 1, /* hi */ + 0, /* si */ + 0, /* di */ + 1, /* ti */ + }, + 0, /* pre_modify */ + 0, /* post_modify */ + 3, /* post_modify_ld3_st3 */ + 3, /* post_modify_ld4_st4 */ + 0, /* register_offset */ + 0, /* register_sextend */ + 0, /* register_zextend */ + 0 /* imm_offset */ +}; + static const struct cpu_regmove_cost generic_regmove_cost = { 1, /* GP2GP */ @@ -1777,7 +1811,7 @@ static const struct cpu_vector_cost neoversev1_vector_cost = static const struct tune_params neoversev1_tunings = { &cortexa76_extra_costs, - &generic_addrcost_table, + &neoversev1_addrcost_table, &generic_regmove_cost, &neoversev1_vector_cost, &generic_branch_cost, @@ -12077,7 +12111,14 @@ aarch64_address_cost (rtx x, if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) cost += addr_cost->pre_modify; else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) - cost += addr_cost->post_modify; + { + if 
(mode == CImode) + cost += addr_cost->post_modify_ld3_st3; + else if (mode == XImode) + cost += addr_cost->post_modify_ld4_st4; + else + cost += addr_cost->post_modify; + } else gcc_unreachable ();