diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 39765211a7a..3f83c29dc36 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2018-12-07 Kyrylo Tkachov + + * config/aarch64/aarch64-opts.h (aarch64_sve_vector_bits_enum): + Add SVE_NOT_IMPLEMENTED value. + * config/aarch64/aarch64-protos.h (struct tune_params): Add sve_width + field. + * config/aarch64/aarch64.c (generic_tunings, cortexa35_tunings, + cortexa53_tunings, cortexa57_tunings, cortexa72_tunings, + cortexa73_tunings, exynosm1_tunings, thunderxt88_tunings, + thunderx_tunings, tsv110_tunings, xgene1_tunings, qdf24xx_tunings, + saphira_tunings, thunderx2t99_tunings, emag_tunings): + Specify sve_width. + (aarch64_estimated_poly_value): Define. + (TARGET_ESTIMATED_POLY_VALUE): Define. + 2018-12-07 Paul A. Clarke PR target/88408 diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index 7a5c6d7664f..1ac056b66da 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -84,6 +84,7 @@ enum aarch64_function_type { /* SVE vector register sizes. */ enum aarch64_sve_vector_bits_enum { SVE_SCALABLE, + SVE_NOT_IMPLEMENTED = SVE_SCALABLE, SVE_128 = 128, SVE_256 = 256, SVE_512 = 512, diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 1fe1a50d52a..4ed886bd200 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -252,6 +252,10 @@ struct tune_params const struct cpu_vector_cost *vec_costs; const struct cpu_branch_cost *branch_costs; const struct cpu_approx_modes *approx_modes; + /* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable. + Only used for tuning decisions, does not disable VLA + vectorization. 
*/ + enum aarch64_sve_vector_bits_enum sve_width; int memmov_cost; int issue_rate; unsigned int fusible_ops; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 490a2038b8e..ba9b5ad4efe 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -681,6 +681,7 @@ static const struct tune_params generic_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 2, /* issue_rate */ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ @@ -706,6 +707,7 @@ static const struct tune_params cortexa35_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 1, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -732,6 +734,7 @@ static const struct tune_params cortexa53_tunings = &generic_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 2, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -758,6 +761,7 @@ static const struct tune_params cortexa57_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -784,6 +788,7 @@ static const struct tune_params cortexa72_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -810,6 +815,7 @@ static const struct tune_params cortexa73_tunings = &cortexa57_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost. */ 2, /* issue_rate. 
*/ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -838,6 +844,7 @@ static const struct tune_params exynosm1_tunings = &exynosm1_vector_cost, &generic_branch_cost, &exynosm1_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 3, /* issue_rate */ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ @@ -863,6 +870,7 @@ static const struct tune_params thunderxt88_tunings = &thunderx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 2, /* issue_rate */ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ @@ -888,6 +896,7 @@ static const struct tune_params thunderx_tunings = &thunderx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 2, /* issue_rate */ AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ @@ -914,6 +923,7 @@ static const struct tune_params tsv110_tunings = &tsv110_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 4, /* issue_rate */ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH @@ -940,6 +950,7 @@ static const struct tune_params xgene1_tunings = &xgene1_vector_cost, &generic_branch_cost, &xgene1_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 4, /* issue_rate */ AARCH64_FUSE_NOTHING, /* fusible_ops */ @@ -965,6 +976,7 @@ static const struct tune_params emag_tunings = &xgene1_vector_cost, &generic_branch_cost, &xgene1_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 6, /* memmov_cost */ 4, /* issue_rate */ AARCH64_FUSE_NOTHING, /* fusible_ops */ @@ -990,6 +1002,7 @@ static const struct tune_params qdf24xx_tunings = &qdf24xx_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 4, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -1018,6 +1031,7 @@ static const struct tune_params saphira_tunings = &generic_vector_cost, 
&generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost */ 4, /* issue_rate */ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD @@ -1044,6 +1058,7 @@ static const struct tune_params thunderx2t99_tunings = &thunderx2t99_vector_cost, &generic_branch_cost, &generic_approx_modes, + SVE_NOT_IMPLEMENTED, /* sve_width */ 4, /* memmov_cost. */ 4, /* issue_rate. */ (AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC @@ -17869,6 +17884,25 @@ aarch64_speculation_safe_value (machine_mode mode, return result; } +/* Implement TARGET_ESTIMATED_POLY_VALUE. + Look into the tuning structure for an estimate. + VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial + Advanced SIMD 128 bits. */ + +static HOST_WIDE_INT +aarch64_estimated_poly_value (poly_int64 val) +{ + enum aarch64_sve_vector_bits_enum width_source + = aarch64_tune_params.sve_width; + + /* If we still don't have an estimate, use the default. */ + if (width_source == SVE_SCALABLE) + return default_estimated_poly_value (val); + + HOST_WIDE_INT over_128 = width_source - 128; + return val.coeffs[0] + val.coeffs[1] * over_128 / 128; +} + /* Target-specific selftests. */ #if CHECKING_P @@ -18348,6 +18382,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_SPECULATION_SAFE_VALUE #define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value +#undef TARGET_ESTIMATED_POLY_VALUE +#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value + #if CHECKING_P #undef TARGET_RUN_TARGET_SELFTESTS #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests