diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e7b04334fb5..047052835f0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-11-13 Richard Sandiford + + * tree-vect-loop.c (vect_estimate_min_profitable_iters): Include + the cost of generating loop masks. + 2019-11-13 Richard Sandiford * tree-vectorizer.h (vect_apply_runtime_profitability_check_p): diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a253a5397cc..834c17a6d7f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2019-11-13 Richard Sandiford + + * gcc.target/aarch64/sve/mask_struct_store_3.c: Add + -fno-vect-cost-model. + * gcc.target/aarch64/sve/mask_struct_store_3_run.c: Likewise. + * gcc.target/aarch64/sve/peel_ind_2.c: Likewise. + * gcc.target/aarch64/sve/peel_ind_2_run.c: Likewise. + * gcc.target/aarch64/sve/peel_ind_3.c: Likewise. + * gcc.target/aarch64/sve/peel_ind_3_run.c: Likewise. + 2019-11-13 Richard Sandiford PR c++/92206 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c index 001f5be8ff5..1765d54a483 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */ #include diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c index 31d661b6594..4dbe0335c72 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_struct_store_3_run.c @@ -1,5 +1,5 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -fno-vect-cost-model" } */ #include "mask_struct_store_3.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c index e792cdf2cad..df82d58ea77 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* Pick an arbitrary target for which unaligned accesses are more expensive. */ -/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx" } */ +/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" } */ #define N 512 #define START 7 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2_run.c index 9c5ae1bd068..b9785356d18 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2_run.c @@ -1,6 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O3 -mtune=thunderx" } */ -/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256 -fno-vect-cost-model" { target aarch64_sve256_hw } } */ #include "peel_ind_2.c" diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c index 441589eef60..1707f02fe92 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* Pick an arbitrary target for which unaligned accesses are more expensive. */ -/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx" } */ +/* { dg-options "-O3 -msve-vector-bits=256 -mtune=thunderx -fno-vect-cost-model" } */ #define N 32 #define MAX_START 8 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c index 384a38eb8ec..98389675d79 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_3_run.c @@ -1,6 +1,6 @@ /* { dg-do run { target aarch64_sve_hw } } */ -/* { dg-options "-O3 -mtune=thunderx" } */ -/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256" { target aarch64_sve256_hw } } */ +/* { dg-options "-O3 -mtune=thunderx -fno-vect-cost-model" } */ +/* { dg-options "-O3 -mtune=thunderx -msve-vector-bits=256 -fno-vect-cost-model" { target aarch64_sve256_hw } } */ #include "peel_ind_3.c" diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 83fb8486640..005fa308911 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -3291,6 +3291,32 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, si->kind, si->stmt_info, si->misalign, vect_epilogue); } + + /* Calculate how many masks we need to generate. */ + unsigned int num_masks = 0; + rgroup_masks *rgm; + unsigned int num_vectors_m1; + FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm) + if (rgm->mask_type) + num_masks += num_vectors_m1 + 1; + gcc_assert (num_masks > 0); + + /* In the worst case, we need to generate each mask in the prologue + and in the loop body. One of the loop body mask instructions + replaces the comparison in the scalar loop, and since we don't + count the scalar comparison against the scalar body, we shouldn't + count that vector instruction against the vector body either. + + Sometimes we can use unpacks instead of generating prologue + masks and sometimes the prologue mask will fold to a constant, + so the actual prologue cost might be smaller. However, it's + simpler and safer to use the worst-case cost; if this ends up + being the tie-breaker between vectorizing or not, then it's + probably better not to vectorize. */ + (void) add_stmt_cost (target_cost_data, num_masks, vector_stmt, + NULL, 0, vect_prologue); + (void) add_stmt_cost (target_cost_data, num_masks - 1, vector_stmt, + NULL, 0, vect_body); } else if (npeel < 0) {