From a7d4310aed539b04345894ebafb49ca364780653 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 27 Jul 2023 20:06:37 +0200 Subject: [PATCH] Fix profile update after RTL unrolling This patch fixes the profile update after RTL unrolling, which is now done the same way as in the tree unroller. We still produce a (slightly) corrupted profile for loops with multiple exits; I can try to fix that incrementally. I also updated the testcases to look for profile mismatches so that they do not creep back in. gcc/ChangeLog: * cfgloop.h (single_dom_exit): Declare. * cfgloopmanip.h (update_exit_probability_after_unrolling): Declare. * cfgrtl.cc (struct cfg_hooks): Fix comment. * loop-unroll.cc (unroll_loop_constant_iterations): Update exit edge. * tree-ssa-loop-ivopts.h (single_dom_exit): Do not declare it here. * tree-ssa-loop-manip.cc (update_exit_probability_after_unrolling): Break out from ... (tree_transform_and_unroll_loop): ... here. gcc/testsuite/ChangeLog: * gcc.dg/tree-prof/peel-1.c: Test for profile mismatches. * gcc.dg/tree-prof/unroll-1.c: Test for profile mismatches. * gcc.dg/tree-ssa/peel1.c: Test for profile mismatches. * gcc.dg/unroll-1.c: Test for profile mismatches. * gcc.dg/unroll-3.c: Test for profile mismatches. * gcc.dg/unroll-4.c: Test for profile mismatches. * gcc.dg/unroll-5.c: Test for profile mismatches. * gcc.dg/unroll-6.c: Test for profile mismatches. 
--- gcc/cfgloop.h | 1 + gcc/cfgloopmanip.h | 1 + gcc/cfgrtl.cc | 2 +- gcc/loop-unroll.cc | 8 ++++- gcc/testsuite/gcc.dg/tree-prof/peel-1.c | 4 ++- gcc/testsuite/gcc.dg/tree-prof/unroll-1.c | 5 +-- gcc/testsuite/gcc.dg/tree-ssa/peel1.c | 3 +- gcc/testsuite/gcc.dg/unroll-1.c | 4 ++- gcc/testsuite/gcc.dg/unroll-3.c | 3 +- gcc/testsuite/gcc.dg/unroll-4.c | 3 +- gcc/testsuite/gcc.dg/unroll-5.c | 3 +- gcc/testsuite/gcc.dg/unroll-6.c | 3 +- gcc/tree-ssa-loop-ivopts.h | 1 - gcc/tree-ssa-loop-manip.cc | 41 +++++++++++++---------- 14 files changed, 53 insertions(+), 29 deletions(-) diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 22293e1c237..c4622d4b853 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -921,6 +921,7 @@ extern bool get_estimated_loop_iterations (class loop *loop, widest_int *nit); extern bool get_max_loop_iterations (const class loop *loop, widest_int *nit); extern bool get_likely_max_loop_iterations (class loop *loop, widest_int *nit); extern int bb_loop_depth (const_basic_block); +extern edge single_dom_exit (class loop *); /* Converts VAL to widest_int. 
*/ diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h index af6a29f70c4..dab7b31c1e7 100644 --- a/gcc/cfgloopmanip.h +++ b/gcc/cfgloopmanip.h @@ -68,5 +68,6 @@ class loop * loop_version (class loop *, void *, void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise); void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb, profile_count num, profile_count den); +void update_exit_probability_after_unrolling (class loop *loop, edge new_exit); #endif /* GCC_CFGLOOPMANIP_H */ diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc index 36e43d0d737..abcb472e2a2 100644 --- a/gcc/cfgrtl.cc +++ b/gcc/cfgrtl.cc @@ -5409,7 +5409,7 @@ struct cfg_hooks cfg_layout_rtl_cfg_hooks = { rtl_flow_call_edges_add, NULL, /* execute_on_growing_pred */ NULL, /* execute_on_shrinking_pred */ - duplicate_loop_body_to_header_edge, /* duplicate loop for trees */ + duplicate_loop_body_to_header_edge, /* duplicate loop for rtl */ rtl_lv_add_condition_to_bb, /* lv_add_condition_to_bb */ NULL, /* lv_adjust_loop_header_phi*/ rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */ diff --git a/gcc/loop-unroll.cc b/gcc/loop-unroll.cc index 93333d8ba11..bbfa6ccc770 100644 --- a/gcc/loop-unroll.cc +++ b/gcc/loop-unroll.cc @@ -487,6 +487,7 @@ unroll_loop_constant_iterations (class loop *loop) bool exit_at_end = loop_exit_at_end_p (loop); struct opt_info *opt_info = NULL; bool ok; + bool flat = maybe_flat_loop_profile (loop); niter = desc->niter; @@ -603,9 +604,14 @@ unroll_loop_constant_iterations (class loop *loop) ok = duplicate_loop_body_to_header_edge ( loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge, &remove_edges, - DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)); + DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0) + | (flat ? 
DLTHE_FLAG_FLAT_PROFILE : 0)); gcc_assert (ok); + edge new_exit = single_dom_exit (loop); + if (new_exit) + update_exit_probability_after_unrolling (loop, new_exit); + if (opt_info) { apply_opt_in_copies (opt_info, max_unroll, true, true); diff --git a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c index 7245b68c1ee..32ecccb16da 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/peel-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/peel-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */ +/* { dg-options "-O3 -fdump-tree-cunroll-details-blocks -fdump-tree-optimized-details-blocks -fno-unroll-loops -fpeel-loops" } */ void abort(); int a[1000]; @@ -21,3 +21,5 @@ main() return 0; } /* { dg-final-use { scan-tree-dump "Peeled loop ., 1 times" "cunroll" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "cunroll" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c index 3ad0cf019b3..0b25c1f2f1c 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */ +/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops -fno-peel-loops" } */ void abort (); int a[1000]; @@ -20,4 +20,5 @@ main() t(); return 0; } -/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final-use-not-autofdo { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final-use-not-autofdo { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c index dc5848cb5c5..bc136605e94 100644 --- 
a/gcc/testsuite/gcc.dg/tree-ssa/peel1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/peel1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */ +/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details-blocks" } */ struct foo {int b; int a[3];} foo; void add(struct foo *a,int l) { @@ -9,3 +9,4 @@ void add(struct foo *a,int l) } /* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 2 times." "cunroll"} } */ /* { dg-final { scan-tree-dump "Peeled loop 1, 3 times." "cunroll"} } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-1.c b/gcc/testsuite/gcc.dg/unroll-1.c index e7032891823..ff2cbb07b22 100644 --- a/gcc/testsuite/gcc.dg/unroll-1.c +++ b/gcc/testsuite/gcc.dg/unroll-1.c @@ -1,7 +1,7 @@ /* PR optimization/8599 */ /* { dg-do run } */ /* { dg-options "-O2 -funroll-loops" } */ -/* { dg-options "-mtune=k6 -O2 -funroll-loops" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ +/* { dg-options "-mtune=k6 -O2 -funroll-loops -fdump-rtl-loop2_unroll-details-blocks" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */ extern void abort (void); @@ -25,3 +25,5 @@ int main() abort (); return 0; } +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-3.c b/gcc/testsuite/gcc.dg/unroll-3.c index 10bf59b9a2e..fbc8378c73d 100644 --- a/gcc/testsuite/gcc.dg/unroll-3.c +++ b/gcc/testsuite/gcc.dg/unroll-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { dg-final 
{ scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-4.c b/gcc/testsuite/gcc.dg/unroll-4.c index 17f19421227..055ef3f3545 100644 --- a/gcc/testsuite/gcc.dg/unroll-4.c +++ b/gcc/testsuite/gcc.dg/unroll-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-5.c b/gcc/testsuite/gcc.dg/unroll-5.c index f3bdebe9882..1f22b1fa5d6 100644 --- a/gcc/testsuite/gcc.dg/unroll-5.c +++ b/gcc/testsuite/gcc.dg/unroll-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */ +/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */ unsigned a[100], b[100]; inline void bar() @@ -29,3 +29,4 @@ int foo2(void) } /* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-6.c b/gcc/testsuite/gcc.dg/unroll-6.c index e4c231ea79f..7664bbff109 100644 --- a/gcc/testsuite/gcc.dg/unroll-6.c +++ b/gcc/testsuite/gcc.dg/unroll-6.c @@ -1,5 
+1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops" } */ +/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */ /* { dg-require-effective-target int32plus } */ void abort (void); @@ -32,3 +32,4 @@ int t2() /* { dg-final { scan-rtl-dump-not "realistic bound: 999999" "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump-times " upper bound: 2999999" 1 "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump-times "realistic bound: 2999999" 1 "loop2_unroll" } } */ +/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" { xfail *-*-* } } } */ diff --git a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h index 7a53ce47f10..31ec893b9cb 100644 --- a/gcc/tree-ssa-loop-ivopts.h +++ b/gcc/tree-ssa-loop-ivopts.h @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_TREE_SSA_LOOP_IVOPTS_H #define GCC_TREE_SSA_LOOP_IVOPTS_H -extern edge single_dom_exit (class loop *); extern void dump_iv (FILE *, struct iv *); extern void dump_use (FILE *, struct iv_use *); extern void dump_uses (FILE *, struct ivopts_data *); diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 8e3b1057b6f..e58892e235c 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -1040,6 +1040,29 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, *exit_bound = bound; } +/* Update NEW_EXIT probability after LOOP has been unrolled. */ + +void +update_exit_probability_after_unrolling (class loop *loop, edge new_exit) +{ + /* gimple_duplicate_loop_body_to_header_edge depending on + DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header + or scales it down accordingly. + However exit edge probability is kept as original. Fix it if needed + and compensate. 
*/ + profile_probability new_prob + = loop_preheader_edge + (loop)->count ().probability_in (new_exit->src->count); + if (!(new_prob == new_exit->probability)) + { + profile_count old_count = new_exit->src->count - new_exit->count (); + set_edge_probability_and_rescale_others (new_exit, new_prob); + profile_count new_count = new_exit->src->count - new_exit->count (); + scale_dominated_blocks_in_loop (loop, new_exit->src, + new_count, old_count); + } +} + /* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge whose source block dominates the latch. DESC describes the number of iterations of LOOP. @@ -1266,23 +1289,7 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor, update_ssa (TODO_update_ssa); new_exit = single_dom_exit (loop); - - /* gimple_duplicate_loop_body_to_header_edge depending on - DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header - or scales it down accordingly. - However exit edge probability is kept as original. Fix it if needed - and compensate. */ - profile_probability new_prob - = loop_preheader_edge - (loop)->count ().probability_in (new_exit->src->count); - if (!(new_prob == new_exit->probability)) - { - profile_count old_count = new_exit->src->count - new_exit->count (); - set_edge_probability_and_rescale_others (new_exit, new_prob); - profile_count new_count = new_exit->src->count - new_exit->count (); - scale_dominated_blocks_in_loop (loop, new_exit->src, - new_count, old_count); - } + update_exit_probability_after_unrolling (loop, new_exit); if (!single_loop_p) { /* Finally create the new counter for number of iterations and add