Fix profile update after RTL unrolling
This patch fixes the profile update after RTL unrolling, which is now done the same way as in the tree unroller. We still produce a (slightly) corrupted profile for loops with multiple exits; I can try to fix that incrementally. I also updated the testcases to look for profile mismatches so they do not creep back in. gcc/ChangeLog: * cfgloop.h (single_dom_exit): Declare. * cfgloopmanip.h (update_exit_probability_after_unrolling): Declare. * cfgrtl.cc (struct cfg_hooks): Fix comment. * loop-unroll.cc (unroll_loop_constant_iterations): Update exit edge. * tree-ssa-loop-ivopts.h (single_dom_exit): Do not declare it here. * tree-ssa-loop-manip.cc (update_exit_probability_after_unrolling): Break out from ... (tree_transform_and_unroll_loop): ... here. gcc/testsuite/ChangeLog: * gcc.dg/tree-prof/peel-1.c: Test for profile mismatches. * gcc.dg/tree-prof/unroll-1.c: Test for profile mismatches. * gcc.dg/tree-ssa/peel1.c: Test for profile mismatches. * gcc.dg/unroll-1.c: Test for profile mismatches. * gcc.dg/unroll-3.c: Test for profile mismatches. * gcc.dg/unroll-4.c: Test for profile mismatches. * gcc.dg/unroll-5.c: Test for profile mismatches. * gcc.dg/unroll-6.c: Test for profile mismatches.
This commit is contained in:
parent
081e25d3cf
commit
a7d4310aed
14 changed files with 53 additions and 29 deletions
|
@ -921,6 +921,7 @@ extern bool get_estimated_loop_iterations (class loop *loop, widest_int *nit);
|
|||
extern bool get_max_loop_iterations (const class loop *loop, widest_int *nit);
|
||||
extern bool get_likely_max_loop_iterations (class loop *loop, widest_int *nit);
|
||||
extern int bb_loop_depth (const_basic_block);
|
||||
extern edge single_dom_exit (class loop *);
|
||||
|
||||
/* Converts VAL to widest_int. */
|
||||
|
||||
|
|
|
@ -68,5 +68,6 @@ class loop * loop_version (class loop *, void *,
|
|||
void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
|
||||
void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
|
||||
profile_count num, profile_count den);
|
||||
void update_exit_probability_after_unrolling (class loop *loop, edge new_exit);
|
||||
|
||||
#endif /* GCC_CFGLOOPMANIP_H */
|
||||
|
|
|
@ -5409,7 +5409,7 @@ struct cfg_hooks cfg_layout_rtl_cfg_hooks = {
|
|||
rtl_flow_call_edges_add,
|
||||
NULL, /* execute_on_growing_pred */
|
||||
NULL, /* execute_on_shrinking_pred */
|
||||
duplicate_loop_body_to_header_edge, /* duplicate loop for trees */
|
||||
duplicate_loop_body_to_header_edge, /* duplicate loop for rtl */
|
||||
rtl_lv_add_condition_to_bb, /* lv_add_condition_to_bb */
|
||||
NULL, /* lv_adjust_loop_header_phi*/
|
||||
rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */
|
||||
|
|
|
@ -487,6 +487,7 @@ unroll_loop_constant_iterations (class loop *loop)
|
|||
bool exit_at_end = loop_exit_at_end_p (loop);
|
||||
struct opt_info *opt_info = NULL;
|
||||
bool ok;
|
||||
bool flat = maybe_flat_loop_profile (loop);
|
||||
|
||||
niter = desc->niter;
|
||||
|
||||
|
@ -603,9 +604,14 @@ unroll_loop_constant_iterations (class loop *loop)
|
|||
ok = duplicate_loop_body_to_header_edge (
|
||||
loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge,
|
||||
&remove_edges,
|
||||
DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0));
|
||||
DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)
|
||||
| (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
|
||||
gcc_assert (ok);
|
||||
|
||||
edge new_exit = single_dom_exit (loop);
|
||||
if (new_exit)
|
||||
update_exit_probability_after_unrolling (loop, new_exit);
|
||||
|
||||
if (opt_info)
|
||||
{
|
||||
apply_opt_in_copies (opt_info, max_unroll, true, true);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
|
||||
/* { dg-options "-O3 -fdump-tree-cunroll-details-blocks -fdump-tree-optimized-details-blocks -fno-unroll-loops -fpeel-loops" } */
|
||||
void abort();
|
||||
|
||||
int a[1000];
|
||||
|
@ -21,3 +21,5 @@ main()
|
|||
return 0;
|
||||
}
|
||||
/* { dg-final-use { scan-tree-dump "Peeled loop ., 1 times" "cunroll" } } */
|
||||
/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "cunroll" } } */
|
||||
/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "optimized" } } */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */
|
||||
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops -fno-peel-loops" } */
|
||||
void abort ();
|
||||
|
||||
int a[1000];
|
||||
|
@ -20,4 +20,5 @@ main()
|
|||
t();
|
||||
return 0;
|
||||
}
|
||||
/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
|
||||
/* { dg-final-use-not-autofdo { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
|
||||
/* { dg-final-use-not-autofdo { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */
|
||||
/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details-blocks" } */
|
||||
struct foo {int b; int a[3];} foo;
|
||||
void add(struct foo *a,int l)
|
||||
{
|
||||
|
@ -9,3 +9,4 @@ void add(struct foo *a,int l)
|
|||
}
|
||||
/* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 2 times." "cunroll"} } */
|
||||
/* { dg-final { scan-tree-dump "Peeled loop 1, 3 times." "cunroll"} } */
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* PR optimization/8599 */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -funroll-loops" } */
|
||||
/* { dg-options "-mtune=k6 -O2 -funroll-loops" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
|
||||
/* { dg-options "-mtune=k6 -O2 -funroll-loops -fdump-rtl-loop2_unroll-details-blocks" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
|
||||
|
||||
|
||||
extern void abort (void);
|
||||
|
@ -25,3 +25,5 @@ int main()
|
|||
abort ();
|
||||
return 0;
|
||||
}
|
||||
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
|
||||
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
|
@ -29,3 +29,4 @@ int foo2(void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
|
@ -29,3 +29,4 @@ int foo2(void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
|
||||
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
|
||||
|
||||
unsigned a[100], b[100];
|
||||
inline void bar()
|
||||
|
@ -29,3 +29,4 @@ int foo2(void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
|
||||
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops" } */
|
||||
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */
|
||||
/* { dg-require-effective-target int32plus } */
|
||||
|
||||
void abort (void);
|
||||
|
@ -32,3 +32,4 @@ int t2()
|
|||
/* { dg-final { scan-rtl-dump-not "realistic bound: 999999" "loop2_unroll" } } */
|
||||
/* { dg-final { scan-rtl-dump-times " upper bound: 2999999" 1 "loop2_unroll" } } */
|
||||
/* { dg-final { scan-rtl-dump-times "realistic bound: 2999999" 1 "loop2_unroll" } } */
|
||||
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" { xfail *-*-* } } } */
|
||||
|
|
|
@ -20,7 +20,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
#ifndef GCC_TREE_SSA_LOOP_IVOPTS_H
|
||||
#define GCC_TREE_SSA_LOOP_IVOPTS_H
|
||||
|
||||
extern edge single_dom_exit (class loop *);
|
||||
extern void dump_iv (FILE *, struct iv *);
|
||||
extern void dump_use (FILE *, struct iv_use *);
|
||||
extern void dump_uses (FILE *, struct ivopts_data *);
|
||||
|
|
|
@ -1040,6 +1040,29 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
|
|||
*exit_bound = bound;
|
||||
}
|
||||
|
||||
/* Update NEW_EXIT probability after LOOP has been unrolled. */
|
||||
|
||||
void
|
||||
update_exit_probability_after_unrolling (class loop *loop, edge new_exit)
|
||||
{
|
||||
/* gimple_duplicate_loop_body_to_header_edge depending on
|
||||
DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
|
||||
or scales it down accordingly.
|
||||
However exit edge probability is kept as original. Fix it if needed
|
||||
and compensate. */
|
||||
profile_probability new_prob
|
||||
= loop_preheader_edge
|
||||
(loop)->count ().probability_in (new_exit->src->count);
|
||||
if (!(new_prob == new_exit->probability))
|
||||
{
|
||||
profile_count old_count = new_exit->src->count - new_exit->count ();
|
||||
set_edge_probability_and_rescale_others (new_exit, new_prob);
|
||||
profile_count new_count = new_exit->src->count - new_exit->count ();
|
||||
scale_dominated_blocks_in_loop (loop, new_exit->src,
|
||||
new_count, old_count);
|
||||
}
|
||||
}
|
||||
|
||||
/* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
|
||||
whose source block dominates the latch. DESC describes the number of
|
||||
iterations of LOOP.
|
||||
|
@ -1266,23 +1289,7 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
|
|||
update_ssa (TODO_update_ssa);
|
||||
|
||||
new_exit = single_dom_exit (loop);
|
||||
|
||||
/* gimple_duplicate_loop_body_to_header_edge depending on
|
||||
DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
|
||||
or scales it down accordingly.
|
||||
However exit edge probability is kept as original. Fix it if needed
|
||||
and compensate. */
|
||||
profile_probability new_prob
|
||||
= loop_preheader_edge
|
||||
(loop)->count ().probability_in (new_exit->src->count);
|
||||
if (!(new_prob == new_exit->probability))
|
||||
{
|
||||
profile_count old_count = new_exit->src->count - new_exit->count ();
|
||||
set_edge_probability_and_rescale_others (new_exit, new_prob);
|
||||
profile_count new_count = new_exit->src->count - new_exit->count ();
|
||||
scale_dominated_blocks_in_loop (loop, new_exit->src,
|
||||
new_count, old_count);
|
||||
}
|
||||
update_exit_probability_after_unrolling (loop, new_exit);
|
||||
if (!single_loop_p)
|
||||
{
|
||||
/* Finally create the new counter for number of iterations and add
|
||||
|
|
Loading…
Add table
Reference in a new issue