Fix profile update after RTL unrolling

This patch fixes the profile update after RTL unrolling, which is now done the
same way as in the tree unroller.  We still produce a (slightly) corrupted
profile for loops with multiple exits, which I can try to fix incrementally.

I also updated the testcases to check for profile mismatches so that they do
not creep back in.

gcc/ChangeLog:

	* cfgloop.h (single_dom_exit): Declare.
	* cfgloopmanip.h (update_exit_probability_after_unrolling): Declare.
	* cfgrtl.cc (struct cfg_hooks): Fix comment.
	* loop-unroll.cc (unroll_loop_constant_iterations): Update exit edge.
	* tree-ssa-loop-ivopts.h (single_dom_exit): Do not declare it here.
	* tree-ssa-loop-manip.cc (update_exit_probability_after_unrolling):
	Break out from ...
	(tree_transform_and_unroll_loop): ... here.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-prof/peel-1.c: Test for profile mismatches.
	* gcc.dg/tree-prof/unroll-1.c: Test for profile mismatches.
	* gcc.dg/tree-ssa/peel1.c: Test for profile mismatches.
	* gcc.dg/unroll-1.c: Test for profile mismatches.
	* gcc.dg/unroll-3.c: Test for profile mismatches.
	* gcc.dg/unroll-4.c: Test for profile mismatches.
	* gcc.dg/unroll-5.c: Test for profile mismatches.
	* gcc.dg/unroll-6.c: Test for profile mismatches.
This commit is contained in:
Jan Hubicka 2023-07-27 20:06:37 +02:00
parent 081e25d3cf
commit a7d4310aed
14 changed files with 53 additions and 29 deletions

View file

@ -921,6 +921,7 @@ extern bool get_estimated_loop_iterations (class loop *loop, widest_int *nit);
extern bool get_max_loop_iterations (const class loop *loop, widest_int *nit);
extern bool get_likely_max_loop_iterations (class loop *loop, widest_int *nit);
extern int bb_loop_depth (const_basic_block);
extern edge single_dom_exit (class loop *);
/* Converts VAL to widest_int. */

View file

@ -68,5 +68,6 @@ class loop * loop_version (class loop *, void *,
void adjust_loop_info_after_peeling (class loop *loop, int npeel, bool precise);
void scale_dominated_blocks_in_loop (class loop *loop, basic_block bb,
profile_count num, profile_count den);
void update_exit_probability_after_unrolling (class loop *loop, edge new_exit);
#endif /* GCC_CFGLOOPMANIP_H */

View file

@ -5409,7 +5409,7 @@ struct cfg_hooks cfg_layout_rtl_cfg_hooks = {
rtl_flow_call_edges_add,
NULL, /* execute_on_growing_pred */
NULL, /* execute_on_shrinking_pred */
duplicate_loop_body_to_header_edge, /* duplicate loop for trees */
duplicate_loop_body_to_header_edge, /* duplicate loop for rtl */
rtl_lv_add_condition_to_bb, /* lv_add_condition_to_bb */
NULL, /* lv_adjust_loop_header_phi*/
rtl_extract_cond_bb_edges, /* extract_cond_bb_edges */

View file

@ -487,6 +487,7 @@ unroll_loop_constant_iterations (class loop *loop)
bool exit_at_end = loop_exit_at_end_p (loop);
struct opt_info *opt_info = NULL;
bool ok;
bool flat = maybe_flat_loop_profile (loop);
niter = desc->niter;
@ -603,9 +604,14 @@ unroll_loop_constant_iterations (class loop *loop)
ok = duplicate_loop_body_to_header_edge (
loop, loop_latch_edge (loop), max_unroll, wont_exit, desc->out_edge,
&remove_edges,
DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0));
DLTHE_FLAG_UPDATE_FREQ | (opt_info ? DLTHE_RECORD_COPY_NUMBER : 0)
| (flat ? DLTHE_FLAG_FLAT_PROFILE : 0));
gcc_assert (ok);
edge new_exit = single_dom_exit (loop);
if (new_exit)
update_exit_probability_after_unrolling (loop, new_exit);
if (opt_info)
{
apply_opt_in_copies (opt_info, max_unroll, true, true);

View file

@ -1,4 +1,4 @@
/* { dg-options "-O3 -fdump-tree-cunroll-details -fno-unroll-loops -fpeel-loops" } */
/* { dg-options "-O3 -fdump-tree-cunroll-details-blocks -fdump-tree-optimized-details-blocks -fno-unroll-loops -fpeel-loops" } */
void abort();
int a[1000];
@ -21,3 +21,5 @@ main()
return 0;
}
/* { dg-final-use { scan-tree-dump "Peeled loop ., 1 times" "cunroll" } } */
/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-final-use-not-autofdo { scan-tree-dump-not "Invalid sum" "optimized" } } */

View file

@ -1,4 +1,4 @@
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops -fno-peel-loops" } */
void abort ();
int a[1000];
@ -20,4 +20,5 @@ main()
t();
return 0;
}
/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
/* { dg-final-use-not-autofdo { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
/* { dg-final-use-not-autofdo { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details" } */
/* { dg-options "-O3 -fno-tree-vectorize -fdump-tree-cunroll-details-blocks" } */
struct foo {int b; int a[3];} foo;
void add(struct foo *a,int l)
{
@ -9,3 +9,4 @@ void add(struct foo *a,int l)
}
/* { dg-final { scan-tree-dump "Loop 1 likely iterates at most 2 times." "cunroll"} } */
/* { dg-final { scan-tree-dump "Peeled loop 1, 3 times." "cunroll"} } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */

View file

@ -1,7 +1,7 @@
/* PR optimization/8599 */
/* { dg-do run } */
/* { dg-options "-O2 -funroll-loops" } */
/* { dg-options "-mtune=k6 -O2 -funroll-loops" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
/* { dg-options "-mtune=k6 -O2 -funroll-loops -fdump-rtl-loop2_unroll-details-blocks" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
extern void abort (void);
@ -25,3 +25,5 @@ int main()
abort ();
return 0;
}
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunrolli=foo -fenable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
@ -29,3 +29,4 @@ int foo2(void)
}
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo -fdisable-tree-cunrolli=foo2" } */
unsigned a[100], b[100];
inline void bar()
@ -29,3 +29,4 @@ int foo2(void)
}
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
/* { dg-options "-O2 -fdump-tree-cunrolli-details-blocks -fno-peel-loops -fno-tree-vrp -fdisable-tree-cunroll -fenable-tree-cunrolli=foo2 -fdisable-tree-cunrolli=foo" } */
unsigned a[100], b[100];
inline void bar()
@ -29,3 +29,4 @@ int foo2(void)
}
/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunrolli" } } */

View file

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops" } */
/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details-blocks -funroll-loops" } */
/* { dg-require-effective-target int32plus } */
void abort (void);
@ -32,3 +32,4 @@ int t2()
/* { dg-final { scan-rtl-dump-not "realistic bound: 999999" "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump-times " upper bound: 2999999" 1 "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump-times "realistic bound: 2999999" 1 "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" { xfail *-*-* } } } */

View file

@ -20,7 +20,6 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_TREE_SSA_LOOP_IVOPTS_H
#define GCC_TREE_SSA_LOOP_IVOPTS_H
extern edge single_dom_exit (class loop *);
extern void dump_iv (FILE *, struct iv *);
extern void dump_use (FILE *, struct iv_use *);
extern void dump_uses (FILE *, struct ivopts_data *);

View file

@ -1040,6 +1040,29 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc,
*exit_bound = bound;
}
/* Update the probability of NEW_EXIT, an exit edge of LOOP, after LOOP has
   been unrolled.

   The probability wanted on NEW_EXIT is the count of LOOP's preheader edge
   taken relative to the count of NEW_EXIT's source block.  If the edge
   already has that probability nothing is done.  Otherwise the edge is
   updated through set_edge_probability_and_rescale_others and
   scale_dominated_blocks_in_loop is called on the source block with the
   ratio of the count not leaving through NEW_EXIT after and before the
   update.

   NEW_EXIT is dereferenced unconditionally, so callers must pass a non-NULL
   edge.  */
void
update_exit_probability_after_unrolling (class loop *loop, edge new_exit)
{
/* gimple_duplicate_loop_body_to_header_edge depending on
DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
or scales it down accordingly.
However exit edge probability is kept as original. Fix it if needed
and compensate. */
profile_probability new_prob
= loop_preheader_edge
(loop)->count ().probability_in (new_exit->src->count);
/* Only touch the profile when the exit probability actually changes.  */
if (!(new_prob == new_exit->probability))
{
/* Count of the source block that does not leave through NEW_EXIT; it has
   to be sampled before the probability is changed.  */
profile_count old_count = new_exit->src->count - new_exit->count ();
set_edge_probability_and_rescale_others (new_exit, new_prob);
/* Same quantity, now computed with the updated exit probability.  */
profile_count new_count = new_exit->src->count - new_exit->count ();
/* Compensate for the change by scaling with NEW_COUNT / OLD_COUNT.  */
scale_dominated_blocks_in_loop (loop, new_exit->src,
new_count, old_count);
}
}
/* Unroll LOOP FACTOR times. LOOP is known to have a single exit edge
whose source block dominates the latch. DESC describes the number of
iterations of LOOP.
@ -1266,23 +1289,7 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor,
update_ssa (TODO_update_ssa);
new_exit = single_dom_exit (loop);
/* gimple_duplicate_loop_body_to_header_edge depending on
DLTHE_FLAG_UPDATE_FREQ either keeps original frequency of the loop header
or scales it down accordingly.
However exit edge probability is kept as original. Fix it if needed
and compensate. */
profile_probability new_prob
= loop_preheader_edge
(loop)->count ().probability_in (new_exit->src->count);
if (!(new_prob == new_exit->probability))
{
profile_count old_count = new_exit->src->count - new_exit->count ();
set_edge_probability_and_rescale_others (new_exit, new_prob);
profile_count new_count = new_exit->src->count - new_exit->count ();
scale_dominated_blocks_in_loop (loop, new_exit->src,
new_count, old_count);
}
update_exit_probability_after_unrolling (loop, new_exit);
if (!single_loop_p)
{
/* Finally create the new counter for number of iterations and add