From d59171daa764fc59764a7538e48460ffcdae3f9b Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Mon, 5 Nov 2012 15:00:46 +0100 Subject: [PATCH] ipa-inline.c (compute_uninlined_call_time, [...]): New functions. * ipa-inline.c (compute_uninlined_call_time, compute_inlined_call_time): New functions. (RELATIVE_TIME_BENEFIT_RANGE): New macro. (relative_time_benefit): Rewrite. (edge_badness): Rewrite path with guessed profile and estimated profile. * ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module): New hints. (struct inline_summary): Add GROWTH field. * ipa-inline-analysis.c (dump_inline_hints): Update. (reset_inline_summary): Update. (dump_inline_summary): Update. (will_be_nonconstant_predicate): Cleanup to use gimple_store_p and gimple_assign_load_p predicates. (estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint. (simple_edge_hints): New function. (do_estimate_edge_time): Return time of invocation of callee rather than the time scaled by edge frequency; update hints code. (do_estimate_edge_hints): Update. (do_estimate_growth): Cleanup. From-SVN: r193161 --- gcc/ChangeLog | 22 +++++ gcc/ipa-inline-analysis.c | 73 ++++++++++------ gcc/ipa-inline.c | 170 ++++++++++++++++++++++---------------- gcc/ipa-inline.h | 10 ++- 4 files changed, 179 insertions(+), 96 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index be526da445b..50148685ef9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,25 @@ +2012-11-05 Jan Hubicka + + * ipa-inline.c (compute_uninlined_call_time, + compute_inlined_call_time): New functions. + (RELATIVE_TIME_BENEFIT_RANGE): New macro. + (relative_time_benefit): Rewrite. + (edge_badness): Rewrite path with guessed profile and estimated profile. + * ipa-inline.h (INLINE_HINT_declared_inline, INLINE_HINT_cross_module): + New hints. + (struct inline_summary): Add GROWTH field. + * ipa-inline-analysis.c (dump_inline_hints): Update. + (reset_inline_summary): Update. + (dump_inline_summary): Update. 
+ (will_be_nonconstant_predicate): Cleanup to use gimple_store_p and + gimple_assign_load_p predicates. + (estimate_node_size_and_time): Drop INLINE_HINT_declared_inline hint. + (simple_edge_hints): New function. + (do_estimate_edge_time): Return time of invocation of callee rather + than the time scaled by edge frequency; update hints code. + (do_estimate_edge_hints): Update. + (do_estimate_growth): Cleanup. + 2012-11-05 Jakub Jelinek PR target/55194 diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index 9db7f1c0958..595cb682a5b 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -649,6 +649,16 @@ dump_inline_hints (FILE *f, inline_hints hints) hints &= ~INLINE_HINT_in_scc; fprintf (f, " in_scc"); } + if (hints & INLINE_HINT_cross_module) + { + hints &= ~INLINE_HINT_cross_module; + fprintf (f, " cross_module"); + } + if (hints & INLINE_HINT_declared_inline) + { + hints &= ~INLINE_HINT_declared_inline; + fprintf (f, " declared_inline"); + } gcc_assert (!hints); } @@ -983,6 +993,7 @@ reset_inline_summary (struct cgraph_node *node) info->stack_frame_offset = 0; info->size = 0; info->time = 0; + info->growth = 0; info->scc_no = 0; if (info->loop_iterations) { @@ -1375,6 +1386,9 @@ dump_inline_summary (FILE * f, struct cgraph_node *node) (int) s->estimated_self_stack_size); fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); + if (s->growth) + fprintf (f, " estimated growth:%i\n", + (int) s->growth); if (s->scc_no) fprintf (f, " In SCC: %i\n", (int) s->scc_no); @@ -1977,10 +1991,11 @@ will_be_nonconstant_predicate (struct ipa_node_params *info, return p; /* Stores will stay anyway. */ - if (gimple_vdef (stmt)) + if (gimple_store_p (stmt)) return p; - is_load = gimple_vuse (stmt) != NULL; + is_load = gimple_assign_load_p (stmt); + /* Loads can be optimized when the value is known. 
*/ if (is_load) { @@ -2857,6 +2872,8 @@ estimate_node_size_and_time (struct cgraph_node *node, hints |=INLINE_HINT_loop_stride; if (info->scc_no) hints |= INLINE_HINT_in_scc; + if (DECL_DECLARED_INLINE_P (node->symbol.decl)) + hints |= INLINE_HINT_declared_inline; estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths, known_vals, known_binfos, known_aggs); @@ -2865,7 +2882,6 @@ estimate_node_size_and_time (struct cgraph_node *node, time = RDIV (time, INLINE_TIME_SCALE); size = RDIV (size, INLINE_SIZE_SCALE); - if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\n size:%i time:%i\n", (int)size, (int)time); @@ -3315,6 +3331,26 @@ inline_update_overall_summary (struct cgraph_node *node) info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE; } +/* Return hints derived from EDGE. */ +int +simple_edge_hints (struct cgraph_edge *edge) +{ + int hints = 0; + struct cgraph_node *to = (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to + : edge->caller); + if (inline_summary (to)->scc_no + && inline_summary (to)->scc_no == inline_summary (edge->callee)->scc_no + && !cgraph_edge_recursive_p (edge)) + hints |= INLINE_HINT_same_scc; + + if (to->symbol.lto_file_data && edge->callee->symbol.lto_file_data + && to->symbol.lto_file_data != edge->callee->symbol.lto_file_data) + hints |= INLINE_HINT_cross_module; + + return hints; +} + /* Estimate the time cost for the caller when inlining EDGE. Only to be called via estimate_edge_time, that handles the caching mechanism. 
@@ -3328,7 +3364,6 @@ do_estimate_edge_time (struct cgraph_edge *edge) int time; int size; inline_hints hints; - gcov_type ret; struct cgraph_node *callee; clause_t clause; VEC (tree, heap) *known_vals; @@ -3347,33 +3382,26 @@ do_estimate_edge_time (struct cgraph_edge *edge) VEC_free (tree, heap, known_vals); VEC_free (tree, heap, known_binfos); VEC_free (ipa_agg_jump_function_p, heap, known_aggs); - - ret = RDIV ((gcov_type)time * edge->frequency, - CGRAPH_FREQ_BASE); + gcc_checking_assert (size >= 0); + gcc_checking_assert (time >= 0); /* When caching, update the cache entry. */ if (edge_growth_cache) { - struct cgraph_node *to = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to - : edge->caller); if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache) <= edge->uid) VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache, cgraph_edge_max_uid); VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).time - = ret + (ret >= 0); + = time + (time >= 0); VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).size = size + (size >= 0); - if (inline_summary (to)->scc_no - && inline_summary (to)->scc_no == inline_summary (callee)->scc_no - && !cgraph_edge_recursive_p (edge)) - hints |= INLINE_HINT_same_scc; + hints |= simple_edge_hints (edge); VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).hints = hints + 1; } - return ret; + return time; } @@ -3430,9 +3458,6 @@ do_estimate_edge_hints (struct cgraph_edge *edge) VEC (tree, heap) *known_vals; VEC (tree, heap) *known_binfos; VEC (ipa_agg_jump_function_p, heap) *known_aggs; - struct cgraph_node *to = (edge->caller->global.inlined_to - ? edge->caller->global.inlined_to - : edge->caller); /* When we do caching, use do_estimate_edge_time to populate the entry. 
*/ @@ -3458,10 +3483,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge) VEC_free (tree, heap, known_vals); VEC_free (tree, heap, known_binfos); VEC_free (ipa_agg_jump_function_p, heap, known_aggs); - if (inline_summary (to)->scc_no - && inline_summary (to)->scc_no == inline_summary (callee)->scc_no - && !cgraph_edge_recursive_p (edge)) - hints |= INLINE_HINT_same_scc; + hints |= simple_edge_hints (edge); return hints; } @@ -3549,10 +3571,11 @@ do_estimate_growth (struct cgraph_node *node) return zero or negative growths. */ if (d.self_recursive) d.growth = d.growth < info->size ? info->size : d.growth; + else if (DECL_EXTERNAL (node->symbol.decl)) + ; else { - if (!DECL_EXTERNAL (node->symbol.decl) - && cgraph_will_be_removed_from_program_if_no_direct_calls (node)) + if (cgraph_will_be_removed_from_program_if_no_direct_calls (node)) d.growth -= info->size; /* COMDAT functions are very often not shared across multiple units since they come from various template instantiations. diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index 2bca2c5fa97..b6a69cbbc7c 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgraph_edge *e) return want_inline; } +/* Compute time of the edge->caller + edge->callee execution when inlining + does not happen. */ + +inline int +compute_uninlined_call_time (struct inline_summary *callee_info, + struct cgraph_edge *edge) +{ + int uninlined_call_time = + RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1), + CGRAPH_FREQ_BASE); + int caller_time = inline_summary (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to + : edge->caller)->time; + return uninlined_call_time + caller_time; +} + +/* Same as compute_uninlined_call_time but compute time when inlining + does happen. */ + +inline gcov_type +compute_inlined_call_time (struct cgraph_edge *edge, + int edge_time) +{ + int caller_time = inline_summary (edge->caller->global.inlined_to + ? 
edge->caller->global.inlined_to + : edge->caller)->time; + int time = caller_time + RDIV ((edge_time - inline_edge_summary (edge)->call_stmt_time) + * MAX (edge->frequency, 1), + CGRAPH_FREQ_BASE); + /* Possible one roundoff error, but watch for overflows. */ + gcc_checking_assert (time >= INT_MIN / 2); + if (time < 0) + time = 0; + return time; +} + /* Return true if we are interested in inlining small function. When REPORT is true, report reason to dump file. */ @@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold) return true; } +#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64) /* Return relative time improvement for inlining EDGE in range - 1...2^9. */ + 1...RELATIVE_TIME_BENEFIT_RANGE */ static inline int relative_time_benefit (struct inline_summary *callee_info, struct cgraph_edge *edge, - int time_growth) + int edge_time) { int relbenefit; - gcov_type uninlined_call_time; + int uninlined_call_time = compute_uninlined_call_time (callee_info, edge); + int inlined_call_time = compute_inlined_call_time (edge, edge_time); + + /* Inlining into extern inline function is not a win. */ + if (DECL_EXTERNAL (edge->caller->global.inlined_to + ? edge->caller->global.inlined_to->symbol.decl + : edge->caller->symbol.decl)) + return 1; + + /* Watch overflows. */ + gcc_checking_assert (uninlined_call_time >= 0); + gcc_checking_assert (inlined_call_time >= 0); + gcc_checking_assert (uninlined_call_time >= inlined_call_time); - uninlined_call_time = - ((gcov_type) - (callee_info->time - + inline_edge_summary (edge)->call_stmt_time) * edge->frequency - + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE; /* Compute relative time benefit, i.e. how much the call becomes faster. ??? perhaps computing how much the caller+calle together become faster would lead to more realistic results. 
*/ if (!uninlined_call_time) uninlined_call_time = 1; relbenefit = - (uninlined_call_time - time_growth) * 256 / (uninlined_call_time); - relbenefit = MIN (relbenefit, 512); + RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE, + uninlined_call_time); + relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE); + gcc_checking_assert (relbenefit >= 0); relbenefit = MAX (relbenefit, 1); return relbenefit; } @@ -764,7 +810,7 @@ static int edge_badness (struct cgraph_edge *edge, bool dump) { gcov_type badness; - int growth, time_growth; + int growth, edge_time; struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee, NULL); struct inline_summary *callee_info = inline_summary (callee); @@ -774,17 +820,20 @@ edge_badness (struct cgraph_edge *edge, bool dump) return INT_MIN; growth = estimate_edge_growth (edge); - time_growth = estimate_edge_time (edge); + edge_time = estimate_edge_time (edge); hints = estimate_edge_hints (edge); + gcc_checking_assert (edge_time >= 0); + gcc_checking_assert (edge_time <= callee_info->time); + gcc_checking_assert (growth <= callee_info->size); if (dump) { fprintf (dump_file, " Badness calculation for %s -> %s\n", xstrdup (cgraph_node_name (edge->caller)), xstrdup (cgraph_node_name (callee))); - fprintf (dump_file, " size growth %i, time growth %i ", + fprintf (dump_file, " size growth %i, time %i ", growth, - time_growth); + edge_time); dump_inline_hints (dump_file, hints); fprintf (dump_file, "\n"); } @@ -802,7 +851,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) relative_edge_count * relative_time_benefit goodness = ------------------------------------------- - edge_growth + growth_f_caller badness = -goodness The fraction is upside down, because on edge counts and time beneits @@ -810,11 +859,11 @@ edge_badness (struct cgraph_edge *edge, bool dump) else if (max_count) { - int relbenefit = relative_time_benefit (callee_info, edge, time_growth); + int relbenefit = 
relative_time_benefit (callee_info, edge, edge_time); badness = ((int) - ((double) edge->count * INT_MIN / 2 / max_count / 512) * - relative_time_benefit (callee_info, edge, time_growth)) / growth; + ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) * + relbenefit) / growth; /* Be sure that insanity of the profile won't lead to increasing counts in the scalling and thus to overflow in the computation above. */ @@ -826,73 +875,53 @@ edge_badness (struct cgraph_edge *edge, bool dump) " * Relative benefit %f\n", (int) badness, (double) badness / INT_MIN, (double) edge->count / max_count, - relbenefit * 100 / 256.0); + relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE); } } /* When function local profile is available. Compute badness as: - - growth_of_callee - badness = -------------------------------------- + growth_for-all - relative_time_benefit * edge_frequency + relative_time_benefit + goodness = --------------------------------- + growth_of_caller * overall_growth + badness = - goodness + + compensated by the inline hints. */ else if (flag_guess_branch_prob) { - int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX; - - div = MAX (div, 1); - gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX); - div *= relative_time_benefit (callee_info, edge, time_growth); - - /* frequency is normalized in range 1...2^10. - relbenefit in range 1...2^9 - DIV should be in range 1....2^19. */ - gcc_checking_assert (div >= 1 && div <= (1<<19)); - - /* Result must be integer in range 0...INT_MAX. - Set the base of fixed point calculation so we don't lose much of - precision for small bandesses (those are interesting) yet we don't - overflow for growths that are still in interesting range. - - Fixed point arithmetic with point at 6th bit. 
*/ - badness = ((gcov_type)growth) * (1<<(19+6)); - badness = (badness + div / 2) / div; - - /* Overall growth of inlining all calls of function matters: we want to - inline so offline copy of function is no longer needed. - - Additionally functions that can be fully inlined without much of - effort are better inline candidates than functions that can be fully - inlined only after noticeable overall unit growths. The latter - are better in a sense compressing of code size by factoring out common - code into separate function shared by multiple code paths. - - We might mix the valud into the fraction by taking into account - relative growth of the unit, but for now just add the number - into resulting fraction. */ - if (badness > INT_MAX / 8) - { - badness = INT_MAX / 8; - if (dump) - fprintf (dump_file, "Badness overflow\n"); - } - if (hints & (INLINE_HINT_indirect_call - | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)) - badness /= 8; + badness = (relative_time_benefit (callee_info, edge, edge_time) + * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE)); + badness /= (growth * MAX (1, callee_info->growth)); + gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16); + if ((hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations + | INLINE_HINT_loop_stride)) + || callee_info->growth <= 0) + badness *= 8; if (hints & (INLINE_HINT_same_scc)) - badness *= 4; - if (hints & (INLINE_HINT_in_scc)) - badness *= 2; + badness /= 16; + else if (hints & (INLINE_HINT_in_scc)) + badness /= 8; + else if (hints & (INLINE_HINT_cross_module)) + badness /= 2; + gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2); + if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32) + badness *= 16; if (dump) { fprintf (dump_file, " %i: guessed profile. 
frequency %f," - " benefit %f%%, divisor %i\n", + " benefit %f%%, time w/o inlining %i, time w inlining %i" + " overall growth %i (current) %i (original)\n", (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE, - relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div); + relative_time_benefit (callee_info, edge, edge_time) * 100.0 + / RELATIVE_TIME_BENEFIT_RANGE, + compute_uninlined_call_time (callee_info, edge), + (int)compute_inlined_call_time (edge, edge_time), + estimate_growth (callee), + callee_info->growth); } } /* When function local profile is not available or it does not give @@ -1371,6 +1400,7 @@ inline_small_functions (void) if (!DECL_EXTERNAL (node->symbol.decl)) initial_size += info->size; + info->growth = estimate_growth (node); if (dfs && dfs->next_cycle) { struct cgraph_node *n2; diff --git a/gcc/ipa-inline.h b/gcc/ipa-inline.h index ca59a7f7099..fb055c49c5f 100644 --- a/gcc/ipa-inline.h +++ b/gcc/ipa-inline.h @@ -49,7 +49,9 @@ enum inline_hints_vals { INLINE_HINT_loop_iterations = 2, INLINE_HINT_loop_stride = 4, INLINE_HINT_same_scc = 8, - INLINE_HINT_in_scc = 16 + INLINE_HINT_in_scc = 16, + INLINE_HINT_declared_inline = 32, + INLINE_HINT_cross_module = 64 }; typedef int inline_hints; @@ -129,6 +131,12 @@ struct GTY(()) inline_summary /* Predicate on when some loop in the function becomes to have known stride. */ struct predicate * GTY((skip)) loop_stride; + /* Estimated growth for inlining all copies of the function before start + of small functions inlining. + This value will get out of date as the callers are duplicated, but + using up-to-date value in the badness metric mean a lot of extra + expenses. */ + int growth; /* Number of SCC on the beggining of inlining process. */ int scc_no; };