Limit inlining functions called once
As discussed in PR ipa/103454, there are several benchmarks that regress with -finline-functions-called-once. Runtimes: - tramp3d with -Ofast: 31% - exchange2 with -Ofast: 11-21% - roms with -O2: 9%-10% - tonto: 2.5-3.5% with LTO. Build times: - specfp2006: 41% (mostly wrf, which builds 71% faster) - specint2006: 1.5-3% - specfp2017: 64% (again mostly wrf) - specint2017: 2.5-3.5%. This patch adds two params to tweak the behaviour: 1) max-inline-functions-called-once-loop-depth, limiting the loop depth (this is useful primarily for exchange2, where the inlined function is at loop depth 9); 2) max-inline-functions-called-once-insns. We already have the large-function-insns/growth parameters, but these also limit inlining of small functions, so reducing them would regress very large functions that are hot. Because inlining functions called once is meant just as a cleanup pass, I think it makes sense to have a separate limit for it. gcc/ChangeLog: 2021-12-09 Jan Hubicka <hubicka@ucw.cz> * doc/invoke.texi (max-inline-functions-called-once-loop-depth, max-inline-functions-called-once-insns): New parameters. * ipa-inline.c (check_callers): Handle param_inline_functions_called_once_loop_depth and param_inline_functions_called_once_insns. (edge_badness): Fix linebreaks. * params.opt (param=max-inline-functions-called-once-loop-depth, param=max-inline-functions-called-once-insns): New params.
This commit is contained in:
parent
243a980437
commit
f157c5362b
3 changed files with 46 additions and 17 deletions
|
@ -13605,6 +13605,14 @@ The maximum number of backtrack attempts the scheduler should make
|
|||
when modulo scheduling a loop. Larger values can exponentially increase
|
||||
compilation time.
|
||||
|
||||
@item max-inline-functions-called-once-loop-depth
|
||||
Maximal loop depth of a call considered by the inline heuristic that tries to
|
||||
inline all functions called once.
|
||||
|
||||
@item max-inline-functions-called-once-insns
|
||||
Maximal estimated size of functions produced while inlining functions called
|
||||
once.
|
||||
|
||||
@item max-inline-insns-single
|
||||
Several parameters control the tree inliner used in GCC@. This number sets the
|
||||
maximum number of instructions (counted in GCC's internal representation) in a
|
||||
|
|
|
@ -1091,20 +1091,30 @@ static bool
|
|||
check_callers (struct cgraph_node *node, void *has_hot_call)
|
||||
{
|
||||
struct cgraph_edge *e;
|
||||
for (e = node->callers; e; e = e->next_caller)
|
||||
{
|
||||
if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
|
||||
|| !opt_for_fn (e->caller->decl, optimize))
|
||||
return true;
|
||||
if (!can_inline_edge_p (e, true))
|
||||
return true;
|
||||
if (e->recursive_p ())
|
||||
return true;
|
||||
if (!can_inline_edge_by_limits_p (e, true))
|
||||
return true;
|
||||
if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
|
||||
*(bool *)has_hot_call = true;
|
||||
}
|
||||
for (e = node->callers; e; e = e->next_caller)
|
||||
{
|
||||
if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
|
||||
|| !opt_for_fn (e->caller->decl, optimize))
|
||||
return true;
|
||||
if (!can_inline_edge_p (e, true))
|
||||
return true;
|
||||
if (e->recursive_p ())
|
||||
return true;
|
||||
if (!can_inline_edge_by_limits_p (e, true))
|
||||
return true;
|
||||
/* Inlining large functions to large loop depth is often harmful because
|
||||
of register pressure it implies. */
|
||||
if ((int)ipa_call_summaries->get (e)->loop_depth
|
||||
> param_inline_functions_called_once_loop_depth)
|
||||
return true;
|
||||
/* Do not produce gigantic functions. */
|
||||
if (estimate_size_after_inlining (e->caller->inlined_to ?
|
||||
e->caller->inlined_to : e->caller, e)
|
||||
> param_inline_functions_called_once_insns)
|
||||
return true;
|
||||
if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
|
||||
*(bool *)has_hot_call = true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1327,9 +1337,12 @@ edge_badness (struct cgraph_edge *edge, bool dump)
|
|||
" %i (compensated)\n",
|
||||
badness.to_double (),
|
||||
freq.to_double (),
|
||||
edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
|
||||
caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
|
||||
inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
|
||||
edge->count.ipa ().initialized_p ()
|
||||
? edge->count.ipa ().to_gcov_type () : -1,
|
||||
caller->count.ipa ().initialized_p ()
|
||||
? caller->count.ipa ().to_gcov_type () : -1,
|
||||
inlining_speedup (edge, freq, unspec_edge_time,
|
||||
edge_time).to_double (),
|
||||
estimate_growth (callee),
|
||||
callee_info->growth, overall_growth);
|
||||
}
|
||||
|
|
|
@ -545,6 +545,14 @@ The maximum expansion factor when copying basic blocks.
|
|||
Common Joined UInteger Var(param_max_hoist_depth) Init(30) Param Optimization
|
||||
Maximum depth of search in the dominator tree for expressions to hoist.
|
||||
|
||||
-param=max-inline-functions-called-once-loop-depth=
|
||||
Common Joined UInteger Var(param_inline_functions_called_once_loop_depth) Init(6) Optimization Param
|
||||
Maximum loop depth of a call which is considered for inlining functions called once.
|
||||
|
||||
-param=max-inline-functions-called-once-insns=
|
||||
Common Joined UInteger Var(param_inline_functions_called_once_insns) Init(4000) Optimization Param
|
||||
Maximum combined size of caller and callee which is inlined if callee is called once.
|
||||
|
||||
-param=max-inline-insns-auto=
|
||||
Common Joined UInteger Var(param_max_inline_insns_auto) Init(15) Optimization Param
|
||||
The maximum number of instructions when automatically inlining.
|
||||
|
|
Loading…
Add table
Reference in a new issue