Materialize clones on demand
This patch removes the pass that materializes all clones; materialization is now done on demand instead. The motivation is to reduce the lifetime of function bodies in ltrans, which should noticeably reduce memory use for highly parallel compilations of large programs (like Martin does) or with partitioning reduced/disabled. For cc1 with one partition the memory use seems to go down from 4GB to circa 1.5GB (as seen in top, so this is not particularly accurate). gcc/ChangeLog: 2020-10-22 Jan Hubicka <hubicka@ucw.cz> * cgraph.c (cgraph_node::get_untransformed_body): Perform lazy clone materialization. * cgraph.h (cgraph_node::materialize_clone): Declare. (symbol_table::materialize_all_clones): Remove. * cgraphclones.c (cgraph_materialize_clone): Turn to ... (cgraph_node::materialize_clone): ... this one; move here dumping from symbol_table::materialize_all_clones. (symbol_table::materialize_all_clones): Remove. * cgraphunit.c (mark_functions_to_output): Clear stmt references. (cgraph_node::expand): Initialize bitmaps early; do not call execute_all_ipa_transforms if there are no transforms. * ipa-inline-transform.c (save_inline_function_body): Fix formatting. (inline_transform): Materialize all clones before function is modified. * ipa-param-manipulation.c (ipa_param_adjustments::modify_call): Materialize clone if needed. * ipa.c (class pass_materialize_all_clones): Remove. (make_pass_materialize_all_clones): Remove. * passes.c (execute_all_ipa_transforms): Materialize all clones. * passes.def: Remove pass_materialize_all_clones. * tree-pass.h (make_pass_materialize_all_clones): Remove. * tree-ssa-structalias.c (ipa_pta_execute): Clear refs.
This commit is contained in:
parent
c26d7df103
commit
0e590b68fa
11 changed files with 94 additions and 159 deletions
15
gcc/cgraph.c
15
gcc/cgraph.c
|
@ -3872,16 +3872,27 @@ cgraph_node::function_or_virtual_thunk_symbol
|
|||
}
|
||||
|
||||
/* When doing LTO, read cgraph_node's body from disk if it is not already
|
||||
present. */
|
||||
present. Also perform any necessary clone materializations. */
|
||||
|
||||
bool
|
||||
cgraph_node::get_untransformed_body (void)
|
||||
cgraph_node::get_untransformed_body ()
|
||||
{
|
||||
lto_file_decl_data *file_data;
|
||||
const char *data, *name;
|
||||
size_t len;
|
||||
tree decl = this->decl;
|
||||
|
||||
/* See if there is clone to be materialized.
|
||||
(inline clones does not need materialization, but we can be seeing
|
||||
an inline clone of real clone). */
|
||||
cgraph_node *p = this;
|
||||
for (cgraph_node *c = clone_of; c; c = c->clone_of)
|
||||
{
|
||||
if (c->decl != decl)
|
||||
p->materialize_clone ();
|
||||
p = c;
|
||||
}
|
||||
|
||||
/* Check if body is already there. Either we have gimple body or
|
||||
the function is thunk and in that case we set DECL_ARGUMENTS. */
|
||||
if (DECL_ARGUMENTS (decl) || gimple_has_body_p (decl))
|
||||
|
|
13
gcc/cgraph.h
13
gcc/cgraph.h
|
@ -1145,12 +1145,14 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
|
|||
|
||||
/* When doing LTO, read cgraph_node's body from disk if it is not already
|
||||
present. */
|
||||
bool get_untransformed_body (void);
|
||||
bool get_untransformed_body ();
|
||||
|
||||
/* Prepare function body. When doing LTO, read cgraph_node's body from disk
|
||||
if it is not already present. When some IPA transformations are scheduled,
|
||||
apply them. */
|
||||
bool get_body (void);
|
||||
bool get_body ();
|
||||
|
||||
void materialize_clone (void);
|
||||
|
||||
/* Release memory used to represent body of function.
|
||||
Use this only for functions that are released before being translated to
|
||||
|
@ -2286,13 +2288,6 @@ public:
|
|||
functions inserted into callgraph already at construction time. */
|
||||
void process_new_functions (void);
|
||||
|
||||
/* Once all functions from compilation unit are in memory, produce all clones
|
||||
and update all calls. We might also do this on demand if we don't want to
|
||||
bring all functions to memory prior compilation, but current WHOPR
|
||||
implementation does that and it is bit easier to keep everything right
|
||||
in this order. */
|
||||
void materialize_all_clones (void);
|
||||
|
||||
/* Register a symbol NODE. */
|
||||
inline void register_symbol (symtab_node *node);
|
||||
|
||||
|
|
|
@ -1083,114 +1083,57 @@ void cgraph_node::remove_from_clone_tree ()
|
|||
|
||||
/* Given virtual clone, turn it into actual clone. */
|
||||
|
||||
static void
|
||||
cgraph_materialize_clone (cgraph_node *node)
|
||||
void
|
||||
cgraph_node::materialize_clone ()
|
||||
{
|
||||
bitmap_obstack_initialize (NULL);
|
||||
node->former_clone_of = node->clone_of->decl;
|
||||
if (node->clone_of->former_clone_of)
|
||||
node->former_clone_of = node->clone_of->former_clone_of;
|
||||
clone_of->get_untransformed_body ();
|
||||
former_clone_of = clone_of->decl;
|
||||
if (clone_of->former_clone_of)
|
||||
former_clone_of = clone_of->former_clone_of;
|
||||
if (symtab->dump_file)
|
||||
{
|
||||
fprintf (symtab->dump_file, "cloning %s to %s\n",
|
||||
clone_of->dump_name (),
|
||||
dump_name ());
|
||||
if (clone.tree_map)
|
||||
{
|
||||
fprintf (symtab->dump_file, " replace map:");
|
||||
for (unsigned int i = 0;
|
||||
i < vec_safe_length (clone.tree_map);
|
||||
i++)
|
||||
{
|
||||
ipa_replace_map *replace_info;
|
||||
replace_info = (*clone.tree_map)[i];
|
||||
fprintf (symtab->dump_file, "%s %i -> ",
|
||||
i ? "," : "", replace_info->parm_num);
|
||||
print_generic_expr (symtab->dump_file,
|
||||
replace_info->new_tree);
|
||||
}
|
||||
fprintf (symtab->dump_file, "\n");
|
||||
}
|
||||
if (clone.param_adjustments)
|
||||
clone.param_adjustments->dump (symtab->dump_file);
|
||||
}
|
||||
/* Copy the OLD_VERSION_NODE function tree to the new version. */
|
||||
tree_function_versioning (node->clone_of->decl, node->decl,
|
||||
node->clone.tree_map, node->clone.param_adjustments,
|
||||
tree_function_versioning (clone_of->decl, decl,
|
||||
clone.tree_map, clone.param_adjustments,
|
||||
true, NULL, NULL);
|
||||
if (symtab->dump_file)
|
||||
{
|
||||
dump_function_to_file (node->clone_of->decl, symtab->dump_file,
|
||||
dump_function_to_file (clone_of->decl, symtab->dump_file,
|
||||
dump_flags);
|
||||
dump_function_to_file (node->decl, symtab->dump_file, dump_flags);
|
||||
dump_function_to_file (decl, symtab->dump_file, dump_flags);
|
||||
}
|
||||
|
||||
cgraph_node *clone_of = node->clone_of;
|
||||
cgraph_node *this_clone_of = clone_of;
|
||||
/* Function is no longer clone. */
|
||||
node->remove_from_clone_tree ();
|
||||
if (!clone_of->analyzed && !clone_of->clones)
|
||||
remove_from_clone_tree ();
|
||||
if (!this_clone_of->analyzed && !this_clone_of->clones)
|
||||
{
|
||||
clone_of->release_body ();
|
||||
clone_of->remove_callees ();
|
||||
clone_of->remove_all_references ();
|
||||
this_clone_of->release_body ();
|
||||
this_clone_of->remove_callees ();
|
||||
this_clone_of->remove_all_references ();
|
||||
}
|
||||
bitmap_obstack_release (NULL);
|
||||
}
|
||||
|
||||
/* Once all functions from compilation unit are in memory, produce all clones
|
||||
and update all calls. We might also do this on demand if we don't want to
|
||||
bring all functions to memory prior compilation, but current WHOPR
|
||||
implementation does that and it is a bit easier to keep everything right in
|
||||
this order. */
|
||||
|
||||
void
|
||||
symbol_table::materialize_all_clones (void)
|
||||
{
|
||||
cgraph_node *node;
|
||||
bool stabilized = false;
|
||||
|
||||
|
||||
if (symtab->dump_file)
|
||||
fprintf (symtab->dump_file, "Materializing clones\n");
|
||||
|
||||
cgraph_node::checking_verify_cgraph_nodes ();
|
||||
|
||||
/* We can also do topological order, but number of iterations should be
|
||||
bounded by number of IPA passes since single IPA pass is probably not
|
||||
going to create clones of clones it created itself. */
|
||||
while (!stabilized)
|
||||
{
|
||||
stabilized = true;
|
||||
FOR_EACH_FUNCTION (node)
|
||||
{
|
||||
if (node->clone_of && node->decl != node->clone_of->decl
|
||||
&& !gimple_has_body_p (node->decl))
|
||||
{
|
||||
if (!node->clone_of->clone_of)
|
||||
node->clone_of->get_untransformed_body ();
|
||||
if (gimple_has_body_p (node->clone_of->decl))
|
||||
{
|
||||
if (symtab->dump_file)
|
||||
{
|
||||
fprintf (symtab->dump_file, "cloning %s to %s\n",
|
||||
node->clone_of->dump_name (),
|
||||
node->dump_name ());
|
||||
if (node->clone.tree_map)
|
||||
{
|
||||
unsigned int i;
|
||||
fprintf (symtab->dump_file, " replace map:");
|
||||
for (i = 0;
|
||||
i < vec_safe_length (node->clone.tree_map);
|
||||
i++)
|
||||
{
|
||||
ipa_replace_map *replace_info;
|
||||
replace_info = (*node->clone.tree_map)[i];
|
||||
fprintf (symtab->dump_file, "%s %i -> ",
|
||||
i ? "," : "", replace_info->parm_num);
|
||||
print_generic_expr (symtab->dump_file,
|
||||
replace_info->new_tree);
|
||||
}
|
||||
fprintf (symtab->dump_file, "\n");
|
||||
}
|
||||
if (node->clone.param_adjustments)
|
||||
node->clone.param_adjustments->dump (symtab->dump_file);
|
||||
}
|
||||
cgraph_materialize_clone (node);
|
||||
stabilized = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
FOR_EACH_FUNCTION (node)
|
||||
if (!node->analyzed && node->callees)
|
||||
{
|
||||
node->remove_callees ();
|
||||
node->remove_all_references ();
|
||||
}
|
||||
else
|
||||
node->clear_stmts_in_references ();
|
||||
if (symtab->dump_file)
|
||||
fprintf (symtab->dump_file, "Materialization Call site updates done.\n");
|
||||
|
||||
cgraph_node::checking_verify_cgraph_nodes ();
|
||||
|
||||
symtab->remove_unreachable_nodes (symtab->dump_file);
|
||||
}
|
||||
|
||||
#include "gt-cgraphclones.h"
|
||||
|
|
|
@ -1601,6 +1601,7 @@ mark_functions_to_output (void)
|
|||
FOR_EACH_FUNCTION (node)
|
||||
{
|
||||
tree decl = node->decl;
|
||||
node->clear_stmts_in_references ();
|
||||
|
||||
gcc_assert (!node->process || node->same_comdat_group);
|
||||
if (node->process)
|
||||
|
@ -2274,6 +2275,9 @@ cgraph_node::expand (void)
|
|||
announce_function (decl);
|
||||
process = 0;
|
||||
gcc_assert (lowered);
|
||||
|
||||
/* Initialize the default bitmap obstack. */
|
||||
bitmap_obstack_initialize (NULL);
|
||||
get_untransformed_body ();
|
||||
|
||||
/* Generate RTL for the body of DECL. */
|
||||
|
@ -2282,9 +2286,6 @@ cgraph_node::expand (void)
|
|||
|
||||
gcc_assert (symtab->global_info_ready);
|
||||
|
||||
/* Initialize the default bitmap obstack. */
|
||||
bitmap_obstack_initialize (NULL);
|
||||
|
||||
/* Initialize the RTL code for the function. */
|
||||
saved_loc = input_location;
|
||||
input_location = DECL_SOURCE_LOCATION (decl);
|
||||
|
@ -2298,7 +2299,8 @@ cgraph_node::expand (void)
|
|||
bitmap_obstack_initialize (&reg_obstack); /* FIXME, only at RTL generation*/
|
||||
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
execute_all_ipa_transforms (false);
|
||||
if (ipa_transforms_to_apply.exists ())
|
||||
execute_all_ipa_transforms (false);
|
||||
|
||||
/* Perform all tree transforms and optimizations. */
|
||||
|
||||
|
|
|
@ -644,16 +644,16 @@ save_inline_function_body (struct cgraph_node *node)
|
|||
tree_function_versioning (node->decl, first_clone->decl,
|
||||
NULL, NULL, true, NULL, NULL);
|
||||
|
||||
/* The function will be short lived and removed after we inline all the clones,
|
||||
but make it internal so we won't confuse ourself. */
|
||||
/* The function will be short lived and removed after we inline all the
|
||||
clones, but make it internal so we won't confuse ourself. */
|
||||
DECL_EXTERNAL (first_clone->decl) = 0;
|
||||
TREE_PUBLIC (first_clone->decl) = 0;
|
||||
DECL_COMDAT (first_clone->decl) = 0;
|
||||
first_clone->ipa_transforms_to_apply.release ();
|
||||
|
||||
/* When doing recursive inlining, the clone may become unnecessary.
|
||||
This is possible i.e. in the case when the recursive function is proved to be
|
||||
non-throwing and the recursion happens only in the EH landing pad.
|
||||
This is possible i.e. in the case when the recursive function is proved to
|
||||
be non-throwing and the recursion happens only in the EH landing pad.
|
||||
We cannot remove the clone until we are done with saving the body.
|
||||
Remove it now. */
|
||||
if (!first_clone->callers)
|
||||
|
@ -696,6 +696,14 @@ inline_transform (struct cgraph_node *node)
|
|||
if (cfun->after_inlining)
|
||||
return 0;
|
||||
|
||||
cgraph_node *next_clone;
|
||||
for (cgraph_node *n = node->clones; n; n = next_clone)
|
||||
{
|
||||
next_clone = n->next_sibling_clone;
|
||||
if (n->decl != node->decl)
|
||||
n->materialize_clone ();
|
||||
}
|
||||
|
||||
/* We might need the body of this function so that we can expand
|
||||
it inline somewhere else. */
|
||||
if (preserve_function_body_p (node))
|
||||
|
|
|
@ -783,6 +783,13 @@ ipa_param_adjustments::modify_call (gcall *stmt,
|
|||
{
|
||||
vec<tree, va_gc> **debug_args = NULL;
|
||||
unsigned i = 0;
|
||||
cgraph_node *callee_node = cgraph_node::get (callee_decl);
|
||||
|
||||
/* FIXME: we don't seem to be able to insert debug args before clone
|
||||
is materialized. Materializing them early leads to extra memory
|
||||
use. */
|
||||
if (callee_node->clone_of)
|
||||
callee_node->get_untransformed_body ();
|
||||
for (tree old_parm = DECL_ARGUMENTS (old_decl);
|
||||
old_parm && i < old_nargs && ((int) i) < m_always_copy_start;
|
||||
old_parm = DECL_CHAIN (old_parm), i++)
|
||||
|
|
40
gcc/ipa.c
40
gcc/ipa.c
|
@ -1386,43 +1386,3 @@ make_pass_ipa_single_use (gcc::context *ctxt)
|
|||
return new pass_ipa_single_use (ctxt);
|
||||
}
|
||||
|
||||
/* Materialize all clones. */
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_materialize_all_clones =
|
||||
{
|
||||
SIMPLE_IPA_PASS, /* type */
|
||||
"materialize-all-clones", /* name */
|
||||
OPTGROUP_NONE, /* optinfo_flags */
|
||||
TV_IPA_OPT, /* tv_id */
|
||||
0, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
0, /* todo_flags_finish */
|
||||
};
|
||||
|
||||
class pass_materialize_all_clones : public simple_ipa_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_materialize_all_clones (gcc::context *ctxt)
|
||||
: simple_ipa_opt_pass (pass_data_materialize_all_clones, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual unsigned int execute (function *)
|
||||
{
|
||||
symtab->materialize_all_clones ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
}; // class pass_materialize_all_clones
|
||||
|
||||
} // anon namespace
|
||||
|
||||
simple_ipa_opt_pass *
|
||||
make_pass_materialize_all_clones (gcc::context *ctxt)
|
||||
{
|
||||
return new pass_materialize_all_clones (ctxt);
|
||||
}
|
||||
|
|
|
@ -2271,6 +2271,14 @@ execute_all_ipa_transforms (bool do_not_collect)
|
|||
return;
|
||||
node = cgraph_node::get (current_function_decl);
|
||||
|
||||
cgraph_node *next_clone;
|
||||
for (cgraph_node *n = node->clones; n; n = next_clone)
|
||||
{
|
||||
next_clone = n->next_sibling_clone;
|
||||
if (n->decl != node->decl)
|
||||
n->materialize_clone ();
|
||||
}
|
||||
|
||||
if (node->ipa_transforms_to_apply.exists ())
|
||||
{
|
||||
unsigned int i;
|
||||
|
|
|
@ -172,7 +172,6 @@ along with GCC; see the file COPYING3. If not see
|
|||
passes are executed after partitioning and thus see just parts of the
|
||||
compiled unit. */
|
||||
INSERT_PASSES_AFTER (all_late_ipa_passes)
|
||||
NEXT_PASS (pass_materialize_all_clones);
|
||||
NEXT_PASS (pass_ipa_pta);
|
||||
NEXT_PASS (pass_omp_simd_clone);
|
||||
TERMINATE_PASS_LIST (all_late_ipa_passes)
|
||||
|
|
|
@ -519,8 +519,6 @@ extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt);
|
|||
extern ipa_opt_pass_d *make_pass_ipa_single_use (gcc::context *ctxt);
|
||||
extern ipa_opt_pass_d *make_pass_ipa_comdats (gcc::context *ctxt);
|
||||
extern ipa_opt_pass_d *make_pass_ipa_modref (gcc::context *ctxt);
|
||||
extern simple_ipa_opt_pass *make_pass_materialize_all_clones (gcc::context *
|
||||
ctxt);
|
||||
|
||||
extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context
|
||||
*ctxt);
|
||||
|
|
|
@ -8138,6 +8138,10 @@ ipa_pta_execute (void)
|
|||
from = constraints.length ();
|
||||
}
|
||||
|
||||
/* FIXME: Clone materialization is not preserving stmt references. */
|
||||
FOR_EACH_DEFINED_FUNCTION (node)
|
||||
node->clear_stmts_in_references ();
|
||||
|
||||
/* Build the constraints. */
|
||||
FOR_EACH_DEFINED_FUNCTION (node)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue