OpenMP: GC unused SIMD clones

SIMD clones are created during the IPA phase when it is not known whether
or not the vectorizer can use them.  Clones for functions with external
linkage are part of the ABI, but local clones can be GC'ed if no calls are
found in the compilation unit after vectorization.

gcc/ChangeLog
	* cgraph.h (struct cgraph_node): Add gc_candidate bit, modify
	default constructor to initialize it.
	* cgraphunit.cc (expand_all_functions): Save gc_candidate functions
	for last and iterate to handle recursive calls.  Delete leftover
	candidates at the end.
	* omp-simd-clone.cc (simd_clone_create): Set gc_candidate bit
	on local clones.
	* tree-vect-stmts.cc (vectorizable_simd_clone_call): Clear
	gc_candidate bit when a clone is used.

gcc/testsuite/ChangeLog
	* g++.dg/gomp/target-simd-clone-1.C: Tweak to test
	that the unused clone is GC'ed.
	* gcc.dg/gomp/target-simd-clone-1.c: Likewise.
This commit is contained in:
Sandra Loosemore 2023-01-03 17:46:02 +00:00
parent 8fdef16cd5
commit 0425ae780f
6 changed files with 66 additions and 11 deletions

View file

@ -891,7 +891,8 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
versionable (false), can_change_signature (false),
redefined_extern_inline (false), tm_may_enter_irr (false),
ipcp_clone (false), declare_variant_alt (false),
calls_declare_variant_alt (false), m_uid (uid), m_summary_id (-1)
calls_declare_variant_alt (false), gc_candidate (false),
m_uid (uid), m_summary_id (-1)
{}
/* Remove the node from cgraph and all inline clones inlined into it.
@ -1490,6 +1491,10 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
unsigned declare_variant_alt : 1;
/* True if the function calls declare_variant_alt functions. */
unsigned calls_declare_variant_alt : 1;
/* True if the function should only be emitted if it is used. This flag
is set for local SIMD clones when they are created and cleared if the
vectorizer uses them. */
unsigned gc_candidate : 1;
private:
/* Unique id of the node. */

View file

@ -1996,19 +1996,52 @@ expand_all_functions (void)
/* Output functions in RPO so callees get optimized before callers. This
allows ipa-ra and other propagators to work.
FIXME: This is far from optimal code layout. */
for (i = new_order_pos - 1; i >= 0; i--)
{
node = order[i];
FIXME: This is far from optimal code layout.
Make multiple passes over the list to defer processing of gc
candidates until all potential uses are seen. */
int gc_candidates = 0;
int prev_gc_candidates = 0;
if (node->process)
while (1)
{
for (i = new_order_pos - 1; i >= 0; i--)
{
expanded_func_count++;
node->process = 0;
node->expand ();
node = order[i];
if (node->gc_candidate)
gc_candidates++;
else if (node->process)
{
expanded_func_count++;
node->process = 0;
node->expand ();
}
}
if (!gc_candidates || gc_candidates == prev_gc_candidates)
break;
prev_gc_candidates = gc_candidates;
gc_candidates = 0;
}
/* Free any unused gc_candidate functions. */
if (gc_candidates)
for (i = new_order_pos - 1; i >= 0; i--)
{
node = order[i];
if (node->gc_candidate)
{
struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
if (symtab->dump_file)
fprintf (symtab->dump_file,
"Deleting unused function %s\n",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl)));
node->process = false;
free_dominance_info (fn, CDI_DOMINATORS);
free_dominance_info (fn, CDI_POST_DOMINATORS);
node->release_body (false);
}
}
if (dump_file)
fprintf (dump_file, "Expanded functions with time profile (%s):%u/%u\n",
main_input_filename, profiled_func_count, expanded_func_count);

View file

@ -702,6 +702,11 @@ simd_clone_create (struct cgraph_node *old_node, bool force_local)
= old_node->calls_declare_variant_alt;
}
/* Mark clones with internal linkage as gc'able, so they will not be
emitted unless the vectorizer can actually use them. */
if (!TREE_PUBLIC (new_node->decl))
new_node->gc_candidate = true;
return new_node;
}

View file

@ -1,5 +1,5 @@
/* { dg-options "-fopenmp -O2" } */
/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details -fdump-ipa-cgraph" } */
/* Test that simd clones are generated for functions with "declare target". */
@ -23,3 +23,8 @@ void callit (int *a, int *b, int *c)
/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*N.*__Z5additii" "simdclone" { target x86_64-*-* } } } */
/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*M.*__Z5additii" "simdclone" { target x86_64-*-* } } } */
/* Only the "N" clone is used. The other one should be GC'ed. */
/* { dg-final { scan-ipa-dump "Deleting unused function _ZGV.*M.*__Z5additii" "cgraph" { target x86_64-*-* } } } */

View file

@ -1,5 +1,5 @@
/* { dg-options "-fopenmp -O2" } */
/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details" } */
/* { dg-additional-options "-fopenmp-target-simd-clone=any -fdump-ipa-simdclone-details -fdump-ipa-cgraph" } */
/* Test that simd clones are generated for functions with "declare target". */
@ -23,3 +23,7 @@ void callit (int *a, int *b, int *c)
/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*N.*_addit" "simdclone" { target x86_64-*-* } } } */
/* { dg-final { scan-ipa-dump "Generated local clone _ZGV.*M.*_addit" "simdclone" { target x86_64-*-* } } } */
/* Only the "N" clone is used. The other one should be GC'ed. */
/* { dg-final { scan-ipa-dump "Deleting unused function _ZGV.*M.*_addit" "cgraph" { target x86_64-*-* } } } */

View file

@ -4620,6 +4620,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
vargs.release ();
/* Mark the clone as no longer being a candidate for GC. */
bestn->gc_candidate = false;
/* The call in STMT might prevent it from being removed in dce.
We however cannot remove it here, due to the way the ssa name
it defines is mapped to the new definition. So just replace