[BRIGFE] Enable whole program optimizations

HSA assumes all program scope HSAIL symbols can be queried from
the host runtime API, thus they must not be removed by IPA.

Getting some inlining happening in the finalized binary required:
* explicitly marking the 'prog' scope functions and the launcher
function "externally_visible" to keep the inliner from removing them
* also marking the host_def ptr externally visible, otherwise
IPA assumes it's never set
* adding the 'inline' keyword to functions to enable inlining,
otherwise GCC defaults to replaceable functions (one can link
over the previous one) which cannot be inlined
* replacing all calls to declarations with calls to definitions to
enable the inliner to find the definition
* fixing missing hidden argument types in the generated functions.
These were ignored silently until GCC started to be able to
inline calls to such functions.
* not gimplifying before fixing the call targets. Otherwise the
calls get detached and the definitions are not found. The reason
why this happens is not clear, but gimplifying only after call
target decl->def conversion fixes this.

From-SVN: r259943
This commit is contained in:
Pekka Jääskeläinen 2018-05-04 16:44:02 +00:00 committed by Pekka Jääskeläinen
parent 1b40975c87
commit 637f3cdec3
8 changed files with 130 additions and 14 deletions

View file

@ -1,3 +1,14 @@
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* brig/brig-lang.c: Add support for whole program
optimizations by marking the kernels externally visible.
* brig/brigfrontend/brig-branch-inst-handler.cc: See above.
* brig/brigfrontend/brig-function-handler.cc: See above.
* brig/brigfrontend/brig-function.cc: See above.
* brig/brigfrontend/brig-to-generic.cc: See above.
* brig/brigfrontend/brig-to-generic.h: See above.
* brig/brigfrontend/brig-variable-handler.h: See above.
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>

View file

@ -57,7 +57,7 @@ static tree handle_pure_attribute (tree *, tree, tree, int, bool *);
static tree handle_nothrow_attribute (tree *, tree, tree, int, bool *);
static tree handle_returns_twice_attribute (tree *, tree, tree, int, bool *);
/* This file is based on Go frontent'd go-lang.c and gogo-tree.cc. */
/* This file is based on Go frontend's go-lang.c and gogo-tree.cc. */
/* If -v set. */
@ -123,7 +123,7 @@ brig_langhook_init_options_struct (struct gcc_options *opts)
/* When this is set to one, the whole program optimizations internalize
all global variables that are not marked externally visible, making
them invisible to the dyn loader (and thus the HSA runtime
implementation). */
opts->x_flag_whole_program = 0;
opts->x_flag_whole_program = 1;
/* The builtin math functions should not set errno. */
opts->x_flag_errno_math = 0;

View file

@ -150,6 +150,8 @@ brig_branch_inst_handler::operator () (const BrigBase *base)
}
m_parent.m_cf->m_called_functions.push_back (func_ref);
if (DECL_EXTERNAL (func_ref))
m_parent.add_decl_call (call);
return base->byteCount;
}

View file

@ -132,6 +132,14 @@ brig_directive_function_handler::operator () (const BrigBase *base)
DECL_RESULT (fndecl) = resdecl;
DECL_CONTEXT (resdecl) = fndecl;
DECL_EXTERNAL (fndecl) = 0;
/* Aggressive inlining into the kernel function is usually a good
idea with offlined functionality to enhance SIMD execution on
GPUs and vector units. */
DECL_ATTRIBUTES (fndecl)
= tree_cons (get_identifier ("flatten"), NULL,
DECL_ATTRIBUTES (fndecl));
}
else
{
@ -228,6 +236,8 @@ brig_directive_function_handler::operator () (const BrigBase *base)
vec_safe_push (args, ptr_type_node);
vec_safe_push (args, ptr_type_node);
vec_safe_push (args, ptr_type_node);
vec_safe_push (args, ptr_type_node);
fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
build_function_type_vec (ret_type, args));
@ -295,21 +305,21 @@ brig_directive_function_handler::operator () (const BrigBase *base)
DECL_SAVED_TREE (fndecl) = bind_expr;
/* Try to preserve the functions across IPA. */
DECL_PRESERVE_P (fndecl) = 1;
TREE_SIDE_EFFECTS (fndecl) = 1;
TREE_ADDRESSABLE (fndecl) = 1;
set_externally_visible (fndecl);
if (base->kind == BRIG_KIND_DIRECTIVE_FUNCTION)
{
TREE_STATIC (fndecl) = 1;
TREE_STATIC (fndecl) = 0;
TREE_PUBLIC (fndecl) = 1;
DECL_EXTERNAL (fndecl) = 0;
DECL_DECLARED_INLINE_P (fndecl) = 1;
}
else if (base->kind == BRIG_KIND_DIRECTIVE_KERNEL)
{
TREE_STATIC (fndecl) = 1;
TREE_STATIC (fndecl) = 0;
TREE_PUBLIC (fndecl) = 1;
DECL_EXTERNAL (fndecl) = 0;
set_externally_visible (fndecl);
}
else if (base->kind == BRIG_KIND_DIRECTIVE_SIGNATURE)
{
@ -349,8 +359,12 @@ brig_directive_function_handler::operator () (const BrigBase *base)
m_parent.add_function_decl (func_name, fndecl);
m_parent.append_global (fndecl);
if (!is_definition)
return bytes_consumed;
{
DECL_EXTERNAL (fndecl) = 1;
return bytes_consumed;
}
m_parent.start_function (fndecl);

View file

@ -589,7 +589,7 @@ brig_function::emit_launcher_and_metadata ()
tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
TREE_STATIC (launcher) = 0;
TREE_STATIC (launcher) = 1;
TREE_PUBLIC (launcher) = 1;
DECL_SAVED_TREE (launcher) = bind_expr;
@ -633,6 +633,8 @@ brig_function::emit_launcher_and_metadata ()
emit_metadata (stmt_list);
set_externally_visible (launcher);
return launcher;
}

View file

@ -52,6 +52,7 @@
#include "cgraph.h"
#include "dumpfile.h"
#include "tree-pretty-print.h"
#include "attribs.h"
extern int gccbrig_verbose;
@ -487,7 +488,9 @@ brig_to_generic::add_global_variable (const std::string &name, tree var_decl)
tree var_addr = build1 (ADDR_EXPR, ptype, var_decl);
DECL_INITIAL (host_def_var) = var_addr;
TREE_PUBLIC (host_def_var) = 0;
TREE_PUBLIC (host_def_var) = 1;
set_externally_visible (host_def_var);
}
/* Adds an indirection pointer for a potential host-defined program scope
@ -510,10 +513,18 @@ brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl)
TREE_ADDRESSABLE (ptr_var) = 1;
TREE_STATIC (ptr_var) = 1;
set_externally_visible (ptr_var);
append_global (ptr_var);
m_global_variables[var_name] = ptr_var;
}
/* Remember CALL so that write_globals can later redirect its callee from
   a function declaration to the matching definition.  The visible caller
   registers a call only when the callee decl is DECL_EXTERNAL.  */
void
brig_to_generic::add_decl_call (tree call)
{
m_decl_call.push_back (call);
}
/* Produce a "mangled name" for the given brig function or kernel.
The mangling is used to make unique global symbol name in case of
module scope functions. Program scope functions are not mangled
@ -701,8 +712,6 @@ brig_to_generic::finish_function ()
m_cf->finish ();
m_cf->emit_metadata (stmts);
dump_function (m_dump_file, m_cf);
gimplify_function_tree (m_cf->m_func_decl);
cgraph_node::finalize_function (m_cf->m_func_decl, true);
}
else
/* Emit the kernel only at the very end so we can analyze the total
@ -846,6 +855,43 @@ call_builtin (tree pdecl, int nargs, tree rettype, ...)
void
brig_to_generic::write_globals ()
{
/* Replace calls to declarations with calls to definitions. Otherwise
inlining will fail to find the definition to inline from. */
for (size_t i = 0; i < m_decl_call.size(); ++i)
{
tree decl_call = m_decl_call.at(i);
tree func_decl = get_callee_fndecl (decl_call);
brig_function *brig_function = get_finished_function (func_decl);
if (brig_function && brig_function->m_func_decl
&& DECL_EXTERNAL (brig_function->m_func_decl) == 0
&& brig_function->m_func_decl != func_decl)
{
decl_call = CALL_EXPR_FN (decl_call);
STRIP_NOPS (decl_call);
if (TREE_CODE (decl_call) == ADDR_EXPR
&& TREE_CODE (TREE_OPERAND (decl_call, 0)) == FUNCTION_DECL)
TREE_OPERAND (decl_call, 0) = brig_function->m_func_decl;
}
}
for (std::map<std::string, brig_function *>::iterator i
= m_finished_functions.begin(), e = m_finished_functions.end();
i != e; ++i)
{
brig_function *brig_f = (*i).second;
if (brig_f->m_is_kernel)
continue;
/* Finalize only at this point so that the cgraph analysis can see
the definitions of functions that are called before they are defined. */
gimplify_function_tree (brig_f->m_func_decl);
cgraph_node::finalize_function (brig_f->m_func_decl, true);
}
/* Now that the whole BRIG module has been processed, build a launcher
and a metadata section for each built kernel. */
for (size_t i = 0; i < m_kernels.size (); ++i)
@ -880,6 +926,17 @@ brig_to_generic::write_globals ()
append_global (launcher);
if (m_dump_file)
{
std::string kern_name = f->m_name.substr (1);
fprintf (m_dump_file, "\n;; Function %s", kern_name.c_str());
fprintf (m_dump_file, "\n;; enabled by -%s\n\n",
dump_flag_name (TDI_original));
print_generic_decl (m_dump_file, launcher, 0);
print_generic_expr (m_dump_file, DECL_SAVED_TREE (launcher), 0);
fprintf (m_dump_file, "\n");
}
gimplify_function_tree (launcher);
cgraph_node::finalize_function (launcher, true);
pop_cfun ();
@ -933,6 +990,25 @@ get_scalar_unsigned_int_type (tree original_type)
* BITS_PER_UNIT, true);
}
/* Tag DECL with the "externally_visible" attribute, unless it already
   carries one, so that whole program optimizations do not remove it.  */

void
set_externally_visible (tree decl)
{
  tree attrs = DECL_ATTRIBUTES (decl);

  /* Nothing to do when the attribute is already present.  */
  if (lookup_attribute ("externally_visible", attrs))
    return;

  tree attr_name = get_identifier ("externally_visible");
  DECL_ATTRIBUTES (decl) = tree_cons (attr_name, NULL, attrs);
}
/* Prepend an "inline" attribute to DECL's attribute list unless one is
   already there.
   NOTE(review): the function handler requests inlining through
   DECL_DECLARED_INLINE_P rather than through this helper; confirm that
   an attribute named "inline" is actually consumed somewhere.  */

void
set_inline (tree decl)
{
  tree attrs = DECL_ATTRIBUTES (decl);

  /* Avoid adding a duplicate entry.  */
  if (lookup_attribute ("inline", attrs))
    return;

  tree attr_name = get_identifier ("inline");
  DECL_ATTRIBUTES (decl) = tree_cons (attr_name, NULL, attrs);
}
void
dump_function (FILE *dump_file, brig_function *f)
{

View file

@ -74,6 +74,7 @@ public:
tree global_variable (const std::string &name) const;
void add_global_variable (const std::string &name, tree var_decl);
void add_host_def_var_ptr (const std::string &name, tree var_decl);
void add_decl_call (tree call);
void start_function (tree f);
void finish_function ();
@ -152,6 +153,10 @@ private:
label_index m_global_variables;
/* Calls to declarations to be fixed in the end of processing to call
defs instead. */
std::vector<tree> m_decl_call;
/* The size of each private variable, including the alignment padding. */
std::map<std::string, size_t> m_private_data_sizes;
@ -226,6 +231,9 @@ tree build_stmt (enum tree_code code, ...);
tree get_unsigned_int_type (tree type);
tree get_scalar_unsigned_int_type (tree type);
void set_externally_visible (tree decl);
void set_inline (tree decl);
void dump_function (FILE *dump_file, brig_function *f);

View file

@ -27,6 +27,7 @@
#include "brig-util.h"
#include "print-tree.h"
#include "diagnostic-core.h"
#include "brig-to-generic.h"
tree
brig_directive_variable_handler::build_variable
@ -206,6 +207,8 @@ brig_directive_variable_handler::operator () (const BrigBase *base)
so we can get their address from the Runtime API. */
DECL_CONTEXT (var_decl) = NULL_TREE;
TREE_STATIC (var_decl) = 1;
TREE_PUBLIC (var_decl) = 1;
set_externally_visible (var_decl);
m_parent.add_global_variable (var_name, var_decl);
}
}