analyzer: add support for plugin-supplied known function behaviors

This patch adds the ability for plugins to register "known functions"
with the analyzer, identified by name.  If -fanalyzer sees a call to
such a function (with no body), it will use a plugin-provided subclass
of the new known_function abstract base class to model the possible
outcomes of the function call.

gcc/ChangeLog:
	* Makefile.in (ANALYZER_OBJS): Add
	analyzer/known-function-manager.o.

gcc/analyzer/ChangeLog:
	* analyzer.h (class known_function_manager): New forward decl.
	(class known_function): New.
	(plugin_analyzer_init_iface::register_known_function): New.
	* engine.cc: Include "analyzer/known-function-manager.h".
	(plugin_analyzer_init_impl::plugin_analyzer_init_impl): Add
	known_fn_mgr param.
	(plugin_analyzer_init_impl::register_state_machine): Add
	LOG_SCOPE.
	(plugin_analyzer_init_impl::register_known_function): New.
	(plugin_analyzer_init_impl::m_known_fn_mgr): New.
	(impl_run_checkers): Update plugin callback invocation to use
	eng's known_function_manager.
	* known-function-manager.cc: New file.
	* known-function-manager.h: New file.
	* region-model-manager.cc
	(region_model_manager::region_model_manager): Pass logger to
	m_known_fn_mgr's ctor.
	* region-model.cc (region_model::update_for_zero_return): New.
	(region_model::update_for_nonzero_return): New.
	(maybe_simplify_upper_bound): New.
	(region_model::maybe_get_copy_bounds): New.
	(region_model::get_known_function): New.
	(region_model::on_call_pre): Handle plugin-supplied known
	functions.
	* region-model.h: Include "analyzer/known-function-manager.h".
	(region_model_manager::get_known_function_manager): New.
	(region_model_manager::m_known_fn_mgr): New.
	(call_details::get_model): New accessor.
	(region_model::maybe_get_copy_bounds): New decl.
	(region_model::update_for_zero_return): New decl.
	(region_model::update_for_nonzero_return): New decl.
	(region_model::get_known_function): New decl.
	(region_model::get_known_function_manager): New.

gcc/testsuite/ChangeLog:
	* gcc.dg/plugin/analyzer_known_fns_plugin.c: New test plugin.
	* gcc.dg/plugin/known-fns-1.c: New test.
	* gcc.dg/plugin/plugin.exp (plugin_test_list): Add the new plugin
	and test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
This commit is contained in:
David Malcolm 2022-09-09 17:11:42 -04:00
parent 084dc9a0c6
commit 07e30160be
11 changed files with 548 additions and 2 deletions

View file

@ -1262,6 +1262,7 @@ ANALYZER_OBJS = \
analyzer/engine.o \
analyzer/feasible-graph.o \
analyzer/function-set.o \
analyzer/known-function-manager.o \
analyzer/pending-diagnostic.o \
analyzer/program-point.o \
analyzer/program-state.o \

View file

@ -113,6 +113,7 @@ class engine;
class state_machine;
class logger;
class visitor;
class known_function_manager;
/* Forward decls of functions. */
@ -218,12 +219,24 @@ extern location_t get_stmt_location (const gimple *stmt, function *fun);
extern bool compat_types_p (tree src_type, tree dst_type);
/* Interface that plugins implement in order to tell the analyzer how
   a call to a particular known function should be simulated.  */

class known_function
{
public:
  virtual ~known_function () = default;

  /* Simulate the call described by CD.  Every concrete subclass has
     to provide this.  */
  virtual void impl_call_pre (const call_details &cd) const = 0;
};
/* Passed by pointer to PLUGIN_ANALYZER_INIT callbacks.
   A callback uses this interface to hand state machines and
   known-function models over to the analyzer, and to reach the
   logger in use.  */
class plugin_analyzer_init_iface
{
public:
/* Register the given state machine with the analyzer.  */
virtual void register_state_machine (state_machine *) = 0;
/* Register the given known_function as the model for functions
   called NAME.
   NOTE(review): ownership of the pointer is decided by the
   implementation, not by this interface - confirm before relying
   on it.  */
virtual void register_known_function (const char *name,
known_function *) = 0;
/* Get the logger associated with this interface.  */
virtual logger *get_logger () const = 0;
};

View file

@ -71,6 +71,7 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "attribs.h"
#include "tree-dfa.h"
#include "analyzer/known-function-manager.h"
/* For an overview, see gcc/doc/analyzer.texi. */
@ -5813,16 +5814,26 @@ class plugin_analyzer_init_impl : public plugin_analyzer_init_iface
{
public:
/* Ctor: remember where registrations should go (CHECKERS for state
   machines, KNOWN_FN_MGR for known functions) and the LOGGER to use.
   The three pointers are stored as given.  */
plugin_analyzer_init_impl (auto_delete_vec <state_machine> *checkers,
known_function_manager *known_fn_mgr,
logger *logger)
: m_checkers (checkers),
m_known_fn_mgr (known_fn_mgr),
m_logger (logger)
{}
/* Implementation of plugin_analyzer_init_iface::register_state_machine:
   append SM to the vector of checkers (an auto_delete_vec).  */
void register_state_machine (state_machine *sm) final override
{
LOG_SCOPE (m_logger);
m_checkers->safe_push (sm);
}
/* Implementation of plugin_analyzer_init_iface::register_known_function:
   forward NAME and KF to the known_function_manager.  */
void register_known_function (const char *name,
known_function *kf) final override
{
LOG_SCOPE (m_logger);
m_known_fn_mgr->add (name, kf);
}
logger *get_logger () const final override
{
return m_logger;
@ -5830,6 +5841,7 @@ public:
private:
auto_delete_vec <state_machine> *m_checkers;
known_function_manager *m_known_fn_mgr;
logger *m_logger;
};
@ -5885,7 +5897,9 @@ impl_run_checkers (logger *logger)
auto_delete_vec <state_machine> checkers;
make_checkers (checkers, logger);
plugin_analyzer_init_impl data (&checkers, logger);
plugin_analyzer_init_impl data (&checkers,
eng.get_known_function_manager (),
logger);
invoke_plugin_callbacks (PLUGIN_ANALYZER_INIT, &data);
if (logger)

View file

@ -0,0 +1,78 @@
/* Support for plugin-supplied behaviors of known functions.
Copyright (C) 2022 Free Software Foundation, Inc.
Contributed by David Malcolm <dmalcolm@redhat.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "function.h"
#include "analyzer/analyzer.h"
#include "diagnostic-core.h"
#include "analyzer/analyzer-logging.h"
#include "stringpool.h"
#include "analyzer/known-function-manager.h"
#if ENABLE_ANALYZER
namespace ana {
/* class known_function_manager : public log_user. */
/* known_function_manager's ctor: LOGGER is passed on to the log_user
   base class; the map of known functions starts out empty.  */
known_function_manager::known_function_manager (logger *logger)
: log_user (logger)
{
}
/* known_function_manager's dtor: release every known_function held
   in the map, since the manager owns them.  */

known_function_manager::~known_function_manager ()
{
  for (auto entry : m_map_id_to_kf)
    {
      known_function *owned_kf = entry.second;
      delete owned_kf;
    }
}
void
known_function_manager::add (const char *name, known_function *kf)
{
LOG_FUNC_1 (get_logger (), "registering %s", name);
tree id = get_identifier (name);
m_map_id_to_kf.put (id, kf);
}
/* Look up the known_function registered under IDENTIFIER.
   Return NULL if the map has no entry for it.  */

const known_function *
known_function_manager::get_by_identifier (tree identifier)
{
  known_function **entry = m_map_id_to_kf.get (identifier);
  if (!entry)
    return NULL;
  return *entry;
}
/* Look up the known_function registered under the DECL_NAME of
   FNDECL.  Return NULL if FNDECL has no name, or if nothing is
   registered under that name.  */

const known_function *
known_function_manager::get_by_fndecl (tree fndecl)
{
  tree decl_name = DECL_NAME (fndecl);
  if (!decl_name)
    return NULL;
  return get_by_identifier (decl_name);
}
} // namespace ana
#endif /* #if ENABLE_ANALYZER */

View file

@ -0,0 +1,45 @@
/* Support for plugin-supplied behaviors of known functions.
Copyright (C) 2022 Free Software Foundation, Inc.
Contributed by David Malcolm <dmalcolm@redhat.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#ifndef GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H
#define GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H
namespace ana {
/* Registry of known_function instances, keyed by identifier.
   Instances passed to "add" are owned by the manager.  */
class known_function_manager : public log_user
{
public:
/* LOGGER is handed to the log_user base class.  */
known_function_manager (logger *logger);
~known_function_manager ();
/* Register KF under NAME.  */
void add (const char *name, known_function *kf);
/* Look up by identifier node, or by the name of a function decl.  */
const known_function *get_by_identifier (tree identifier);
const known_function *get_by_fndecl (tree fndecl);
private:
DISABLE_COPY_AND_ASSIGN (known_function_manager);
/* Map from identifier to known_function instance.
Has ownership of the latter. */
hash_map<tree, known_function *> m_map_id_to_kf;
};
} // namespace ana
#endif /* GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H */

View file

@ -81,7 +81,8 @@ region_model_manager::region_model_manager (logger *logger)
m_globals_region (alloc_region_id (), &m_root_region),
m_globals_map (),
m_store_mgr (this),
m_range_mgr (new bounded_ranges_manager ())
m_range_mgr (new bounded_ranges_manager ()),
m_known_fn_mgr (logger)
{
}

View file

@ -1968,6 +1968,110 @@ maybe_get_const_fn_result (const call_details &cd)
return sval;
}
/* Update this model for an outcome of a call that returns zero.
If UNMERGEABLE, then make the result unmergeable, e.g. to prevent
the state-merger code from merging success and failure outcomes. */
void
region_model::update_for_zero_return (const call_details &cd,
bool unmergeable)
{
if (!cd.get_lhs_type ())
return;
const svalue *result
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
if (unmergeable)
result = m_mgr->get_or_create_unmergeable (result);
set_value (cd.get_lhs_region (), result, cd.get_ctxt ());
}
/* Update this model for an outcome of a call that returns non-zero. */
void
region_model::update_for_nonzero_return (const call_details &cd)
{
if (!cd.get_lhs_type ())
return;
const svalue *zero
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
const svalue *result
= get_store_value (cd.get_lhs_region (), cd.get_ctxt ());
add_constraint (result, NE_EXPR, zero, cd.get_ctxt ());
}
/* Helper for region_model::maybe_get_copy_bounds.

   Code such as the Linux kernel's
     min_t([unsigned] long, VAR, sizeof(T));
   is commonly used to cap the size passed to copy_to_user.
   Recognize sizes of that shape: after stripping any casts, if
   NUM_BYTES_SVAL is a MIN_EXPR whose second operand is a constant,
   return that constant cast back to NUM_BYTES_SVAL's type.

   Return NULL if NUM_BYTES_SVAL does not have that shape.  */

static const svalue *
maybe_simplify_upper_bound (const svalue *num_bytes_sval,
                            region_model_manager *mgr)
{
  tree orig_type = num_bytes_sval->get_type ();

  /* Peel off any casts wrapped around the size.  */
  const svalue *stripped_sval = num_bytes_sval;
  while (const svalue *inner_sval = stripped_sval->maybe_undo_cast ())
    stripped_sval = inner_sval;

  const binop_svalue *min_sval = stripped_sval->dyn_cast_binop_svalue ();
  if (!min_sval)
    return NULL;
  if (min_sval->get_op () != MIN_EXPR)
    return NULL;

  const svalue *bound_sval = min_sval->get_arg1 ();
  if (bound_sval->get_kind () != SK_CONSTANT)
    return NULL;

  /* TODO: we might want to also capture the constraint
     when recording the diagnostic, or note that we're using
     the upper bound.  */
  return mgr->get_or_create_cast (orig_type, bound_sval);
}
/* Try to find a constant upper limit on the number of bytes moved
   when simulating a copy function.

   NUM_BYTES_SVAL is the symbolic size of the copy and SRC_REG the
   region being copied from.  In order of preference the result is:
   NUM_BYTES_SVAL itself if constant; a constant obtained by
   simplifying it; the capacity of SRC_REG's base region if that is
   constant.

   Return NULL if none of these yields a constant.  */

const svalue *
region_model::maybe_get_copy_bounds (const region *src_reg,
                                     const svalue *num_bytes_sval)
{
  /* Best case: the requested size is already a constant.  */
  if (num_bytes_sval->maybe_get_constant ())
    return num_bytes_sval;

  /* Next, see whether the size simplifies to a constant bound.  */
  const svalue *simplified_sval
    = maybe_simplify_upper_bound (num_bytes_sval, m_mgr);
  if (simplified_sval && simplified_sval->maybe_get_constant ())
    return simplified_sval;

  /* For now, fall back to guessing the size as the capacity of the
     base region of the src.
     This is a hack; we might get too large a value.  */
  const svalue *capacity_sval = get_capacity (src_reg->get_base_region ());
  return capacity_sval->maybe_get_constant () ? capacity_sval : NULL;
}
/* Ask the known_function_manager for a known_function matching
   FNDECL; the result is NULL when there is none.  */

const known_function *
region_model::get_known_function (tree fndecl) const
{
  return m_mgr->get_known_function_manager ()->get_by_fndecl (fndecl);
}
/* Update this model for the CALL stmt, using CTXT to report any
diagnostics - the first half.
@ -2224,6 +2328,11 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt,
{
/* Handle in "on_call_post". */
}
else if (const known_function *kf = get_known_function (callee_fndecl))
{
kf->impl_call_pre (cd);
return false;
}
else if (!fndecl_has_gimple_body_p (callee_fndecl)
&& (!(callee_fndecl_flags & (ECF_CONST | ECF_PURE)))
&& !fndecl_built_in_p (callee_fndecl))

View file

@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "analyzer/svalue.h"
#include "analyzer/region.h"
#include "analyzer/known-function-manager.h"
using namespace ana;
@ -347,6 +348,11 @@ public:
store_manager *get_store_manager () { return &m_store_mgr; }
bounded_ranges_manager *get_range_manager () const { return m_range_mgr; }
/* Get the manager of plugin-supplied known functions; it is a data
   member of this object, so the pointer is never NULL.  */
known_function_manager *get_known_function_manager ()
{
return &m_known_fn_mgr;
}
/* Dynamically-allocated region instances.
The number of these within the analysis can grow arbitrarily.
They are still owned by the manager. */
@ -504,6 +510,8 @@ private:
bounded_ranges_manager *m_range_mgr;
known_function_manager m_known_fn_mgr;
/* "Dynamically-allocated" region instances.
The number of these within the analysis can grow arbitrarily.
They are still owned by the manager. */
@ -521,6 +529,7 @@ public:
call_details (const gcall *call, region_model *model,
region_model_context *ctxt);
/* Accessor for the region_model stored in m_model.  */
region_model *get_model () const { return m_model; }
region_model_manager *get_manager () const;
region_model_context *get_ctxt () const { return m_ctxt; }
uncertainty_t *get_uncertainty () const;
@ -645,6 +654,12 @@ class region_model
void impl_call_va_arg (const call_details &cd);
void impl_call_va_end (const call_details &cd);
const svalue *maybe_get_copy_bounds (const region *src_reg,
const svalue *num_bytes_sval);
void update_for_zero_return (const call_details &cd,
bool unmergeable);
void update_for_nonzero_return (const call_details &cd);
void handle_unrecognized_call (const gcall *call,
region_model_context *ctxt);
void get_reachable_svalues (svalue_set *out,
@ -815,6 +830,8 @@ class region_model
get_representative_path_var_1 (const region *reg,
svalue_set *visited) const;
const known_function *get_known_function (tree fndecl) const;
bool add_constraint (const svalue *lhs,
enum tree_code op,
const svalue *rhs,
@ -1324,6 +1341,10 @@ public:
engine (const supergraph *sg = NULL, logger *logger = NULL);
const supergraph *get_supergraph () { return m_sg; }
region_model_manager *get_model_manager () { return &m_mgr; }
/* Convenience accessor: forwards to m_mgr's
   get_known_function_manager.  */
known_function_manager *get_known_function_manager ()
{
return m_mgr.get_known_function_manager ();
}
void log_stats (logger *logger) const;

View file

@ -0,0 +1,201 @@
/* Proof-of-concept of a -fanalyzer plugin to handle known functions. */
/* { dg-options "-g" } */
#include "gcc-plugin.h"
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "diagnostic-core.h"
#include "graphviz.h"
#include "options.h"
#include "cgraph.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "convert.h"
#include "target.h"
#include "fold-const.h"
#include "tree-pretty-print.h"
#include "diagnostic-color.h"
#include "diagnostic-metadata.h"
#include "tristate.h"
#include "bitmap.h"
#include "selftest.h"
#include "function.h"
#include "json.h"
#include "analyzer/analyzer.h"
#include "analyzer/analyzer-logging.h"
#include "ordered-hash-map.h"
#include "options.h"
#include "cgraph.h"
#include "cfg.h"
#include "digraph.h"
#include "analyzer/supergraph.h"
#include "sbitmap.h"
#include "analyzer/call-string.h"
#include "analyzer/program-point.h"
#include "analyzer/store.h"
#include "analyzer/region-model.h"
#include "analyzer/call-info.h"
int plugin_is_GPL_compatible;
#if ENABLE_ANALYZER
namespace ana {
/* Basic example of known fn behavior. */
class known_function_returns_42 : public known_function
{
public:
void impl_call_pre (const call_details &cd) const final override
{
if (cd.get_lhs_type ())
{
const svalue *result
= cd.get_manager ()->get_or_create_int_cst (cd.get_lhs_type (), 42);
cd.maybe_set_lhs (result);
}
}
};
/* Example of bifurcation, with a copy that can fail.

   Models a function taking (destination pointer, source pointer,
   size).  The call is split into two outcomes: a "success" outcome
   in which zero is returned and the destination receives the value
   read from the source, and a "failure" outcome in which a non-zero
   value is returned and the destination is left alone.  */

class known_function_attempt_to_copy : public known_function
{
public:
  /* The outcome in which the copy went ahead.  */
  class copy_success : public success_call_info
  {
  public:
    /* SIZED_DEST_REG is the region to write to and COPIED_SVAL the
       value to store there.  */
    copy_success (const call_details &cd,
                  const region *sized_dest_reg,
                  const svalue *copied_sval)
    : success_call_info (cd),
      m_sized_dest_reg (sized_dest_reg),
      m_copied_sval (copied_sval)
    {}

    /* Make the call return zero (unmergeable, so that this outcome
       is not merged with the failure one) and store the copied value
       into the destination.  */
    bool update_model (region_model *model,
                       const exploded_edge *,
                       region_model_context *ctxt) const final override
    {
      call_details cd (get_call_details (model, ctxt));
      model->update_for_zero_return (cd, true);
      model->set_value (m_sized_dest_reg, m_copied_sval, ctxt);
      return true;
    }

    const region *m_sized_dest_reg;
    const svalue *m_copied_sval;
  };

  /* The outcome in which the copy did not happen.  */
  class copy_failure : public failed_call_info
  {
  public:
    copy_failure (const call_details &cd)
    : failed_call_info (cd)
    {}

    /* Constrain the return value to be non-zero.  */
    bool update_model (region_model *model,
                       const exploded_edge *,
                       region_model_context *ctxt) const final override
    {
      call_details cd (get_call_details (model, ctxt));
      model->update_for_nonzero_return (cd);
      /* Leave the destination region untouched.  */
      return true;
    }
  };

  void impl_call_pre (const call_details &cd) const final override
  {
    region_model_manager *mgr = cd.get_manager ();
    region_model *model = cd.get_model ();

    const svalue *dest_sval = cd.get_arg_svalue (0);
    const svalue *src_sval = cd.get_arg_svalue (1);
    const svalue *num_bytes_sval = cd.get_arg_svalue (2);

    const region *dest_reg = model->deref_rvalue (dest_sval,
                                                  cd.get_arg_tree (0),
                                                  cd.get_ctxt ());
    const region *src_reg = model->deref_rvalue (src_sval,
                                                 cd.get_arg_tree (1),
                                                 cd.get_ctxt ());

    /* Prefer a constant bound on the size, when one can be found.  */
    if (const svalue * bounded_sval
          = model->maybe_get_copy_bounds (src_reg, num_bytes_sval))
      num_bytes_sval = bounded_sval;

    if (tree cst = num_bytes_sval->maybe_get_constant ())
      if (zerop (cst))
        /* No-op.  */
        return;

    const region *sized_src_reg = mgr->get_sized_region (src_reg,
                                                         NULL_TREE,
                                                         num_bytes_sval);

    const svalue *copied_sval
      = model->get_store_value (sized_src_reg, cd.get_ctxt ());

    const region *sized_dest_reg = mgr->get_sized_region (dest_reg,
                                                          NULL_TREE,
                                                          num_bytes_sval);

    if (cd.get_ctxt ())
      {
        /* Bifurcate state, creating a "failure" out-edge.  */
        cd.get_ctxt ()->bifurcate (new copy_failure (cd));

        /* The "unbifurcated" state is the "success" case.  */
        copy_success success (cd,
                              sized_dest_reg,
                              copied_sval);
        success.update_model (model, NULL, cd.get_ctxt ());
      }
  }
};
/* Callback handler for the PLUGIN_ANALYZER_INIT event. */
static void
known_fn_analyzer_init_cb (void *gcc_data, void */*user_data*/)
{
ana::plugin_analyzer_init_iface *iface
= (ana::plugin_analyzer_init_iface *)gcc_data;
LOG_SCOPE (iface->get_logger ());
if (0)
inform (input_location, "got here: known_fn_analyzer_init_cb");
iface->register_known_function ("returns_42",
new known_function_returns_42 ());
iface->register_known_function ("attempt_to_copy",
new known_function_attempt_to_copy ());
}
} // namespace ana
#endif /* #if ENABLE_ANALYZER */
/* Plugin entry point.
   With the analyzer enabled, hook known_fn_analyzer_init_cb up to
   the PLUGIN_ANALYZER_INIT event under the plugin's base name;
   otherwise call sorry_no_analyzer.  Always returns 0.  */

int
plugin_init (struct plugin_name_args *plugin_info,
             struct plugin_gcc_version *version)
{
#if ENABLE_ANALYZER
  const char *plugin_name = plugin_info->base_name;
  if (0)
    inform (input_location, "got here; %qs", plugin_name);
  register_callback (plugin_name,
                     PLUGIN_ANALYZER_INIT,
                     ana::known_fn_analyzer_init_cb,
                     NULL); /* void *user_data */
#else
  sorry_no_analyzer ();
#endif
  return 0;
}

View file

@ -0,0 +1,61 @@
/* { dg-do compile } */
/* { dg-options "-fanalyzer" } */
/* { dg-require-effective-target analyzer } */
#include "../analyzer/analyzer-decls.h"
/* Basic example of known fn behavior. */
extern int returns_42 (void);
/* The value returned by a call to returns_42 (declared above without
   a body) is expected to be known to equal 42.  */
void test_1 (void)
{
int val = returns_42 ();
__analyzer_eval (val == 42); /* { dg-warning "TRUE" } */
}
/* Example of bifurcation, with a copy that can fail. */
extern int
attempt_to_copy (void *to, const void *from, int sz);
/* Take the branch on which attempt_to_copy returns zero.  No
   diagnostics are expected in this function.  */
void test_copy_success (void *to, const void *from, int sz)
{
if (!attempt_to_copy (to, from, sz))
{
/* Success */
}
}
/* Take the branch on which attempt_to_copy returns non-zero and dump
   the path; the dumped path is expected to describe the call as
   failing.  */
void test_copy_failure (void *to, const void *from, int sz)
{
if (attempt_to_copy (to, from, sz)) /* { dg-message "when 'attempt_to_copy' fails" } */
__analyzer_dump_path (); /* { dg-message "path" } */
}
/* Aggregate of three ints, used as both source and destination of
   the copy in test_copy_2.  */
struct coord
{
int x;
int y;
int z;
};
/* Copy one struct coord over another via attempt_to_copy and check
   both outcomes: when the call returns non-zero the destination must
   still hold its initial values (1, 2, 3); when it returns zero the
   destination must hold the source's values (4, 5, 6).  */
void test_copy_2 (void)
{
struct coord to = {1, 2, 3};
struct coord from = {4, 5, 6};
if (attempt_to_copy (&to, &from, sizeof (struct coord)))
{
/* Failure. */
__analyzer_eval (to.x == 1); /* { dg-warning "TRUE" } */
__analyzer_eval (to.y == 2); /* { dg-warning "TRUE" } */
__analyzer_eval (to.z == 3); /* { dg-warning "TRUE" } */
}
else
{
/* Success. */
__analyzer_eval (to.x == 4); /* { dg-warning "TRUE" } */
__analyzer_eval (to.y == 5); /* { dg-warning "TRUE" } */
__analyzer_eval (to.z == 6); /* { dg-warning "TRUE" } */
}
}

View file

@ -123,6 +123,8 @@ set plugin_test_list [list \
dump-2.c } \
{ analyzer_gil_plugin.c \
gil-1.c } \
{ analyzer_known_fns_plugin.c \
known-fns-1.c } \
]
foreach plugin_test $plugin_test_list {