analyzer: new warning: -Wanalyzer-undefined-behavior-strtok [PR107573]
This patch: - adds support to the analyzer for tracking API-private state or which we don't have a decl (such as strtok's internal state), - uses it to implement a new -Wanalyzer-undefined-behavior-strtok which warns when strtok (NULL, delim) is called as the first call to strtok after main. gcc/analyzer/ChangeLog: PR analyzer/107573 * analyzer.h (register_known_functions): Add region_model_manager param. * analyzer.opt (Wanalyzer-undefined-behavior-strtok): New. * call-summary.cc (call_summary_replay::convert_region_from_summary_1): Handle RK_PRIVATE. * engine.cc (impl_run_checkers): Pass model manager to register_known_functions. * kf.cc (class undefined_function_behavior): New. (class kf_strtok): New. (register_known_functions): Add region_model_manager param. Use it to register "strtok". * region-model-manager.cc (region_model_manager::get_or_create_conjured_svalue): Add "idx" param. * region-model-manager.h (region_model_manager::get_or_create_conjured_svalue): Add "idx" param. (region_model_manager::get_root_region): New accessor. * region-model.cc (region_model::scan_for_null_terminator): Handle "expr" being null. (region_model::get_representative_path_var_1): Handle RK_PRIVATE. * region-model.h (region_model::called_from_main_p): Make public. * region.cc (region::get_memory_space): Handle RK_PRIVATE. (region::can_have_initial_svalue_p): Handle MEMSPACE_PRIVATE. (private_region::dump_to_pp): New. * region.h (MEMSPACE_PRIVATE): New. (RK_PRIVATE): New. (class private_region): New. (is_a_helper <const private_region *>::test): New. * store.cc (store::replay_call_summary_cluster): Handle RK_PRIVATE. * svalue.h (struct conjured_svalue::key_t): Add "idx" param to ctor and "m_idx" field. (class conjured_svalue::conjured_svalue): Likewise. gcc/ChangeLog: PR analyzer/107573 * doc/invoke.texi: Add -Wanalyzer-undefined-behavior-strtok. gcc/testsuite/ChangeLog: PR analyzer/107573 * c-c++-common/analyzer/strtok-1.c: New test. * c-c++-common/analyzer/strtok-2.c: New test. * c-c++-common/analyzer/strtok-3.c: New test. * c-c++-common/analyzer/strtok-4.c: New test. * c-c++-common/analyzer/strtok-cppreference.c: New test. Signed-off-by: David Malcolm <dmalcolm@redhat.com>
This commit is contained in:
parent
9d58d2d8ba
commit
f65f63c4d8
19 changed files with 619 additions and 17 deletions
|
@ -326,7 +326,8 @@ public:
|
|||
void impl_call_pre (const call_details &cd) const override;
|
||||
};
|
||||
|
||||
extern void register_known_functions (known_function_manager &mgr);
|
||||
extern void register_known_functions (known_function_manager &kfm,
|
||||
region_model_manager &rmm);
|
||||
extern void register_known_analyzer_functions (known_function_manager &kfm);
|
||||
extern void register_known_fd_functions (known_function_manager &kfm);
|
||||
extern void register_known_file_functions (known_function_manager &kfm);
|
||||
|
|
|
@ -222,6 +222,10 @@ Wanalyzer-tainted-size
|
|||
Common Var(warn_analyzer_tainted_size) Init(1) Warning
|
||||
Warn about code paths in which an unsanitized value is used as a size.
|
||||
|
||||
Wanalyzer-undefined-behavior-strtok
|
||||
Common Var(warn_analyzer_undefined_behavior_strtok) Init(1) Warning
|
||||
Warn about code paths in in which a call is made to strtok with undefined behavior.
|
||||
|
||||
Wanalyzer-use-after-free
|
||||
Common Var(warn_analyzer_use_after_free) Init(1) Warning
|
||||
Warn about code paths in which a freed value is used.
|
||||
|
|
|
@ -585,6 +585,7 @@ call_summary_replay::convert_region_from_summary_1 (const region *summary_reg)
|
|||
case RK_STRING:
|
||||
case RK_ERRNO:
|
||||
case RK_UNKNOWN:
|
||||
case RK_PRIVATE:
|
||||
/* We can reuse these regions directly. */
|
||||
return summary_reg;
|
||||
|
||||
|
|
|
@ -6174,7 +6174,8 @@ impl_run_checkers (logger *logger)
|
|||
auto_delete_vec <state_machine> checkers;
|
||||
make_checkers (checkers, logger);
|
||||
|
||||
register_known_functions (*eng.get_known_function_manager ());
|
||||
register_known_functions (*eng.get_known_function_manager (),
|
||||
*eng.get_model_manager ());
|
||||
|
||||
plugin_analyzer_init_impl data (&checkers,
|
||||
eng.get_known_function_manager (),
|
||||
|
|
|
@ -40,6 +40,40 @@ along with GCC; see the file COPYING3. If not see
|
|||
|
||||
namespace ana {
|
||||
|
||||
/* Abstract subclass for describing undefined behavior of an API. */
|
||||
|
||||
class undefined_function_behavior
|
||||
: public pending_diagnostic_subclass<undefined_function_behavior>
|
||||
{
|
||||
public:
|
||||
undefined_function_behavior (const call_details &cd)
|
||||
: m_call_stmt (cd.get_call_stmt ()),
|
||||
m_callee_fndecl (cd.get_fndecl_for_call ())
|
||||
{
|
||||
gcc_assert (m_call_stmt);
|
||||
gcc_assert (m_callee_fndecl);
|
||||
}
|
||||
|
||||
const char *get_kind () const final override
|
||||
{
|
||||
return "undefined_behavior";
|
||||
}
|
||||
|
||||
bool operator== (const undefined_function_behavior &other) const
|
||||
{
|
||||
return (m_call_stmt == other.m_call_stmt
|
||||
&& m_callee_fndecl == other.m_callee_fndecl);
|
||||
}
|
||||
|
||||
bool terminate_path_p () const final override { return true; }
|
||||
|
||||
tree get_callee_fndecl () const { return m_callee_fndecl; }
|
||||
|
||||
private:
|
||||
const gimple *m_call_stmt;
|
||||
tree m_callee_fndecl;
|
||||
};
|
||||
|
||||
/* class pure_known_function_with_default_return : public known_function. */
|
||||
|
||||
void
|
||||
|
@ -1679,6 +1713,288 @@ kf_strstr::impl_call_post (const call_details &cd) const
|
|||
}
|
||||
}
|
||||
|
||||
/* Handle calls to "strtok".
|
||||
See e.g.
|
||||
https://en.cppreference.com/w/c/string/byte/strtok
|
||||
https://man7.org/linux/man-pages/man3/strtok.3.html */
|
||||
|
||||
class kf_strtok : public known_function
|
||||
{
|
||||
public:
|
||||
class undefined_behavior : public undefined_function_behavior
|
||||
{
|
||||
public:
|
||||
undefined_behavior (const call_details &cd)
|
||||
: undefined_function_behavior (cd)
|
||||
{
|
||||
}
|
||||
int get_controlling_option () const final override
|
||||
{
|
||||
return OPT_Wanalyzer_undefined_behavior_strtok;
|
||||
}
|
||||
|
||||
bool emit (rich_location *rich_loc, logger *) final override
|
||||
{
|
||||
/* CWE-476: NULL Pointer Dereference. */
|
||||
diagnostic_metadata m;
|
||||
m.add_cwe (476);
|
||||
if (warning_meta
|
||||
(rich_loc, m, get_controlling_option (),
|
||||
"calling %qD for first time with NULL as argument 1"
|
||||
" has undefined behavior",
|
||||
get_callee_fndecl ()))
|
||||
{
|
||||
inform (rich_loc->get_loc (),
|
||||
"some implementations of %qD may crash on such input",
|
||||
get_callee_fndecl ());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
label_text describe_final_event (const evdesc::final_event &ev)
|
||||
final override
|
||||
{
|
||||
return ev.formatted_print
|
||||
("calling %qD for first time with NULL as argument 1"
|
||||
" has undefined behavior",
|
||||
get_callee_fndecl ());
|
||||
}
|
||||
};
|
||||
|
||||
/* An outcome of a "strtok" call.
|
||||
We have a four-way bifurcation of the analysis via the
|
||||
4 combinations of two flags:
|
||||
- m_nonnull_str covers whether the "str" param was null or non-null
|
||||
- m_found covers whether the result is null or non-null
|
||||
*/
|
||||
class strtok_call_info : public call_info
|
||||
{
|
||||
public:
|
||||
strtok_call_info (const call_details &cd,
|
||||
const private_region &private_reg,
|
||||
bool nonnull_str,
|
||||
bool found)
|
||||
: call_info (cd),
|
||||
m_private_reg (private_reg),
|
||||
m_nonnull_str (nonnull_str),
|
||||
m_found (found)
|
||||
{
|
||||
}
|
||||
|
||||
label_text get_desc (bool can_colorize) const final override
|
||||
{
|
||||
if (m_nonnull_str)
|
||||
{
|
||||
if (m_found)
|
||||
return make_label_text
|
||||
(can_colorize,
|
||||
"when %qE on non-NULL string returns non-NULL",
|
||||
get_fndecl ());
|
||||
else
|
||||
return make_label_text
|
||||
(can_colorize,
|
||||
"when %qE on non-NULL string returns NULL",
|
||||
get_fndecl ());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (m_found)
|
||||
return make_label_text
|
||||
(can_colorize,
|
||||
"when %qE with NULL string (using prior) returns non-NULL",
|
||||
get_fndecl ());
|
||||
else
|
||||
return make_label_text
|
||||
(can_colorize,
|
||||
"when %qE with NULL string (using prior) returns NULL",
|
||||
get_fndecl ());
|
||||
}
|
||||
}
|
||||
|
||||
bool update_model (region_model *model,
|
||||
const exploded_edge *,
|
||||
region_model_context *ctxt) const final override
|
||||
{
|
||||
region_model_manager *mgr = model->get_manager ();
|
||||
const call_details cd (get_call_details (model, ctxt));
|
||||
const svalue *str_sval = cd.get_arg_svalue (0);
|
||||
/* const svalue *delim_sval = cd.get_arg_svalue (1); */
|
||||
|
||||
cd.check_for_null_terminated_string_arg (1);
|
||||
/* We check that either arg 0 or the private region is null
|
||||
terminated below. */
|
||||
|
||||
const svalue *null_ptr_sval
|
||||
= mgr->get_or_create_null_ptr (cd.get_arg_type (0));;
|
||||
if (!model->add_constraint (str_sval,
|
||||
m_nonnull_str ? NE_EXPR : EQ_EXPR,
|
||||
null_ptr_sval,
|
||||
cd.get_ctxt ()))
|
||||
return false;
|
||||
|
||||
if (m_nonnull_str)
|
||||
{
|
||||
/* Update internal buffer. */
|
||||
model->set_value (&m_private_reg,
|
||||
mgr->get_or_create_unmergeable (str_sval),
|
||||
ctxt);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Read from internal buffer. */
|
||||
str_sval = model->get_store_value (&m_private_reg, ctxt);
|
||||
|
||||
/* The initial value of the private region is NULL when we're
|
||||
on a path from main. */
|
||||
if (const initial_svalue *initial_sval
|
||||
= str_sval->dyn_cast_initial_svalue ())
|
||||
if (initial_sval->get_region () == &m_private_reg
|
||||
&& model->called_from_main_p ())
|
||||
{
|
||||
/* Implementations of strtok do not necessarily check for NULL
|
||||
here, and may crash; see PR analyzer/107573.
|
||||
Warn for this, if we were definitely passed NULL. */
|
||||
if (cd.get_arg_svalue (0)->all_zeroes_p ())
|
||||
{
|
||||
if (ctxt)
|
||||
ctxt->warn (::make_unique<undefined_behavior> (cd));
|
||||
}
|
||||
|
||||
/* Assume that "str" was actually non-null; terminate
|
||||
this path. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Now assume str_sval is non-null. */
|
||||
if (!model->add_constraint (str_sval,
|
||||
NE_EXPR,
|
||||
null_ptr_sval,
|
||||
cd.get_ctxt ()))
|
||||
return false;
|
||||
}
|
||||
|
||||
const region *buf_reg = model->deref_rvalue (str_sval, NULL_TREE, ctxt);
|
||||
model->scan_for_null_terminator (buf_reg,
|
||||
NULL_TREE,
|
||||
nullptr,
|
||||
ctxt);
|
||||
|
||||
if (m_found)
|
||||
{
|
||||
const region *str_reg
|
||||
= model->deref_rvalue (str_sval, cd.get_arg_tree (0),
|
||||
cd.get_ctxt ());
|
||||
/* We want to figure out the start and nul terminator
|
||||
for the token.
|
||||
For each, we want str_sval + OFFSET for some unknown OFFSET.
|
||||
Use a conjured_svalue to represent the offset,
|
||||
using the str_reg as the id of the conjured_svalue. */
|
||||
const svalue *start_offset
|
||||
= mgr->get_or_create_conjured_svalue (size_type_node,
|
||||
cd.get_call_stmt (),
|
||||
str_reg,
|
||||
conjured_purge (model,
|
||||
ctxt),
|
||||
0);
|
||||
const svalue *nul_offset
|
||||
= mgr->get_or_create_conjured_svalue (size_type_node,
|
||||
cd.get_call_stmt (),
|
||||
str_reg,
|
||||
conjured_purge (model,
|
||||
ctxt),
|
||||
1);
|
||||
|
||||
tree char_ptr_type = build_pointer_type (char_type_node);
|
||||
const svalue *result
|
||||
= mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR,
|
||||
str_sval, start_offset);
|
||||
cd.maybe_set_lhs (result);
|
||||
|
||||
/* nul_offset + 1; the offset to use for the next call. */
|
||||
const svalue *next_offset
|
||||
= mgr->get_or_create_binop (size_type_node, PLUS_EXPR,
|
||||
nul_offset,
|
||||
mgr->get_or_create_int_cst
|
||||
(char_type_node, 1));
|
||||
|
||||
/* Write '\0' to str_sval[nul_offset]. */
|
||||
const svalue *ptr_to_term
|
||||
= mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR,
|
||||
str_sval, nul_offset);
|
||||
const region *terminator_reg
|
||||
= model->deref_rvalue (ptr_to_term, NULL_TREE, cd.get_ctxt ());
|
||||
model->set_value (terminator_reg,
|
||||
mgr->get_or_create_unmergeable
|
||||
(mgr->get_or_create_int_cst (char_type_node,
|
||||
0)),
|
||||
cd.get_ctxt ());
|
||||
|
||||
/* Update saved ptr to be at [nul_offset + 1]. */
|
||||
const svalue *ptr_to_next
|
||||
= mgr->get_or_create_binop (cd.get_lhs_type (), POINTER_PLUS_EXPR,
|
||||
str_sval, next_offset);
|
||||
model->set_value (&m_private_reg, ptr_to_next, ctxt);
|
||||
}
|
||||
else
|
||||
if (tree lhs_type = cd.get_lhs_type ())
|
||||
{
|
||||
const svalue *result
|
||||
= mgr->get_or_create_int_cst (lhs_type, 0);
|
||||
cd.maybe_set_lhs (result);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
const private_region &m_private_reg;
|
||||
bool m_nonnull_str;
|
||||
bool m_found;
|
||||
}; // class strtok_call_info
|
||||
|
||||
kf_strtok (region_model_manager &mgr)
|
||||
: m_private_reg (mgr.alloc_symbol_id (),
|
||||
mgr.get_root_region (),
|
||||
get_region_type (),
|
||||
"strtok buffer")
|
||||
{
|
||||
}
|
||||
|
||||
bool matches_call_types_p (const call_details &cd) const final override
|
||||
{
|
||||
return (cd.num_args () == 2
|
||||
&& POINTER_TYPE_P (cd.get_arg_type (0))
|
||||
&& POINTER_TYPE_P (cd.get_arg_type (1)));
|
||||
}
|
||||
|
||||
void impl_call_post (const call_details &cd) const final override
|
||||
{
|
||||
if (cd.get_ctxt ())
|
||||
{
|
||||
/* Four-way bifurcation, based on whether:
|
||||
- the str is non-null
|
||||
- the result is non-null
|
||||
Typically the str is either null or non-null at a particular site,
|
||||
so hopefully this will generally just lead to two out-edges. */
|
||||
cd.get_ctxt ()->bifurcate
|
||||
(make_unique<strtok_call_info> (cd, m_private_reg, false, false));
|
||||
cd.get_ctxt ()->bifurcate
|
||||
(make_unique<strtok_call_info> (cd, m_private_reg, false, true));
|
||||
cd.get_ctxt ()->bifurcate
|
||||
(make_unique<strtok_call_info> (cd, m_private_reg, true, false));
|
||||
cd.get_ctxt ()->bifurcate
|
||||
(make_unique<strtok_call_info> (cd, m_private_reg, true, true));
|
||||
cd.get_ctxt ()->terminate_path ();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static tree get_region_type ()
|
||||
{
|
||||
return build_pointer_type (char_type_node);
|
||||
}
|
||||
const private_region m_private_reg;
|
||||
};
|
||||
|
||||
class kf_ubsan_bounds : public internal_known_function
|
||||
{
|
||||
/* Empty. */
|
||||
|
@ -1823,7 +2139,8 @@ register_atomic_builtins (known_function_manager &kfm)
|
|||
analyzer (as opposed to plugins). */
|
||||
|
||||
void
|
||||
register_known_functions (known_function_manager &kfm)
|
||||
register_known_functions (known_function_manager &kfm,
|
||||
region_model_manager &rmm)
|
||||
{
|
||||
/* Debugging/test support functions, all with a "__analyzer_" prefix. */
|
||||
register_known_analyzer_functions (kfm);
|
||||
|
@ -1911,6 +2228,7 @@ register_known_functions (known_function_manager &kfm)
|
|||
{
|
||||
kfm.add ("fopen", make_unique<kf_fopen> ());
|
||||
kfm.add ("putenv", make_unique<kf_putenv> ());
|
||||
kfm.add ("strtok", make_unique<kf_strtok> (rmm));
|
||||
|
||||
register_known_fd_functions (kfm);
|
||||
register_known_file_functions (kfm);
|
||||
|
|
|
@ -1261,7 +1261,8 @@ conjured_purge::purge (const conjured_svalue *sval) const
|
|||
}
|
||||
|
||||
/* Return the svalue * of type TYPE for the value conjured for ID_REG
|
||||
at STMT, creating it if necessary.
|
||||
at STMT (using IDX for any further disambiguation),
|
||||
creating it if necessary.
|
||||
Use P to purge existing state from the svalue, for the case where a
|
||||
conjured_svalue would be reused along an execution path. */
|
||||
|
||||
|
@ -1269,9 +1270,10 @@ const svalue *
|
|||
region_model_manager::get_or_create_conjured_svalue (tree type,
|
||||
const gimple *stmt,
|
||||
const region *id_reg,
|
||||
const conjured_purge &p)
|
||||
const conjured_purge &p,
|
||||
unsigned idx)
|
||||
{
|
||||
conjured_svalue::key_t key (type, stmt, id_reg);
|
||||
conjured_svalue::key_t key (type, stmt, id_reg, idx);
|
||||
if (conjured_svalue **slot = m_conjured_values_map.get (key))
|
||||
{
|
||||
const conjured_svalue *sval = *slot;
|
||||
|
@ -1283,7 +1285,7 @@ region_model_manager::get_or_create_conjured_svalue (tree type,
|
|||
return sval;
|
||||
}
|
||||
conjured_svalue *conjured_sval
|
||||
= new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg);
|
||||
= new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg, idx);
|
||||
RETURN_UNKNOWN_IF_TOO_COMPLEX (conjured_sval);
|
||||
m_conjured_values_map.put (key, conjured_sval);
|
||||
return conjured_sval;
|
||||
|
|
|
@ -79,7 +79,8 @@ public:
|
|||
const binding_map &map);
|
||||
const svalue *get_or_create_conjured_svalue (tree type, const gimple *stmt,
|
||||
const region *id_reg,
|
||||
const conjured_purge &p);
|
||||
const conjured_purge &p,
|
||||
unsigned idx = 0);
|
||||
const svalue *
|
||||
get_or_create_asm_output_svalue (tree type,
|
||||
const gasm *asm_stmt,
|
||||
|
@ -105,6 +106,7 @@ public:
|
|||
const svalue *create_unique_svalue (tree type);
|
||||
|
||||
/* region consolidation. */
|
||||
const root_region *get_root_region () const { return &m_root_region; }
|
||||
const stack_region * get_stack_region () const { return &m_stack_region; }
|
||||
const heap_region *get_heap_region () const { return &m_heap_region; }
|
||||
const code_region *get_code_region () const { return &m_code_region; }
|
||||
|
|
|
@ -3884,8 +3884,10 @@ region_model::scan_for_null_terminator (const region *reg,
|
|||
byte_range bytes_to_read (src_byte_offset, 1);
|
||||
const svalue *sval = get_store_bytes (base_reg, bytes_to_read, ctxt);
|
||||
tree byte_expr
|
||||
= get_tree_for_byte_offset (expr,
|
||||
src_byte_offset - initial_src_byte_offset);
|
||||
= (expr
|
||||
? get_tree_for_byte_offset (expr,
|
||||
src_byte_offset - initial_src_byte_offset)
|
||||
: NULL_TREE);
|
||||
check_for_poison (sval, byte_expr, nullptr, ctxt);
|
||||
if (base_reg->can_have_initial_svalue_p ())
|
||||
{
|
||||
|
@ -5077,6 +5079,7 @@ region_model::get_representative_path_var_1 (const region *reg,
|
|||
case RK_VAR_ARG:
|
||||
case RK_ERRNO:
|
||||
case RK_UNKNOWN:
|
||||
case RK_PRIVATE:
|
||||
return path_var (NULL_TREE, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -559,6 +559,8 @@ class region_model
|
|||
callback (model, prev_model, retval, ctxt);
|
||||
}
|
||||
|
||||
bool called_from_main_p () const;
|
||||
|
||||
private:
|
||||
const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const;
|
||||
const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const;
|
||||
|
@ -606,7 +608,6 @@ private:
|
|||
bool nonnull,
|
||||
region_model_context *ctxt);
|
||||
|
||||
bool called_from_main_p () const;
|
||||
const svalue *get_initial_value_for_global (const region *reg) const;
|
||||
|
||||
const region * get_region_for_poisoned_expr (tree expr) const;
|
||||
|
|
|
@ -514,6 +514,8 @@ region::get_memory_space () const
|
|||
return MEMSPACE_HEAP;
|
||||
case RK_STRING:
|
||||
return MEMSPACE_READONLY_DATA;
|
||||
case RK_PRIVATE:
|
||||
return MEMSPACE_PRIVATE;
|
||||
}
|
||||
if (iter->get_kind () == RK_CAST)
|
||||
iter = iter->dyn_cast_cast_region ()->get_original_region ();
|
||||
|
@ -543,6 +545,7 @@ region::can_have_initial_svalue_p () const
|
|||
case MEMSPACE_CODE:
|
||||
case MEMSPACE_GLOBALS:
|
||||
case MEMSPACE_READONLY_DATA:
|
||||
case MEMSPACE_PRIVATE:
|
||||
/* Such regions have initial_svalues. */
|
||||
return true;
|
||||
|
||||
|
@ -2259,6 +2262,17 @@ errno_region::dump_to_pp (pretty_printer *pp, bool simple) const
|
|||
pp_string (pp, "errno_region()");
|
||||
}
|
||||
|
||||
/* class private_region : public region. */
|
||||
|
||||
void
|
||||
private_region::dump_to_pp (pretty_printer *pp, bool simple) const
|
||||
{
|
||||
if (simple)
|
||||
pp_printf (pp, "PRIVATE_REG(%qs)", m_desc);
|
||||
else
|
||||
pp_printf (pp, "private_region(%qs)", m_desc);
|
||||
}
|
||||
|
||||
/* class unknown_region : public region. */
|
||||
|
||||
/* Implementation of region::dump_to_pp vfunc for unknown_region. */
|
||||
|
|
|
@ -35,7 +35,8 @@ enum memory_space
|
|||
MEMSPACE_STACK,
|
||||
MEMSPACE_HEAP,
|
||||
MEMSPACE_READONLY_DATA,
|
||||
MEMSPACE_THREAD_LOCAL
|
||||
MEMSPACE_THREAD_LOCAL,
|
||||
MEMSPACE_PRIVATE
|
||||
};
|
||||
|
||||
/* An enum for discriminating between the different concrete subclasses
|
||||
|
@ -65,6 +66,7 @@ enum region_kind
|
|||
RK_BIT_RANGE,
|
||||
RK_VAR_ARG,
|
||||
RK_ERRNO,
|
||||
RK_PRIVATE,
|
||||
RK_UNKNOWN,
|
||||
};
|
||||
|
||||
|
@ -108,6 +110,7 @@ enum region_kind
|
|||
var_arg_region (RK_VAR_ARG): a region for the N-th vararg within a
|
||||
frame_region for a variadic call
|
||||
errno_region (RK_ERRNO): a region for holding "errno"
|
||||
private_region (RK_PRIVATE): a region for internal state of an API
|
||||
unknown_region (RK_UNKNOWN): for handling unimplemented tree codes. */
|
||||
|
||||
/* Abstract base class for representing ways of accessing chunks of memory.
|
||||
|
@ -1434,6 +1437,42 @@ is_a_helper <const errno_region *>::test (const region *reg)
|
|||
|
||||
namespace ana {
|
||||
|
||||
/* Similar to a decl region, but we don't have the decl.
|
||||
For implementing e.g. static buffers of known_functions,
|
||||
or other internal state of an API.
|
||||
|
||||
These are owned by known_function instances, rather than the
|
||||
region_model_manager. */
|
||||
|
||||
class private_region : public region
|
||||
{
|
||||
public:
|
||||
private_region (unsigned id, const region *parent, tree type,
|
||||
const char *desc)
|
||||
: region (complexity (parent), id, parent, type),
|
||||
m_desc (desc)
|
||||
{}
|
||||
|
||||
enum region_kind get_kind () const final override { return RK_PRIVATE; }
|
||||
|
||||
void dump_to_pp (pretty_printer *pp, bool simple) const final override;
|
||||
|
||||
private:
|
||||
const char *m_desc;
|
||||
};
|
||||
|
||||
} // namespace ana
|
||||
|
||||
template <>
|
||||
template <>
|
||||
inline bool
|
||||
is_a_helper <const private_region *>::test (const region *reg)
|
||||
{
|
||||
return reg->get_kind () == RK_PRIVATE;
|
||||
}
|
||||
|
||||
namespace ana {
|
||||
|
||||
/* An unknown region, for handling unimplemented tree codes. */
|
||||
|
||||
class unknown_region : public region
|
||||
|
|
|
@ -3372,6 +3372,7 @@ store::replay_call_summary_cluster (call_summary_replay &r,
|
|||
case RK_HEAP_ALLOCATED:
|
||||
case RK_DECL:
|
||||
case RK_ERRNO:
|
||||
case RK_PRIVATE:
|
||||
{
|
||||
const region *caller_dest_reg
|
||||
= r.convert_region_from_summary (summary_base_reg);
|
||||
|
|
|
@ -1360,8 +1360,8 @@ public:
|
|||
/* A support class for uniquifying instances of conjured_svalue. */
|
||||
struct key_t
|
||||
{
|
||||
key_t (tree type, const gimple *stmt, const region *id_reg)
|
||||
: m_type (type), m_stmt (stmt), m_id_reg (id_reg)
|
||||
key_t (tree type, const gimple *stmt, const region *id_reg, unsigned idx)
|
||||
: m_type (type), m_stmt (stmt), m_id_reg (id_reg), m_idx (idx)
|
||||
{}
|
||||
|
||||
hashval_t hash () const
|
||||
|
@ -1377,7 +1377,8 @@ public:
|
|||
{
|
||||
return (m_type == other.m_type
|
||||
&& m_stmt == other.m_stmt
|
||||
&& m_id_reg == other.m_id_reg);
|
||||
&& m_id_reg == other.m_id_reg
|
||||
&& m_idx == other.m_idx);
|
||||
}
|
||||
|
||||
/* Use m_stmt to mark empty/deleted, as m_type can be NULL for
|
||||
|
@ -1393,12 +1394,13 @@ public:
|
|||
tree m_type;
|
||||
const gimple *m_stmt;
|
||||
const region *m_id_reg;
|
||||
unsigned m_idx;
|
||||
};
|
||||
|
||||
conjured_svalue (symbol::id_t id, tree type, const gimple *stmt,
|
||||
const region *id_reg)
|
||||
const region *id_reg, unsigned idx)
|
||||
: svalue (complexity (id_reg), id, type),
|
||||
m_stmt (stmt), m_id_reg (id_reg)
|
||||
m_stmt (stmt), m_id_reg (id_reg), m_idx (idx)
|
||||
{
|
||||
gcc_assert (m_stmt != NULL);
|
||||
}
|
||||
|
@ -1419,6 +1421,7 @@ public:
|
|||
private:
|
||||
const gimple *m_stmt;
|
||||
const region *m_id_reg;
|
||||
unsigned m_idx;
|
||||
};
|
||||
|
||||
} // namespace ana
|
||||
|
|
|
@ -490,6 +490,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-Wno-analyzer-tainted-offset
|
||||
-Wno-analyzer-tainted-size
|
||||
-Wanalyzer-too-complex
|
||||
-Wno-analyzer-undefined-behavior-strtok
|
||||
-Wno-analyzer-unsafe-call-within-signal-handler
|
||||
-Wno-analyzer-use-after-free
|
||||
-Wno-analyzer-use-of-pointer-in-stale-stack-frame
|
||||
|
@ -10424,6 +10425,7 @@ Enabling this option effectively enables the following warnings:
|
|||
-Wanalyzer-tainted-divisor
|
||||
-Wanalyzer-tainted-offset
|
||||
-Wanalyzer-tainted-size
|
||||
-Wanalyzer-undefined-behavior-strtok
|
||||
-Wanalyzer-unsafe-call-within-signal-handler
|
||||
-Wanalyzer-use-after-free
|
||||
-Wanalyzer-use-of-pointer-in-stale-stack-frame
|
||||
|
@ -11060,6 +11062,18 @@ attacker could inject an out-of-bounds access.
|
|||
|
||||
See @uref{https://cwe.mitre.org/data/definitions/129.html, CWE-129: Improper Validation of Array Index}.
|
||||
|
||||
@opindex Wanalyzer-undefined-behavior-strtok
|
||||
@opindex Wno-analyzer-undefined-behavior-strtok
|
||||
@item -Wno-analyzer-undefined-behavior-strtok
|
||||
This warning requires @option{-fanalyzer}, which enables it; use
|
||||
@option{-Wno-analyzer-undefined-behavior-strtok} to disable it.
|
||||
|
||||
This diagnostic warns for paths through the code in which a
|
||||
call is made to @code{strtok} with undefined behavior.
|
||||
|
||||
Specifically, passing NULL as the first parameter for the initial
|
||||
call to @code{strtok} within a process has undefined behavior.
|
||||
|
||||
@opindex Wanalyzer-unsafe-call-within-signal-handler
|
||||
@opindex Wno-analyzer-unsafe-call-within-signal-handler
|
||||
@item -Wno-analyzer-unsafe-call-within-signal-handler
|
||||
|
|
62
gcc/testsuite/c-c++-common/analyzer/strtok-1.c
Normal file
62
gcc/testsuite/c-c++-common/analyzer/strtok-1.c
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* { dg-additional-options "-fpermissive" { target c++ } } */
|
||||
|
||||
#include "../../gcc.dg/analyzer/analyzer-decls.h"
|
||||
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
|
||||
extern char *strtok (char *str, const char *delim)
|
||||
__attribute__((nonnull (2)));
|
||||
|
||||
char *
|
||||
test_passthrough (char *str, const char *delim)
|
||||
{
|
||||
return strtok (str, delim);
|
||||
}
|
||||
|
||||
char *
|
||||
test_null_str (const char *delim)
|
||||
{
|
||||
return strtok (NULL, delim);
|
||||
}
|
||||
|
||||
char *
|
||||
test_null_delim (char *str)
|
||||
{
|
||||
return strtok (str, NULL); /* { dg-warning "use of NULL where non-null expected" } */
|
||||
/* This is from the attribute. */
|
||||
}
|
||||
|
||||
char *
|
||||
test_write_to_literal (void)
|
||||
{
|
||||
const char *str = "hello world";
|
||||
return strtok ((char *)str, " "); /* { dg-warning "write to string literal" } */
|
||||
}
|
||||
|
||||
char *
|
||||
test_unterminated_str (const char *delim)
|
||||
{
|
||||
char str[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */
|
||||
return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */
|
||||
}
|
||||
|
||||
char *
|
||||
test_unterminated_delimstr (char *str)
|
||||
{
|
||||
char delim[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */
|
||||
return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */
|
||||
/* { dg-message "while looking for null terminator for argument 2" "" { target *-*-* } .-1 } */
|
||||
}
|
||||
|
||||
size_t
|
||||
test_use_after_free_via_null_2 (char *p)
|
||||
{
|
||||
strtok (p, " ");
|
||||
__builtin_free (p);
|
||||
|
||||
char *q = strtok (NULL, " "); /* TODO: should complain about this. */
|
||||
if (q)
|
||||
return __builtin_strlen (q); /* TODO: should complain about this. */
|
||||
else
|
||||
return 0;
|
||||
}
|
18
gcc/testsuite/c-c++-common/analyzer/strtok-2.c
Normal file
18
gcc/testsuite/c-c++-common/analyzer/strtok-2.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
#include "../../gcc.dg/analyzer/analyzer-decls.h"
|
||||
|
||||
extern char *strtok (char *str, const char *delim)
|
||||
__attribute__((nonnull (2)));
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
char *cmd;
|
||||
char *arg;
|
||||
|
||||
if (argc < 2)
|
||||
return -1;
|
||||
|
||||
cmd = strtok (argv[1], " "); /* { dg-bogus "undefined behavior" } */
|
||||
arg = strtok (NULL, " ");
|
||||
return 0;
|
||||
}
|
26
gcc/testsuite/c-c++-common/analyzer/strtok-3.c
Normal file
26
gcc/testsuite/c-c++-common/analyzer/strtok-3.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
#include "../../gcc.dg/analyzer/analyzer-decls.h"
|
||||
|
||||
extern char *strtok (char *str, const char *delim)
|
||||
__attribute__((nonnull (2)));
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
char *cmd;
|
||||
char *arg;
|
||||
|
||||
if (argc < 2)
|
||||
return -1;
|
||||
|
||||
cmd = strtok (NULL, " "); /* { dg-line "first_call" } */
|
||||
arg = strtok (NULL, " ");
|
||||
return 0;
|
||||
|
||||
/* C:
|
||||
{ dg-warning "calling 'strtok' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c } first_call }
|
||||
{ dg-message "some implementations of 'strtok' may crash on such input" "" { target c } first_call } */
|
||||
|
||||
/* C++:
|
||||
{ dg-warning "calling 'char\\* strtok\\(char\\*, const char\\*\\)' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c++ } first_call }
|
||||
{ dg-message "some implementations of 'char\\* strtok\\(char\\*, const char\\*\\)' may crash on such input" "" { target c++ } first_call } */
|
||||
}
|
42
gcc/testsuite/c-c++-common/analyzer/strtok-4.c
Normal file
42
gcc/testsuite/c-c++-common/analyzer/strtok-4.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
#include "../../gcc.dg/analyzer/analyzer-decls.h"
|
||||
|
||||
extern char *strtok (char *str, const char *delim);
|
||||
|
||||
void test (void)
|
||||
{
|
||||
char buffer[] = { 'a', 'x', 'b', 'y', 'c', '\0' };
|
||||
|
||||
char *p1 = strtok (buffer, "x");
|
||||
/* Should result in:
|
||||
| buffer[] = { 'a', '\0', 'b', 'y', 'c', '\0' },
|
||||
| ^ ^ ^
|
||||
| | | |
|
||||
| | | internal ptr
|
||||
| p1 modified. */
|
||||
|
||||
char *p2 = strtok (NULL, "y"); /* note new delimiter. */
|
||||
/* Should result in:
|
||||
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
|
||||
| ^ ^ ^
|
||||
| | | |
|
||||
| | | internal ptr
|
||||
| p2 modified. */
|
||||
|
||||
char *p3 = strtok (NULL, "z"); /* again new delimiter. */
|
||||
/* Should result in:
|
||||
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
|
||||
| ^ ^~
|
||||
| | |
|
||||
| | internal ptr
|
||||
| p3. */
|
||||
|
||||
char *p4 = strtok (NULL, "z");
|
||||
/* Should result in p4 == NULL, and:
|
||||
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
|
||||
| ^~
|
||||
| |
|
||||
| internal ptr. */
|
||||
|
||||
/* We don't yet model strtok closely enough to capture
|
||||
these exact behaviors. */
|
||||
}
|
50
gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
Normal file
50
gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* Example of strtok adapted from:
|
||||
https://en.cppreference.com/w/c/string/byte/strtok
|
||||
which is
|
||||
"licensed under Creative Commons Attribution-Sharealike 3.0
|
||||
Unported License (CC-BY-SA) and by the GNU Free Documentation License
|
||||
(GFDL) (unversioned, with no invariant sections, front-cover texts, or
|
||||
back-cover texts). That means that you can use this site in almost any way
|
||||
you like, including mirroring, copying, translating, etc. All we would ask
|
||||
is to provide link back to cppreference.com so that people know where to
|
||||
get the most up-to-date content. In addition to that, any modified content
|
||||
should be released under an equivalent license so that everyone could
|
||||
benefit from the modified versions. " */
|
||||
|
||||
#define __STDC_WANT_LIB_EXT1__ 0
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
int main(void)
|
||||
{
|
||||
char input[] = "A bird came down the walk";
|
||||
printf("Parsing the input string '%s'\n", input);
|
||||
char *token = strtok(input, " ");
|
||||
while(token) {
|
||||
puts(token);
|
||||
token = strtok(NULL, " ");
|
||||
}
|
||||
|
||||
printf("Contents of the input string now: '");
|
||||
for(size_t n = 0; n < sizeof input; ++n)
|
||||
input[n] ? putchar(input[n]) : fputs("\\0", stdout);
|
||||
puts("'");
|
||||
|
||||
#ifdef __STDC_LIB_EXT1__
|
||||
char str[] = "A bird came down the walk";
|
||||
rsize_t strmax = sizeof str;
|
||||
const char *delim = " ";
|
||||
char *next_token;
|
||||
printf("Parsing the input string '%s'\n", str);
|
||||
token = strtok_s(str, &strmax, delim, &next_token);
|
||||
while(token) {
|
||||
puts(token);
|
||||
token = strtok_s(NULL, &strmax, delim, &next_token);
|
||||
}
|
||||
|
||||
printf("Contents of the input string now: '");
|
||||
for(size_t n = 0; n < sizeof str; ++n)
|
||||
str[n] ? putchar(str[n]) : fputs("\\0", stdout);
|
||||
puts("'");
|
||||
#endif
|
||||
}
|
Loading…
Add table
Reference in a new issue