analyzer: new warning: -Wanalyzer-undefined-behavior-strtok [PR107573]

This patch:
- adds support to the analyzer for tracking API-private state
  or which we don't have a decl (such as strtok's internal state),
- uses it to implement a new -Wanalyzer-undefined-behavior-strtok which
  warns when strtok (NULL, delim) is called as the first call to
  strtok after main.

gcc/analyzer/ChangeLog:
	PR analyzer/107573
	* analyzer.h (register_known_functions): Add region_model_manager
	param.
	* analyzer.opt (Wanalyzer-undefined-behavior-strtok): New.
	* call-summary.cc
	(call_summary_replay::convert_region_from_summary_1): Handle
	RK_PRIVATE.
	* engine.cc (impl_run_checkers): Pass model manager to
	register_known_functions.
	* kf.cc (class undefined_function_behavior): New.
	(class kf_strtok): New.
	(register_known_functions): Add region_model_manager param.
	Use it to register "strtok".
	* region-model-manager.cc
	(region_model_manager::get_or_create_conjured_svalue): Add "idx"
	param.
	* region-model-manager.h
	(region_model_manager::get_or_create_conjured_svalue): Add "idx"
	param.
	(region_model_manager::get_root_region): New accessor.
	* region-model.cc (region_model::scan_for_null_terminator): Handle
	"expr" being null.
	(region_model::get_representative_path_var_1): Handle RK_PRIVATE.
	* region-model.h (region_model::called_from_main_p): Make public.
	* region.cc (region::get_memory_space): Handle RK_PRIVATE.
	(region::can_have_initial_svalue_p): Handle MEMSPACE_PRIVATE.
	(private_region::dump_to_pp): New.
	* region.h (MEMSPACE_PRIVATE): New.
	(RK_PRIVATE): New.
	(class private_region): New.
	(is_a_helper <const private_region *>::test): New.
	* store.cc (store::replay_call_summary_cluster): Handle
	RK_PRIVATE.
	* svalue.h (struct conjured_svalue::key_t): Add "idx" param to
	ctor and "m_idx" field.
	(class conjured_svalue::conjured_svalue): Likewise.

gcc/ChangeLog:
	PR analyzer/107573
	* doc/invoke.texi: Add -Wanalyzer-undefined-behavior-strtok.

gcc/testsuite/ChangeLog:
	PR analyzer/107573
	* c-c++-common/analyzer/strtok-1.c: New test.
	* c-c++-common/analyzer/strtok-2.c: New test.
	* c-c++-common/analyzer/strtok-3.c: New test.
	* c-c++-common/analyzer/strtok-4.c: New test.
	* c-c++-common/analyzer/strtok-cppreference.c: New test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
This commit is contained in:
David Malcolm 2023-11-18 20:35:59 -05:00
parent 9d58d2d8ba
commit f65f63c4d8
19 changed files with 619 additions and 17 deletions

View file

@ -326,7 +326,8 @@ public:
void impl_call_pre (const call_details &cd) const override;
};
extern void register_known_functions (known_function_manager &mgr);
extern void register_known_functions (known_function_manager &kfm,
region_model_manager &rmm);
extern void register_known_analyzer_functions (known_function_manager &kfm);
extern void register_known_fd_functions (known_function_manager &kfm);
extern void register_known_file_functions (known_function_manager &kfm);

View file

@ -222,6 +222,10 @@ Wanalyzer-tainted-size
Common Var(warn_analyzer_tainted_size) Init(1) Warning
Warn about code paths in which an unsanitized value is used as a size.
Wanalyzer-undefined-behavior-strtok
Common Var(warn_analyzer_undefined_behavior_strtok) Init(1) Warning
Warn about code paths in in which a call is made to strtok with undefined behavior.
Wanalyzer-use-after-free
Common Var(warn_analyzer_use_after_free) Init(1) Warning
Warn about code paths in which a freed value is used.

View file

@ -585,6 +585,7 @@ call_summary_replay::convert_region_from_summary_1 (const region *summary_reg)
case RK_STRING:
case RK_ERRNO:
case RK_UNKNOWN:
case RK_PRIVATE:
/* We can reuse these regions directly. */
return summary_reg;

View file

@ -6174,7 +6174,8 @@ impl_run_checkers (logger *logger)
auto_delete_vec <state_machine> checkers;
make_checkers (checkers, logger);
register_known_functions (*eng.get_known_function_manager ());
register_known_functions (*eng.get_known_function_manager (),
*eng.get_model_manager ());
plugin_analyzer_init_impl data (&checkers,
eng.get_known_function_manager (),

View file

@ -40,6 +40,40 @@ along with GCC; see the file COPYING3. If not see
namespace ana {
/* Abstract subclass for describing undefined behavior of an API. */
class undefined_function_behavior
: public pending_diagnostic_subclass<undefined_function_behavior>
{
public:
undefined_function_behavior (const call_details &cd)
: m_call_stmt (cd.get_call_stmt ()),
m_callee_fndecl (cd.get_fndecl_for_call ())
{
gcc_assert (m_call_stmt);
gcc_assert (m_callee_fndecl);
}
const char *get_kind () const final override
{
return "undefined_behavior";
}
bool operator== (const undefined_function_behavior &other) const
{
return (m_call_stmt == other.m_call_stmt
&& m_callee_fndecl == other.m_callee_fndecl);
}
bool terminate_path_p () const final override { return true; }
tree get_callee_fndecl () const { return m_callee_fndecl; }
private:
const gimple *m_call_stmt;
tree m_callee_fndecl;
};
/* class pure_known_function_with_default_return : public known_function. */
void
@ -1679,6 +1713,288 @@ kf_strstr::impl_call_post (const call_details &cd) const
}
}
/* Handle calls to "strtok".
See e.g.
https://en.cppreference.com/w/c/string/byte/strtok
https://man7.org/linux/man-pages/man3/strtok.3.html */
class kf_strtok : public known_function
{
public:
class undefined_behavior : public undefined_function_behavior
{
public:
undefined_behavior (const call_details &cd)
: undefined_function_behavior (cd)
{
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_undefined_behavior_strtok;
}
bool emit (rich_location *rich_loc, logger *) final override
{
/* CWE-476: NULL Pointer Dereference. */
diagnostic_metadata m;
m.add_cwe (476);
if (warning_meta
(rich_loc, m, get_controlling_option (),
"calling %qD for first time with NULL as argument 1"
" has undefined behavior",
get_callee_fndecl ()))
{
inform (rich_loc->get_loc (),
"some implementations of %qD may crash on such input",
get_callee_fndecl ());
return true;
}
return false;
}
label_text describe_final_event (const evdesc::final_event &ev)
final override
{
return ev.formatted_print
("calling %qD for first time with NULL as argument 1"
" has undefined behavior",
get_callee_fndecl ());
}
};
/* An outcome of a "strtok" call.
We have a four-way bifurcation of the analysis via the
4 combinations of two flags:
- m_nonnull_str covers whether the "str" param was null or non-null
- m_found covers whether the result is null or non-null
*/
class strtok_call_info : public call_info
{
public:
strtok_call_info (const call_details &cd,
const private_region &private_reg,
bool nonnull_str,
bool found)
: call_info (cd),
m_private_reg (private_reg),
m_nonnull_str (nonnull_str),
m_found (found)
{
}
label_text get_desc (bool can_colorize) const final override
{
if (m_nonnull_str)
{
if (m_found)
return make_label_text
(can_colorize,
"when %qE on non-NULL string returns non-NULL",
get_fndecl ());
else
return make_label_text
(can_colorize,
"when %qE on non-NULL string returns NULL",
get_fndecl ());
}
else
{
if (m_found)
return make_label_text
(can_colorize,
"when %qE with NULL string (using prior) returns non-NULL",
get_fndecl ());
else
return make_label_text
(can_colorize,
"when %qE with NULL string (using prior) returns NULL",
get_fndecl ());
}
}
bool update_model (region_model *model,
const exploded_edge *,
region_model_context *ctxt) const final override
{
region_model_manager *mgr = model->get_manager ();
const call_details cd (get_call_details (model, ctxt));
const svalue *str_sval = cd.get_arg_svalue (0);
/* const svalue *delim_sval = cd.get_arg_svalue (1); */
cd.check_for_null_terminated_string_arg (1);
/* We check that either arg 0 or the private region is null
terminated below. */
const svalue *null_ptr_sval
= mgr->get_or_create_null_ptr (cd.get_arg_type (0));;
if (!model->add_constraint (str_sval,
m_nonnull_str ? NE_EXPR : EQ_EXPR,
null_ptr_sval,
cd.get_ctxt ()))
return false;
if (m_nonnull_str)
{
/* Update internal buffer. */
model->set_value (&m_private_reg,
mgr->get_or_create_unmergeable (str_sval),
ctxt);
}
else
{
/* Read from internal buffer. */
str_sval = model->get_store_value (&m_private_reg, ctxt);
/* The initial value of the private region is NULL when we're
on a path from main. */
if (const initial_svalue *initial_sval
= str_sval->dyn_cast_initial_svalue ())
if (initial_sval->get_region () == &m_private_reg
&& model->called_from_main_p ())
{
/* Implementations of strtok do not necessarily check for NULL
here, and may crash; see PR analyzer/107573.
Warn for this, if we were definitely passed NULL. */
if (cd.get_arg_svalue (0)->all_zeroes_p ())
{
if (ctxt)
ctxt->warn (::make_unique<undefined_behavior> (cd));
}
/* Assume that "str" was actually non-null; terminate
this path. */
return false;
}
/* Now assume str_sval is non-null. */
if (!model->add_constraint (str_sval,
NE_EXPR,
null_ptr_sval,
cd.get_ctxt ()))
return false;
}
const region *buf_reg = model->deref_rvalue (str_sval, NULL_TREE, ctxt);
model->scan_for_null_terminator (buf_reg,
NULL_TREE,
nullptr,
ctxt);
if (m_found)
{
const region *str_reg
= model->deref_rvalue (str_sval, cd.get_arg_tree (0),
cd.get_ctxt ());
/* We want to figure out the start and nul terminator
for the token.
For each, we want str_sval + OFFSET for some unknown OFFSET.
Use a conjured_svalue to represent the offset,
using the str_reg as the id of the conjured_svalue. */
const svalue *start_offset
= mgr->get_or_create_conjured_svalue (size_type_node,
cd.get_call_stmt (),
str_reg,
conjured_purge (model,
ctxt),
0);
const svalue *nul_offset
= mgr->get_or_create_conjured_svalue (size_type_node,
cd.get_call_stmt (),
str_reg,
conjured_purge (model,
ctxt),
1);
tree char_ptr_type = build_pointer_type (char_type_node);
const svalue *result
= mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR,
str_sval, start_offset);
cd.maybe_set_lhs (result);
/* nul_offset + 1; the offset to use for the next call. */
const svalue *next_offset
= mgr->get_or_create_binop (size_type_node, PLUS_EXPR,
nul_offset,
mgr->get_or_create_int_cst
(char_type_node, 1));
/* Write '\0' to str_sval[nul_offset]. */
const svalue *ptr_to_term
= mgr->get_or_create_binop (char_ptr_type, POINTER_PLUS_EXPR,
str_sval, nul_offset);
const region *terminator_reg
= model->deref_rvalue (ptr_to_term, NULL_TREE, cd.get_ctxt ());
model->set_value (terminator_reg,
mgr->get_or_create_unmergeable
(mgr->get_or_create_int_cst (char_type_node,
0)),
cd.get_ctxt ());
/* Update saved ptr to be at [nul_offset + 1]. */
const svalue *ptr_to_next
= mgr->get_or_create_binop (cd.get_lhs_type (), POINTER_PLUS_EXPR,
str_sval, next_offset);
model->set_value (&m_private_reg, ptr_to_next, ctxt);
}
else
if (tree lhs_type = cd.get_lhs_type ())
{
const svalue *result
= mgr->get_or_create_int_cst (lhs_type, 0);
cd.maybe_set_lhs (result);
}
return true;
}
private:
const private_region &m_private_reg;
bool m_nonnull_str;
bool m_found;
}; // class strtok_call_info
kf_strtok (region_model_manager &mgr)
: m_private_reg (mgr.alloc_symbol_id (),
mgr.get_root_region (),
get_region_type (),
"strtok buffer")
{
}
bool matches_call_types_p (const call_details &cd) const final override
{
return (cd.num_args () == 2
&& POINTER_TYPE_P (cd.get_arg_type (0))
&& POINTER_TYPE_P (cd.get_arg_type (1)));
}
void impl_call_post (const call_details &cd) const final override
{
if (cd.get_ctxt ())
{
/* Four-way bifurcation, based on whether:
- the str is non-null
- the result is non-null
Typically the str is either null or non-null at a particular site,
so hopefully this will generally just lead to two out-edges. */
cd.get_ctxt ()->bifurcate
(make_unique<strtok_call_info> (cd, m_private_reg, false, false));
cd.get_ctxt ()->bifurcate
(make_unique<strtok_call_info> (cd, m_private_reg, false, true));
cd.get_ctxt ()->bifurcate
(make_unique<strtok_call_info> (cd, m_private_reg, true, false));
cd.get_ctxt ()->bifurcate
(make_unique<strtok_call_info> (cd, m_private_reg, true, true));
cd.get_ctxt ()->terminate_path ();
}
}
private:
static tree get_region_type ()
{
return build_pointer_type (char_type_node);
}
const private_region m_private_reg;
};
class kf_ubsan_bounds : public internal_known_function
{
/* Empty. */
@ -1823,7 +2139,8 @@ register_atomic_builtins (known_function_manager &kfm)
analyzer (as opposed to plugins). */
void
register_known_functions (known_function_manager &kfm)
register_known_functions (known_function_manager &kfm,
region_model_manager &rmm)
{
/* Debugging/test support functions, all with a "__analyzer_" prefix. */
register_known_analyzer_functions (kfm);
@ -1911,6 +2228,7 @@ register_known_functions (known_function_manager &kfm)
{
kfm.add ("fopen", make_unique<kf_fopen> ());
kfm.add ("putenv", make_unique<kf_putenv> ());
kfm.add ("strtok", make_unique<kf_strtok> (rmm));
register_known_fd_functions (kfm);
register_known_file_functions (kfm);

View file

@ -1261,7 +1261,8 @@ conjured_purge::purge (const conjured_svalue *sval) const
}
/* Return the svalue * of type TYPE for the value conjured for ID_REG
at STMT, creating it if necessary.
at STMT (using IDX for any further disambiguation),
creating it if necessary.
Use P to purge existing state from the svalue, for the case where a
conjured_svalue would be reused along an execution path. */
@ -1269,9 +1270,10 @@ const svalue *
region_model_manager::get_or_create_conjured_svalue (tree type,
const gimple *stmt,
const region *id_reg,
const conjured_purge &p)
const conjured_purge &p,
unsigned idx)
{
conjured_svalue::key_t key (type, stmt, id_reg);
conjured_svalue::key_t key (type, stmt, id_reg, idx);
if (conjured_svalue **slot = m_conjured_values_map.get (key))
{
const conjured_svalue *sval = *slot;
@ -1283,7 +1285,7 @@ region_model_manager::get_or_create_conjured_svalue (tree type,
return sval;
}
conjured_svalue *conjured_sval
= new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg);
= new conjured_svalue (alloc_symbol_id (), type, stmt, id_reg, idx);
RETURN_UNKNOWN_IF_TOO_COMPLEX (conjured_sval);
m_conjured_values_map.put (key, conjured_sval);
return conjured_sval;

View file

@ -79,7 +79,8 @@ public:
const binding_map &map);
const svalue *get_or_create_conjured_svalue (tree type, const gimple *stmt,
const region *id_reg,
const conjured_purge &p);
const conjured_purge &p,
unsigned idx = 0);
const svalue *
get_or_create_asm_output_svalue (tree type,
const gasm *asm_stmt,
@ -105,6 +106,7 @@ public:
const svalue *create_unique_svalue (tree type);
/* region consolidation. */
const root_region *get_root_region () const { return &m_root_region; }
const stack_region * get_stack_region () const { return &m_stack_region; }
const heap_region *get_heap_region () const { return &m_heap_region; }
const code_region *get_code_region () const { return &m_code_region; }

View file

@ -3884,8 +3884,10 @@ region_model::scan_for_null_terminator (const region *reg,
byte_range bytes_to_read (src_byte_offset, 1);
const svalue *sval = get_store_bytes (base_reg, bytes_to_read, ctxt);
tree byte_expr
= get_tree_for_byte_offset (expr,
src_byte_offset - initial_src_byte_offset);
= (expr
? get_tree_for_byte_offset (expr,
src_byte_offset - initial_src_byte_offset)
: NULL_TREE);
check_for_poison (sval, byte_expr, nullptr, ctxt);
if (base_reg->can_have_initial_svalue_p ())
{
@ -5077,6 +5079,7 @@ region_model::get_representative_path_var_1 (const region *reg,
case RK_VAR_ARG:
case RK_ERRNO:
case RK_UNKNOWN:
case RK_PRIVATE:
return path_var (NULL_TREE, 0);
}
}

View file

@ -559,6 +559,8 @@ class region_model
callback (model, prev_model, retval, ctxt);
}
bool called_from_main_p () const;
private:
const region *get_lvalue_1 (path_var pv, region_model_context *ctxt) const;
const svalue *get_rvalue_1 (path_var pv, region_model_context *ctxt) const;
@ -606,7 +608,6 @@ private:
bool nonnull,
region_model_context *ctxt);
bool called_from_main_p () const;
const svalue *get_initial_value_for_global (const region *reg) const;
const region * get_region_for_poisoned_expr (tree expr) const;

View file

@ -514,6 +514,8 @@ region::get_memory_space () const
return MEMSPACE_HEAP;
case RK_STRING:
return MEMSPACE_READONLY_DATA;
case RK_PRIVATE:
return MEMSPACE_PRIVATE;
}
if (iter->get_kind () == RK_CAST)
iter = iter->dyn_cast_cast_region ()->get_original_region ();
@ -543,6 +545,7 @@ region::can_have_initial_svalue_p () const
case MEMSPACE_CODE:
case MEMSPACE_GLOBALS:
case MEMSPACE_READONLY_DATA:
case MEMSPACE_PRIVATE:
/* Such regions have initial_svalues. */
return true;
@ -2259,6 +2262,17 @@ errno_region::dump_to_pp (pretty_printer *pp, bool simple) const
pp_string (pp, "errno_region()");
}
/* class private_region : public region. */
void
private_region::dump_to_pp (pretty_printer *pp, bool simple) const
{
if (simple)
pp_printf (pp, "PRIVATE_REG(%qs)", m_desc);
else
pp_printf (pp, "private_region(%qs)", m_desc);
}
/* class unknown_region : public region. */
/* Implementation of region::dump_to_pp vfunc for unknown_region. */

View file

@ -35,7 +35,8 @@ enum memory_space
MEMSPACE_STACK,
MEMSPACE_HEAP,
MEMSPACE_READONLY_DATA,
MEMSPACE_THREAD_LOCAL
MEMSPACE_THREAD_LOCAL,
MEMSPACE_PRIVATE
};
/* An enum for discriminating between the different concrete subclasses
@ -65,6 +66,7 @@ enum region_kind
RK_BIT_RANGE,
RK_VAR_ARG,
RK_ERRNO,
RK_PRIVATE,
RK_UNKNOWN,
};
@ -108,6 +110,7 @@ enum region_kind
var_arg_region (RK_VAR_ARG): a region for the N-th vararg within a
frame_region for a variadic call
errno_region (RK_ERRNO): a region for holding "errno"
private_region (RK_PRIVATE): a region for internal state of an API
unknown_region (RK_UNKNOWN): for handling unimplemented tree codes. */
/* Abstract base class for representing ways of accessing chunks of memory.
@ -1434,6 +1437,42 @@ is_a_helper <const errno_region *>::test (const region *reg)
namespace ana {
/* Similar to a decl region, but we don't have the decl.
For implementing e.g. static buffers of known_functions,
or other internal state of an API.
These are owned by known_function instances, rather than the
region_model_manager. */
class private_region : public region
{
public:
private_region (unsigned id, const region *parent, tree type,
const char *desc)
: region (complexity (parent), id, parent, type),
m_desc (desc)
{}
enum region_kind get_kind () const final override { return RK_PRIVATE; }
void dump_to_pp (pretty_printer *pp, bool simple) const final override;
private:
const char *m_desc;
};
} // namespace ana
template <>
template <>
inline bool
is_a_helper <const private_region *>::test (const region *reg)
{
return reg->get_kind () == RK_PRIVATE;
}
namespace ana {
/* An unknown region, for handling unimplemented tree codes. */
class unknown_region : public region

View file

@ -3372,6 +3372,7 @@ store::replay_call_summary_cluster (call_summary_replay &r,
case RK_HEAP_ALLOCATED:
case RK_DECL:
case RK_ERRNO:
case RK_PRIVATE:
{
const region *caller_dest_reg
= r.convert_region_from_summary (summary_base_reg);

View file

@ -1360,8 +1360,8 @@ public:
/* A support class for uniquifying instances of conjured_svalue. */
struct key_t
{
key_t (tree type, const gimple *stmt, const region *id_reg)
: m_type (type), m_stmt (stmt), m_id_reg (id_reg)
key_t (tree type, const gimple *stmt, const region *id_reg, unsigned idx)
: m_type (type), m_stmt (stmt), m_id_reg (id_reg), m_idx (idx)
{}
hashval_t hash () const
@ -1377,7 +1377,8 @@ public:
{
return (m_type == other.m_type
&& m_stmt == other.m_stmt
&& m_id_reg == other.m_id_reg);
&& m_id_reg == other.m_id_reg
&& m_idx == other.m_idx);
}
/* Use m_stmt to mark empty/deleted, as m_type can be NULL for
@ -1393,12 +1394,13 @@ public:
tree m_type;
const gimple *m_stmt;
const region *m_id_reg;
unsigned m_idx;
};
conjured_svalue (symbol::id_t id, tree type, const gimple *stmt,
const region *id_reg)
const region *id_reg, unsigned idx)
: svalue (complexity (id_reg), id, type),
m_stmt (stmt), m_id_reg (id_reg)
m_stmt (stmt), m_id_reg (id_reg), m_idx (idx)
{
gcc_assert (m_stmt != NULL);
}
@ -1419,6 +1421,7 @@ public:
private:
const gimple *m_stmt;
const region *m_id_reg;
unsigned m_idx;
};
} // namespace ana

View file

@ -490,6 +490,7 @@ Objective-C and Objective-C++ Dialects}.
-Wno-analyzer-tainted-offset
-Wno-analyzer-tainted-size
-Wanalyzer-too-complex
-Wno-analyzer-undefined-behavior-strtok
-Wno-analyzer-unsafe-call-within-signal-handler
-Wno-analyzer-use-after-free
-Wno-analyzer-use-of-pointer-in-stale-stack-frame
@ -10424,6 +10425,7 @@ Enabling this option effectively enables the following warnings:
-Wanalyzer-tainted-divisor
-Wanalyzer-tainted-offset
-Wanalyzer-tainted-size
-Wanalyzer-undefined-behavior-strtok
-Wanalyzer-unsafe-call-within-signal-handler
-Wanalyzer-use-after-free
-Wanalyzer-use-of-pointer-in-stale-stack-frame
@ -11060,6 +11062,18 @@ attacker could inject an out-of-bounds access.
See @uref{https://cwe.mitre.org/data/definitions/129.html, CWE-129: Improper Validation of Array Index}.
@opindex Wanalyzer-undefined-behavior-strtok
@opindex Wno-analyzer-undefined-behavior-strtok
@item -Wno-analyzer-undefined-behavior-strtok
This warning requires @option{-fanalyzer}, which enables it; use
@option{-Wno-analyzer-undefined-behavior-strtok} to disable it.
This diagnostic warns for paths through the code in which a
call is made to @code{strtok} with undefined behavior.
Specifically, passing NULL as the first parameter for the initial
call to @code{strtok} within a process has undefined behavior.
@opindex Wanalyzer-unsafe-call-within-signal-handler
@opindex Wno-analyzer-unsafe-call-within-signal-handler
@item -Wno-analyzer-unsafe-call-within-signal-handler

View file

@ -0,0 +1,62 @@
/* { dg-additional-options "-fpermissive" { target c++ } } */
#include "../../gcc.dg/analyzer/analyzer-decls.h"
typedef __SIZE_TYPE__ size_t;
extern char *strtok (char *str, const char *delim)
__attribute__((nonnull (2)));
char *
test_passthrough (char *str, const char *delim)
{
return strtok (str, delim);
}
char *
test_null_str (const char *delim)
{
return strtok (NULL, delim);
}
char *
test_null_delim (char *str)
{
return strtok (str, NULL); /* { dg-warning "use of NULL where non-null expected" } */
/* This is from the attribute. */
}
char *
test_write_to_literal (void)
{
const char *str = "hello world";
return strtok ((char *)str, " "); /* { dg-warning "write to string literal" } */
}
char *
test_unterminated_str (const char *delim)
{
char str[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */
return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */
}
char *
test_unterminated_delimstr (char *str)
{
char delim[3] = "abc"; /* { dg-warning "initializer-string for '\[^\n\]*' is too long" "" { target c++ } } */
return strtok (str, delim); /* { dg-warning "stack-based buffer over-read" } */
/* { dg-message "while looking for null terminator for argument 2" "" { target *-*-* } .-1 } */
}
size_t
test_use_after_free_via_null_2 (char *p)
{
strtok (p, " ");
__builtin_free (p);
char *q = strtok (NULL, " "); /* TODO: should complain about this. */
if (q)
return __builtin_strlen (q); /* TODO: should complain about this. */
else
return 0;
}

View file

@ -0,0 +1,18 @@
#include "../../gcc.dg/analyzer/analyzer-decls.h"
extern char *strtok (char *str, const char *delim)
__attribute__((nonnull (2)));
int
main(int argc, char *argv[])
{
char *cmd;
char *arg;
if (argc < 2)
return -1;
cmd = strtok (argv[1], " "); /* { dg-bogus "undefined behavior" } */
arg = strtok (NULL, " ");
return 0;
}

View file

@ -0,0 +1,26 @@
#include "../../gcc.dg/analyzer/analyzer-decls.h"
extern char *strtok (char *str, const char *delim)
__attribute__((nonnull (2)));
int
main (int argc, char *argv[])
{
char *cmd;
char *arg;
if (argc < 2)
return -1;
cmd = strtok (NULL, " "); /* { dg-line "first_call" } */
arg = strtok (NULL, " ");
return 0;
/* C:
{ dg-warning "calling 'strtok' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c } first_call }
{ dg-message "some implementations of 'strtok' may crash on such input" "" { target c } first_call } */
/* C++:
{ dg-warning "calling 'char\\* strtok\\(char\\*, const char\\*\\)' for first time with NULL as argument 1 has undefined behavior \\\[CWE-476\\\] \\\[-Wanalyzer-undefined-behavior-strtok\\\]" "" { target c++ } first_call }
{ dg-message "some implementations of 'char\\* strtok\\(char\\*, const char\\*\\)' may crash on such input" "" { target c++ } first_call } */
}

View file

@ -0,0 +1,42 @@
#include "../../gcc.dg/analyzer/analyzer-decls.h"
extern char *strtok (char *str, const char *delim);
void test (void)
{
char buffer[] = { 'a', 'x', 'b', 'y', 'c', '\0' };
char *p1 = strtok (buffer, "x");
/* Should result in:
| buffer[] = { 'a', '\0', 'b', 'y', 'c', '\0' },
| ^ ^ ^
| | | |
| | | internal ptr
| p1 modified. */
char *p2 = strtok (NULL, "y"); /* note new delimiter. */
/* Should result in:
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
| ^ ^ ^
| | | |
| | | internal ptr
| p2 modified. */
char *p3 = strtok (NULL, "z"); /* again new delimiter. */
/* Should result in:
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
| ^ ^~
| | |
| | internal ptr
| p3. */
char *p4 = strtok (NULL, "z");
/* Should result in p4 == NULL, and:
| buffer[] = { 'a', '\0', 'b', '\0', 'c', '\0' },
| ^~
| |
| internal ptr. */
/* We don't yet model strtok closely enough to capture
these exact behaviors. */
}

View file

@ -0,0 +1,50 @@
/* Example of strtok adapted from:
https://en.cppreference.com/w/c/string/byte/strtok
which is
"licensed under Creative Commons Attribution-Sharealike 3.0
Unported License (CC-BY-SA) and by the GNU Free Documentation License
(GFDL) (unversioned, with no invariant sections, front-cover texts, or
back-cover texts). That means that you can use this site in almost any way
you like, including mirroring, copying, translating, etc. All we would ask
is to provide link back to cppreference.com so that people know where to
get the most up-to-date content. In addition to that, any modified content
should be released under an equivalent license so that everyone could
benefit from the modified versions. " */
#define __STDC_WANT_LIB_EXT1__ 0
#include <string.h>
#include <stdio.h>
int main(void)
{
char input[] = "A bird came down the walk";
printf("Parsing the input string '%s'\n", input);
char *token = strtok(input, " ");
while(token) {
puts(token);
token = strtok(NULL, " ");
}
printf("Contents of the input string now: '");
for(size_t n = 0; n < sizeof input; ++n)
input[n] ? putchar(input[n]) : fputs("\\0", stdout);
puts("'");
#ifdef __STDC_LIB_EXT1__
char str[] = "A bird came down the walk";
rsize_t strmax = sizeof str;
const char *delim = " ";
char *next_token;
printf("Parsing the input string '%s'\n", str);
token = strtok_s(str, &strmax, delim, &next_token);
while(token) {
puts(token);
token = strtok_s(NULL, &strmax, delim, &next_token);
}
printf("Contents of the input string now: '");
for(size_t n = 0; n < sizeof str; ++n)
str[n] ? putchar(str[n]) : fputs("\\0", stdout);
puts("'");
#endif
}