From 3242fb533d48abab621618c4f183ca395de3dcd2 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 22 Aug 2023 18:36:54 -0400 Subject: [PATCH] analyzer: reimplement kf_strlen [PR105899] Reimplement kf_strlen in terms of the new string scanning implementation, sharing strlen's implementation with __analyzer_get_strlen. gcc/analyzer/ChangeLog: PR analyzer/105899 * kf-analyzer.cc (class kf_analyzer_get_strlen): Move to kf.cc. (register_known_analyzer_functions): Use make_kf_strlen. * kf.cc (class kf_strlen::impl_call_pre): Replace with implementation of kf_analyzer_get_strlen from kf-analyzer.cc. Handle "UNKNOWN" return from check_for_null_terminated_string_arg by falling back to a conjured svalue. (make_kf_strlen): New. (register_known_functions): Use make_kf_strlen. * known-function-manager.h (make_kf_strlen): New decl. gcc/testsuite/ChangeLog: PR analyzer/105899 * gcc.dg/analyzer/null-terminated-strings-1.c: Update expected results on symbolic values. * gcc.dg/analyzer/strlen-1.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/kf-analyzer.cc | 30 +--------- gcc/analyzer/kf.cc | 56 +++++++++---------- gcc/analyzer/known-function-manager.h | 2 + .../analyzer/null-terminated-strings-1.c | 4 +- gcc/testsuite/gcc.dg/analyzer/strlen-1.c | 54 ++++++++++++++++++ 5 files changed, 85 insertions(+), 61 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/strlen-1.c diff --git a/gcc/analyzer/kf-analyzer.cc b/gcc/analyzer/kf-analyzer.cc index c767ebcb661..7ae598a8912 100644 --- a/gcc/analyzer/kf-analyzer.cc +++ b/gcc/analyzer/kf-analyzer.cc @@ -358,33 +358,6 @@ public: } }; -/* Handler for "__analyzer_get_strlen". */ - -class kf_analyzer_get_strlen : public known_function -{ -public: - bool matches_call_types_p (const call_details &cd) const final override - { - return cd.num_args () == 1 && cd.arg_is_pointer_p (0); - } - void impl_call_pre (const call_details &cd) const final override - { - if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0)) - { - region_model_manager *mgr = cd.get_manager (); - /* strlen is (bytes_read - 1). */ - const svalue *strlen_sval - = mgr->get_or_create_binop (size_type_node, - MINUS_EXPR, - bytes_read, - mgr->get_or_create_int_cst (size_type_node, 1)); - cd.maybe_set_lhs (strlen_sval); - } - else - cd.set_any_lhs_with_defaults (); - } -}; - /* Populate KFM with instances of known functions used for debugging the analyzer and for writing DejaGnu tests, all with a "__analyzer_" prefix. */ @@ -406,8 +379,7 @@ register_known_analyzer_functions (known_function_manager &kfm) kfm.add ("__analyzer_eval", make_unique ()); kfm.add ("__analyzer_get_unknown_ptr", make_unique ()); - kfm.add ("__analyzer_get_strlen", - make_unique ()); + kfm.add ("__analyzer_get_strlen", make_kf_strlen ()); } } // namespace ana diff --git a/gcc/analyzer/kf.cc b/gcc/analyzer/kf.cc index 1601cf15c68..59f46bab581 100644 --- a/gcc/analyzer/kf.cc +++ b/gcc/analyzer/kf.cc @@ -1187,7 +1187,7 @@ public: } }; -/* Handle the on_call_pre part of "strlen". */ +/* Handler for "strlen" and for "__analyzer_get_strlen". */ class kf_strlen : public known_function { @@ -1196,37 +1196,33 @@ public: { return (cd.num_args () == 1 && cd.arg_is_pointer_p (0)); } - void impl_call_pre (const call_details &cd) const final override; -}; - -void -kf_strlen::impl_call_pre (const call_details &cd) const -{ - region_model_context *ctxt = cd.get_ctxt (); - region_model *model = cd.get_model (); - region_model_manager *mgr = cd.get_manager (); - - const svalue *arg_sval = cd.get_arg_svalue (0); - const region *buf_reg - = model->deref_rvalue (arg_sval, cd.get_arg_tree (0), ctxt); - if (const string_region *str_reg - = buf_reg->dyn_cast_string_region ()) - { - tree str_cst = str_reg->get_string_cst (); - /* TREE_STRING_LENGTH is sizeof, not strlen. */ - int sizeof_cst = TREE_STRING_LENGTH (str_cst); - int strlen_cst = sizeof_cst - 1; - if (cd.get_lhs_type ()) + void impl_call_pre (const call_details &cd) const final override + { + if (const svalue *bytes_read = cd.check_for_null_terminated_string_arg (0)) + if (bytes_read->get_kind () != SK_UNKNOWN) { - tree t_cst = build_int_cst (cd.get_lhs_type (), strlen_cst); - const svalue *result_sval - = mgr->get_or_create_constant_svalue (t_cst); - cd.maybe_set_lhs (result_sval); + region_model_manager *mgr = cd.get_manager (); + /* strlen is (bytes_read - 1). */ + const svalue *one = mgr->get_or_create_int_cst (size_type_node, 1); + const svalue *strlen_sval = mgr->get_or_create_binop (size_type_node, + MINUS_EXPR, + bytes_read, + one); + cd.maybe_set_lhs (strlen_sval); return; } - } - /* Otherwise a conjured value. */ - cd.set_any_lhs_with_defaults (); + + /* Use a conjured svalue. */ + cd.set_any_lhs_with_defaults (); + } +}; + +/* Factory function, so that kf-analyzer.cc can use this class. */ + +std::unique_ptr +make_kf_strlen () +{ + return make_unique (); } /* Handler for "strndup" and "__builtin_strndup". */ @@ -1434,7 +1430,7 @@ register_known_functions (known_function_manager &kfm) kfm.add (BUILT_IN_STRCPY_CHK, make_unique (3)); kfm.add (BUILT_IN_STRDUP, make_unique ()); kfm.add (BUILT_IN_STRNDUP, make_unique ()); - kfm.add (BUILT_IN_STRLEN, make_unique ()); + kfm.add (BUILT_IN_STRLEN, make_kf_strlen ()); register_atomic_builtins (kfm); register_varargs_builtins (kfm); diff --git a/gcc/analyzer/known-function-manager.h b/gcc/analyzer/known-function-manager.h index 4a76136f259..1432e548acb 100644 --- a/gcc/analyzer/known-function-manager.h +++ b/gcc/analyzer/known-function-manager.h @@ -64,6 +64,8 @@ private: known_function *m_combined_fns_arr[CFN_LAST]; }; +extern std::unique_ptr make_kf_strlen (); + } // namespace ana #endif /* GCC_ANALYZER_KNOWN_FUNCTION_MANAGER_H */ diff --git a/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c b/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c index 1db82a76d3b..ecd794a2337 100644 --- a/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/null-terminated-strings-1.c @@ -129,12 +129,12 @@ char *test_dynamic_4 (const char *src) void test_symbolic_ptr (const char *ptr) { - __analyzer_describe (0, __analyzer_get_strlen (ptr)); /* { dg-warning "UNKNOWN" } */ + __analyzer_describe (0, __analyzer_get_strlen (ptr)); /* { dg-warning "CONJURED" } */ } void test_symbolic_offset (size_t idx) { - __analyzer_describe (0, __analyzer_get_strlen ("abc" + idx)); /* { dg-warning "UNKNOWN" } */ + __analyzer_describe (0, __analyzer_get_strlen ("abc" + idx)); /* { dg-warning "CONJURED" } */ } void test_casts (void) diff --git a/gcc/testsuite/gcc.dg/analyzer/strlen-1.c b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c new file mode 100644 index 00000000000..99ce3aa297b --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/strlen-1.c @@ -0,0 +1,54 @@ +/* See e.g. https://en.cppreference.com/w/c/string/byte/strlen */ + +#include "analyzer-decls.h" + +typedef __SIZE_TYPE__ size_t; + +static size_t __attribute__((noinline)) +call_strlen_1 (const char *p) +{ + return __builtin_strlen (p); +} + +void test_string (void) +{ + __analyzer_eval (call_strlen_1 ("abc") == 3); /* { dg-warning "TRUE" } */ +} + +static size_t __attribute__((noinline)) +call_strlen_2 (const char *p) +{ + return __builtin_strlen (p); /* { dg-warning "stack-based buffer over-read" } */ +} + +void test_unterminated (void) +{ + const char buf[3] = "abc"; + __analyzer_eval (call_strlen_2 (buf) == 3); /* { dg-warning "UNKNOWN" } */ +} + +void test_uninitialized (void) +{ + char buf[16]; + __builtin_strlen (buf); /* { dg-warning "use of uninitialized value 'buf\\\[0\\\]'" } */ +} + +void test_partially_initialized (void) +{ + char buf[16]; + buf[0] = 'a'; + __builtin_strlen (buf); /* { dg-warning "use of uninitialized value 'buf\\\[1\\\]'" } */ +} + +extern size_t strlen (const char *str); + +size_t +test_passthrough (const char *str) +{ + return strlen (str); +} + +/* TODO + - complain if NULL str + - should it be tainted if str's bytes are tainted? +*/