Support more predicates in tree-sitter search functions
Right now we support regexp strings and predicate functions for the PRED argument. This change adds support for (not ...) (or ...) and (regexp . pred) predicates. I still need to find a place to document the supported shapes of a predicate. * src/treesit.c (treesit_traverse_validate_predicate): New function. (treesit_traverse_match_predicate): Support more predicate shapes. (treesit_search_dfs): (treesit_search_forward) (treesit_build_sparse_tree): Fix docstring (unrelated to this change). (Ftreesit_search_subtree) (Ftreesit_search_forward) (Ftreesit_induce_sparse_tree): Use the new function to validate predicate shape. (syms_of_treesit): New error Qtreesit_invalid_predicate. * test/src/treesit-tests.el: (treesit--ert-search-setup): Add edebug declaration. (treesit-search-forward-predicate) (treesit-search-forward-predicate-invalid-predicate): New tests.
This commit is contained in:
parent
a5eb9f6ad4
commit
361c5fc2d8
2 changed files with 201 additions and 20 deletions
168
src/treesit.c
168
src/treesit.c
|
@ -3139,10 +3139,84 @@ treesit_traverse_child_helper (TSTreeCursor *cursor,
|
|||
}
|
||||
}
|
||||
|
||||
/* Return true if the node at CURSOR matches PRED. PRED can be a
|
||||
string or a function. This function assumes PRED is either a
|
||||
string or a function. If NAMED is true, also check that the node
|
||||
is named. */
|
||||
/* Validate the PRED passed to treesit_traverse_match_predicate. If
|
||||
there's an error, set SIGNAL_DATA to something signal accepts, and
|
||||
return false, otherwise return true. */
|
||||
static bool
|
||||
treesit_traverse_validate_predicate (Lisp_Object pred,
|
||||
Lisp_Object *signal_data)
|
||||
{
|
||||
if (STRINGP (pred))
|
||||
return true;
|
||||
/* We want to allow cl-labels-defined functions, so we allow
|
||||
symbols. */
|
||||
else if (FUNCTIONP (pred) || SYMBOLP (pred))
|
||||
return true;
|
||||
else if (CONSP (pred))
|
||||
{
|
||||
Lisp_Object car = XCAR (pred);
|
||||
Lisp_Object cdr = XCDR (pred);
|
||||
if (EQ (car, Qnot))
|
||||
{
|
||||
if (!CONSP (cdr))
|
||||
{
|
||||
*signal_data = list2 (build_string ("Invalide `not' "
|
||||
"predicate"),
|
||||
pred);
|
||||
return false;
|
||||
}
|
||||
/* At this point CDR must be a cons. */
|
||||
if (XFIXNUM (Flength (cdr)) != 1)
|
||||
{
|
||||
*signal_data = list2 (build_string ("`not' can only "
|
||||
"have one argument"),
|
||||
pred);
|
||||
return false;
|
||||
}
|
||||
return treesit_traverse_validate_predicate (XCAR (cdr),
|
||||
signal_data);
|
||||
}
|
||||
else if (EQ (car, Qor))
|
||||
{
|
||||
if (!CONSP (cdr) || NILP (cdr))
|
||||
{
|
||||
*signal_data = list2 (build_string ("`or' must have a list "
|
||||
"of patterns as "
|
||||
"arguments "),
|
||||
pred);
|
||||
return false;
|
||||
}
|
||||
FOR_EACH_TAIL (cdr)
|
||||
{
|
||||
if (!treesit_traverse_validate_predicate (XCAR (cdr),
|
||||
signal_data))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/* We allow the function to be a symbol to support cl-label. */
|
||||
else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
|
||||
return true;
|
||||
}
|
||||
*signal_data = list2 (build_string ("Invalid predicate, see TODO for "
|
||||
"valid forms of predicate"),
|
||||
pred);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return true if the node at CURSOR matches PRED. PRED can be a lot
|
||||
of things:
|
||||
|
||||
PRED := string | function | (string . function)
|
||||
| (or PRED...) | (not PRED)
|
||||
|
||||
See docstring of treesit-search-forward and friends for the meaning
|
||||
of each shape.
|
||||
|
||||
This function assumes PRED is in one of its valid forms. If NAMED
|
||||
is true, also check that the node is named.
|
||||
|
||||
This function may signal if the predicate function signals. */
|
||||
static bool
|
||||
treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
|
||||
Lisp_Object parser, bool named)
|
||||
|
@ -3156,24 +3230,63 @@ treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
|
|||
const char *type = ts_node_type (node);
|
||||
return fast_c_string_match (pred, type, strlen (type)) >= 0;
|
||||
}
|
||||
else
|
||||
/* We want to allow cl-labels-defined functions, so we allow
|
||||
symbols. */
|
||||
else if (FUNCTIONP (pred) || SYMBOLP (pred))
|
||||
{
|
||||
Lisp_Object lisp_node = make_treesit_node (parser, node);
|
||||
return !NILP (CALLN (Ffuncall, pred, lisp_node));
|
||||
}
|
||||
else if (CONSP (pred))
|
||||
{
|
||||
Lisp_Object car = XCAR (pred);
|
||||
Lisp_Object cdr = XCDR (pred);
|
||||
|
||||
if (EQ (car, Qnot))
|
||||
{
|
||||
return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
|
||||
parser, named);
|
||||
}
|
||||
else if (EQ (car, Qor))
|
||||
{
|
||||
FOR_EACH_TAIL (cdr)
|
||||
{
|
||||
if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
|
||||
parser, named))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
/* We want to allow cl-labels-defined functions, so we allow
|
||||
symbols. */
|
||||
else if (STRINGP (car) && (FUNCTIONP (cdr) || SYMBOLP (cdr)))
|
||||
{
|
||||
/* A bit of code duplication here, but should be fine. */
|
||||
const char *type = ts_node_type (node);
|
||||
if (!(fast_c_string_match (pred, type, strlen (type)) >= 0))
|
||||
return false;
|
||||
|
||||
Lisp_Object lisp_node = make_treesit_node (parser, node);
|
||||
if (NILP (CALLN (Ffuncall, pred, lisp_node)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
/* Returning false is better than UB. */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Traverse the parse tree starting from CURSOR. PRED can be a
|
||||
function (takes a node and returns nil/non-nil), or a string
|
||||
(treated as regexp matching the node's type, must be all single
|
||||
byte characters). If the node satisfies PRED, leave CURSOR on that
|
||||
node and return true. If no node satisfies PRED, move CURSOR back
|
||||
to starting position and return false.
|
||||
/* Traverse the parse tree starting from CURSOR. See TODO for the
|
||||
shapes PRED can have. If the node satisfies PRED, leave CURSOR on
|
||||
that node and return true. If no node satisfies PRED, move CURSOR
|
||||
back to starting position and return false.
|
||||
|
||||
LIMIT is the number of levels we descend in the tree. FORWARD
|
||||
controls the direction in which we traverse the tree, true means
|
||||
forward, false backward. If SKIP_ROOT is true, don't match ROOT.
|
||||
*/
|
||||
|
||||
This function may signal if the predicate function signals. */
|
||||
static bool
|
||||
treesit_search_dfs (TSTreeCursor *cursor,
|
||||
Lisp_Object pred, Lisp_Object parser,
|
||||
|
@ -3209,7 +3322,9 @@ treesit_search_dfs (TSTreeCursor *cursor,
|
|||
START. PRED, PARSER, NAMED, FORWARD are the same as in
|
||||
ts_search_subtree. If a match is found, leave CURSOR at that node,
|
||||
and return true, if no match is found, return false, and CURSOR's
|
||||
position is undefined. */
|
||||
position is undefined.
|
||||
|
||||
This function may signal if the predicate function signals. */
|
||||
static bool
|
||||
treesit_search_forward (TSTreeCursor *cursor,
|
||||
Lisp_Object pred, Lisp_Object parser,
|
||||
|
@ -3272,11 +3387,13 @@ Return the first matched node, or nil if none matches. */)
|
|||
Lisp_Object all, Lisp_Object depth)
|
||||
{
|
||||
CHECK_TS_NODE (node);
|
||||
CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
|
||||
list3 (Qor, Qstringp, Qfunctionp), predicate);
|
||||
CHECK_SYMBOL (all);
|
||||
CHECK_SYMBOL (backward);
|
||||
|
||||
Lisp_Object signal_data = Qnil;
|
||||
if (!treesit_traverse_validate_predicate (predicate, &signal_data))
|
||||
xsignal1 (Qtreesit_invalid_predicate, signal_data);
|
||||
|
||||
/* We use a default limit of 1000. See bug#59426 for the
|
||||
discussion. */
|
||||
ptrdiff_t the_limit = treesit_recursion_limit;
|
||||
|
@ -3344,11 +3461,13 @@ always traverse leaf nodes first, then upwards. */)
|
|||
Lisp_Object all)
|
||||
{
|
||||
CHECK_TS_NODE (start);
|
||||
CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
|
||||
list3 (Qor, Qstringp, Qfunctionp), predicate);
|
||||
CHECK_SYMBOL (all);
|
||||
CHECK_SYMBOL (backward);
|
||||
|
||||
Lisp_Object signal_data = Qnil;
|
||||
if (!treesit_traverse_validate_predicate (predicate, &signal_data))
|
||||
xsignal1 (Qtreesit_invalid_predicate, signal_data);
|
||||
|
||||
treesit_initialize ();
|
||||
|
||||
Lisp_Object parser = XTS_NODE (start)->parser;
|
||||
|
@ -3376,7 +3495,9 @@ always traverse leaf nodes first, then upwards. */)
|
|||
/* Recursively traverse the tree under CURSOR, and append the result
|
||||
subtree to PARENT's cdr. See more in Ftreesit_induce_sparse_tree.
|
||||
Note that the top-level children list is reversed, because
|
||||
reasons. */
|
||||
reasons.
|
||||
|
||||
This function may signal if the predicate function signals. */
|
||||
static void
|
||||
treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
|
||||
Lisp_Object pred, Lisp_Object process_fn,
|
||||
|
@ -3462,8 +3583,10 @@ a regexp. */)
|
|||
Lisp_Object depth)
|
||||
{
|
||||
CHECK_TS_NODE (root);
|
||||
CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
|
||||
list3 (Qor, Qstringp, Qfunctionp), predicate);
|
||||
|
||||
Lisp_Object signal_data = Qnil;
|
||||
if (!treesit_traverse_validate_predicate (predicate, &signal_data))
|
||||
xsignal1 (Qtreesit_invalid_predicate, signal_data);
|
||||
|
||||
if (!NILP (process_fn))
|
||||
CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
|
||||
|
@ -3595,6 +3718,7 @@ syms_of_treesit (void)
|
|||
DEFSYM (Qoutdated, "outdated");
|
||||
DEFSYM (Qhas_error, "has-error");
|
||||
DEFSYM (Qlive, "live");
|
||||
DEFSYM (Qnot, "not");
|
||||
|
||||
DEFSYM (QCanchor, ":anchor");
|
||||
DEFSYM (QCequal, ":equal");
|
||||
|
@ -3619,6 +3743,7 @@ syms_of_treesit (void)
|
|||
"user-emacs-directory");
|
||||
DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
|
||||
DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
|
||||
DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
|
||||
|
||||
DEFSYM (Qor, "or");
|
||||
|
||||
|
@ -3646,6 +3771,9 @@ syms_of_treesit (void)
|
|||
define_error (Qtreesit_parser_deleted,
|
||||
"This parser is deleted and cannot be used",
|
||||
Qtreesit_error);
|
||||
define_error (Qtreesit_invalid_predicate,
|
||||
"Invalid predicate, see TODO for valid forms for a predicate",
|
||||
Qtreesit_error);
|
||||
|
||||
DEFVAR_LISP ("treesit-load-name-override-list",
|
||||
Vtreesit_load_name_override_list,
|
||||
|
|
|
@ -257,6 +257,7 @@
|
|||
(defmacro treesit--ert-search-setup (&rest body)
|
||||
"Setup macro used by `treesit-search-forward' and friends.
|
||||
BODY is the test body."
|
||||
(declare (debug (&rest form)))
|
||||
`(with-temp-buffer
|
||||
(let (parser root array)
|
||||
(progn
|
||||
|
@ -332,6 +333,58 @@ BODY is the test body."
|
|||
do (should (equal (treesit-node-text cursor)
|
||||
text)))))
|
||||
|
||||
(ert-deftest treesit-search-forward-predicate ()
|
||||
"Test various form of supported predicates in search functions."
|
||||
(skip-unless (treesit-language-available-p 'json))
|
||||
(treesit--ert-search-setup
|
||||
;; The following tests are adapted from `treesit-search-forward'.
|
||||
|
||||
;; Test `or'
|
||||
(cl-loop for cursor = (treesit-node-child array 0)
|
||||
then (treesit-search-forward cursor `(or "number" ,(rx "["))
|
||||
nil t)
|
||||
for text in '("[" "[" "1" "2" "3"
|
||||
"[" "4" "5" "6"
|
||||
"[" "7" "8" "9")
|
||||
while cursor
|
||||
do (should (equal (treesit-node-text cursor) text)))
|
||||
;; Test `not' and `or'
|
||||
(cl-loop for cursor = (treesit-node-child array 0)
|
||||
then (treesit-search-forward cursor
|
||||
`(not (or "number" ,(rx "[")))
|
||||
nil t)
|
||||
for text in '("[" "," "," "]"
|
||||
"[1,2,3]" ","
|
||||
"," "," "]"
|
||||
"[4,5,6]" ","
|
||||
"," "," "]"
|
||||
"[7,8,9]" "]"
|
||||
"[[1,2,3], [4,5,6], [7,8,9]]")
|
||||
while cursor
|
||||
do (should (equal (treesit-node-text cursor) text)))
|
||||
;; Test (regexp . function)
|
||||
(cl-labels ((is-odd (string)
|
||||
(and (eq 1 (length string))
|
||||
(cl-oddp (string-to-number string)))))
|
||||
(cl-loop for cursor = (treesit-node-child array 0)
|
||||
then (treesit-search-forward cursor '("number" . is-odd)
|
||||
nil t)
|
||||
for text in '("[" "1" "3" "5" "7" "9")
|
||||
while cursor
|
||||
do (should (equal (treesit-node-text cursor) text))))))
|
||||
|
||||
(ert-deftest treesit-search-forward-predicate-invalid-predicate ()
|
||||
"Test tree-sitter's ability to detect invalid predicates."
|
||||
(skip-unless (treesit-language-available-p 'json))
|
||||
(treesit--ert-search-setup
|
||||
(dolist (pred '( 1 (not 1) (not "2" "3") (or) (or 1)))
|
||||
(should-error (treesit-search-forward (treesit-node-child array 0)
|
||||
pred)
|
||||
:type 'treesit-invalid-predicate))
|
||||
(should-error (treesit-search-forward (treesit-node-child array 0)
|
||||
'not-a-function)
|
||||
:type 'void-function)))
|
||||
|
||||
(ert-deftest treesit-cursor-helper-with-missing-node ()
|
||||
"Test treesit_cursor_helper with a missing node."
|
||||
(skip-unless (treesit-language-available-p 'json))
|
||||
|
|
Loading…
Add table
Reference in a new issue