Switch to use cursor API in treesit.c

ts_node_parent has bugs (bug#60054), using cursor API avoids that.
Tree-sitter's author might remove ts_node_parent in the future, so
might as well switch to use cursors now.  We are basically
reimplementing some of the logic of ts_node_prev_sibling and
ts_node_parent in the sibling helper and cursor helper functions.

See also https://github.com/tree-sitter/tree-sitter/issues/1992

* src/treesit.c (treesit_traverse_sibling_helper)
(treesit_traverse_child_helper)
(treesit_traverse_match_predicate): Reimplemented to use the cursor API.
(treesit_search_dfs)
(treesit_search_forward): Use the new cursor helper functions.
(Ftreesit_search_subtree)
(Ftreesit_search_forward)
(Ftreesit_induce_sparse_tree): Use cursors.

* test/src/treesit-tests.el (treesit-search-subtree): New test.
(treesit--ert-search-setup): New macro.
(treesit-search-forward)
(treesit-search-forward-named-only)
(treesit-search-backward)
(treesit-search-backward-named-only)
(treesit-cursor-helper-with-missing-node): New tests.
This commit is contained in:
Yuan Fu 2022-12-17 15:01:57 -08:00
parent a275e436df
commit 5f0286c0af
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
2 changed files with 343 additions and 146 deletions

View file

@ -288,7 +288,7 @@ init_treesit_functions (void)
slow enough to make insignificant any performance advantages from slow enough to make insignificant any performance advantages from
using the cursor. Not exposing the cursor also minimizes the using the cursor. Not exposing the cursor also minimizes the
number of new types this adds to Emacs Lisp; currently, this adds number of new types this adds to Emacs Lisp; currently, this adds
only the parser and node types. only the parser, node, and compiled query types.
- Because updating the change is handled on the C level as each - Because updating the change is handled on the C level as each
change is made in the buffer, there is no way for Lisp to update change is made in the buffer, there is no way for Lisp to update
@ -2687,63 +2687,143 @@ treesit_cursor_helper (TSNode node, Lisp_Object parser)
return cursor; return cursor;
} }
/* Return the next/previous named/unnamed sibling of NODE. FORWARD /* Move CURSOR to the next/previous sibling. FORWARD controls the
controls the direction and NAMED controls the nameness. */ direction. NAMED controls the namedness. If there is a valid
static TSNode sibling, move CURSOR to it and return true, otherwise return false.
treesit_traverse_sibling_helper (TSNode node, bool forward, bool named) When false is returned, CURSOR points to a sibling node of the node
{ we started at, but exactly which is undefined. */
if (forward)
{
if (named)
return ts_node_next_named_sibling (node);
else
return ts_node_next_sibling (node);
}
else
{
if (named)
return ts_node_prev_named_sibling (node);
else
return ts_node_prev_sibling (node);
}
}
/* Return the first/last named/unnamed child of NODE. FORWARD controls
the direction and NAMED controls the nameness. */
static TSNode
treesit_traverse_child_helper (TSNode node, bool forward, bool named)
{
if (forward)
{
if (named)
return ts_node_named_child (node, 0);
else
return ts_node_child (node, 0);
}
else
{
if (named)
{
uint32_t count = ts_node_named_child_count (node);
uint32_t idx = count == 0 ? 0 : count - 1;
return ts_node_named_child (node, idx);
}
else
{
uint32_t count = ts_node_child_count (node);
uint32_t idx = count == 0 ? 0 : count - 1;
return ts_node_child (node, idx);
}
}
}
/* Return true if NODE matches PRED. PRED can be a string or a
function. This function assumes PRED is either a string or a
function. */
static bool static bool
treesit_traverse_match_predicate (TSNode node, Lisp_Object pred, treesit_traverse_sibling_helper (TSTreeCursor *cursor,
Lisp_Object parser) bool forward, bool named)
{ {
if (forward)
{
if (!named)
return ts_tree_cursor_goto_next_sibling (cursor);
/* Else named... */
while (ts_tree_cursor_goto_next_sibling (cursor))
{
if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
return true;
}
return false;
}
else /* Backward. */
{
/* Go to first child and go through each sibling, until we find
the one just before the starting node. */
TSNode start = ts_tree_cursor_current_node (cursor);
if (!ts_tree_cursor_goto_parent (cursor))
return false;
treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
/* Now CURSOR is at the first child. If we started at the first
child, then there is no further siblings. */
TSNode first_child = ts_tree_cursor_current_node (cursor);
if (ts_node_eq (first_child, start))
return false;
/* PROBE is always DELTA siblings ahead of CURSOR. */
TSTreeCursor probe = ts_tree_cursor_copy (cursor);
/* This is position of PROBE minus position of CURSOR. */
ptrdiff_t delta = 0;
TSNode probe_node;
TSNode cursor_node;
while (ts_tree_cursor_goto_next_sibling (&probe))
{
/* Move PROBE forward, if it equals to the starting node,
CURSOR points to the node we want (prev valid sibling of
the starting node). */
delta++;
probe_node = ts_tree_cursor_current_node (&probe);
/* PROBE matched, depending on NAMED, return true/false. */
if (ts_node_eq (probe_node, start))
{
ts_tree_cursor_delete (&probe);
cursor_node = ts_tree_cursor_current_node (cursor);
ts_tree_cursor_delete (&probe);
return (!named || (named && ts_node_is_named (cursor_node)));
}
/* PROBE didn't match, move CURSOR forward to PROBE's
position, but if we are looking for named nodes, only
move CURSOR to PROBE if PROBE is at a named node. */
if (!named || (named && ts_node_is_named (probe_node)))
for (; delta > 0; delta--)
treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
}
ts_tree_cursor_delete (&probe);
return false;
}
}
/* Move CURSOR to the first/last child. FORWARD controls the
direction. NAMED controls the namedness. If there is a valid
child, move CURSOR to it and return true, otherwise don't move
CURSOR and return false. */
static bool
treesit_traverse_child_helper (TSTreeCursor *cursor,
bool forward, bool named)
{
if (forward)
{
if (!named)
return ts_tree_cursor_goto_first_child (cursor);
else
{
if (!ts_tree_cursor_goto_first_child (cursor))
return false;
/* After this point, if you return false, make sure to go
back to parent. */
TSNode first_child = ts_tree_cursor_current_node (cursor);
if (ts_node_is_named (first_child))
return true;
if (treesit_traverse_sibling_helper (cursor, true, true))
return true;
else
{
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
}
}
}
else /* Backward. */
{
if (!ts_tree_cursor_goto_first_child (cursor))
return false;
/* After this point, if you return false, make sure to go
back to parent. */
/* First go to the last child. */
while (ts_tree_cursor_goto_next_sibling (cursor));
if (!named)
return true;
/* Else named... */
if (treesit_traverse_sibling_helper(cursor, false, true))
return true;
else
{
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
}
}
}
/* Return true if the node at CURSOR matches PRED. PRED can be a
string or a function. This function assumes PRED is either a
string or a function. If NAMED is true, also check that the node
is named. */
static bool
treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
Lisp_Object parser, bool named)
{
TSNode node = ts_tree_cursor_current_node (cursor);
if (named && !ts_node_is_named (node))
return false;
if (STRINGP (pred)) if (STRINGP (pred))
{ {
const char *type = ts_node_type (node); const char *type = ts_node_type (node);
@ -2754,73 +2834,60 @@ treesit_traverse_match_predicate (TSNode node, Lisp_Object pred,
Lisp_Object lisp_node = make_treesit_node (parser, node); Lisp_Object lisp_node = make_treesit_node (parser, node);
return !NILP (CALLN (Ffuncall, pred, lisp_node)); return !NILP (CALLN (Ffuncall, pred, lisp_node));
} }
} }
/* Traverse the parse tree starting from ROOT (but ROOT is not /* Traverse the parse tree starting from CURSOR. PRED can be a
matches against PRED). PRED can be a function (takes a node and function (takes a node and returns nil/non-nil), or a string
returns nil/non-nil),or a string (treated as regexp matching the (treated as regexp matching the node's type, must be all single
node's type, ignores case, must be all single byte characters). If byte characters). If the node satisfies PRED, leave CURSOR on that
the node satisfies PRED , terminate, set ROOT to that node, and node and return true. If no node satisfies PRED, move CURSOR back
return true. If no node satisfies PRED, return FALSE. PARSER is to starting position and return false.
the parser of ROOT.
LIMIT is the number of levels we descend in the tree. FORWARD LIMIT is the number of levels we descend in the tree. FORWARD
controls the direction in which we traverse the tree, true means controls the direction in which we traverse the tree, true means
forward, false backward. If NAMED is true, only traverse named forward, false backward. If SKIP_ROOT is true, don't match ROOT.
nodes, if false, all nodes. If SKIP_ROOT is true, don't match */
ROOT. */
static bool static bool
treesit_search_dfs (TSNode *root, Lisp_Object pred, Lisp_Object parser, treesit_search_dfs (TSTreeCursor *cursor,
bool named, bool forward, ptrdiff_t limit, Lisp_Object pred, Lisp_Object parser,
bool forward, bool named, ptrdiff_t limit,
bool skip_root) bool skip_root)
{ {
/* TSTreeCursor doesn't allow us to move backward, so we can't use if (!skip_root
it. */ && treesit_traverse_match_predicate (cursor, pred, parser, named))
TSNode node = *root; return true;
if (!skip_root && treesit_traverse_match_predicate (node, pred, parser)) if (limit == 0)
{
*root = node;
return true;
}
if (limit <= 0)
return false; return false;
else
{
int count = (named
? ts_node_named_child_count (node)
: ts_node_child_count (node));
for (int offset = 0; offset < count; offset++)
{
uint32_t idx = forward ? offset : count - offset - 1;
TSNode child = (named
? ts_node_named_child (node, idx)
: ts_node_child (node, idx));
if (!ts_node_is_null (child) if (!treesit_traverse_child_helper (cursor, forward, named))
&& treesit_search_dfs (&child, pred, parser, named, return false;
forward, limit - 1, false)) /* After this point, if you return false, make sure to go back to
{ parent. */
*root = child;
return true; do /* Iterate through each child. */
} {
} if (treesit_search_dfs (cursor, pred, parser, forward,
return false; named, limit - 1, false))
return true;
} }
while (treesit_traverse_sibling_helper (cursor, forward, false));
/* No match in any child's subtree, go back to starting node. */
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
} }
/* Go through the whole tree linearly, leaf-first, starting from /* Go through the whole tree linearly, leaf-first, starting from
START. PRED, PARSER, NAMED, FORWARD are the same as in START. PRED, PARSER, NAMED, FORWARD are the same as in
ts_search_subtre. If UP_ONLY is true, never go to children, only ts_search_subtree. If a match is found, leave CURSOR at that node,
sibling and parents. */ and return true, if no match is found, return false, and CURSOR's
position is undefined. */
static bool static bool
treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser, treesit_search_forward (TSTreeCursor *cursor,
bool named, bool forward) Lisp_Object pred, Lisp_Object parser,
bool forward, bool named)
{ {
TSNode node = *start;
/* We don't search for subtree and always search from the leaf /* We don't search for subtree and always search from the leaf
nodes. This way repeated call of this function traverses each nodes. This way repeated call of this function traverses each
node in the tree once and only once: node in the tree once and only once:
@ -2830,39 +2897,26 @@ treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser,
bool initial = true; bool initial = true;
while (true) while (true)
{ {
if (!initial /* We don't match START. */ if (!initial /* We don't match the starting node. */
&& treesit_traverse_match_predicate (node, pred, parser)) && treesit_traverse_match_predicate (cursor, pred, parser, named))
{ return true;
*start = node;
return true;
}
initial = false; initial = false;
TSNode next = treesit_traverse_sibling_helper (node, forward, named); /* Try going to the next sibling, if there is no next sibling,
while (ts_node_is_null (next)) go to parent and try again. */
while (!treesit_traverse_sibling_helper (cursor, forward, named))
{ {
/* There is no next sibling, go to parent. */ /* There is no next sibling, go to parent. */
node = ts_node_parent (node); if (!ts_tree_cursor_goto_parent (cursor))
if (ts_node_is_null (node))
return false; return false;
if (treesit_traverse_match_predicate (node, pred, parser)) if (treesit_traverse_match_predicate (cursor, pred, parser, named))
{
*start = node;
return true; return true;
}
next = treesit_traverse_sibling_helper (node, forward, named);
} }
/* We are at the next sibling, deep dive into the first leaf /* We are at the next sibling, deep dive into the first leaf
node. */ node. */
TSNode next_next = treesit_traverse_child_helper (next, forward, named); while (treesit_traverse_child_helper (cursor, forward, false));
while (!ts_node_is_null (next_next)) /* At this point CURSOR is at a leaf node. */
{
next = next_next;
next_next = treesit_traverse_child_helper (next, forward, named);
}
/* At this point NEXT is a leaf node. */
node = next;
} }
} }
@ -2890,7 +2944,7 @@ Return the first matched node, or nil if none matches. */)
CHECK_SYMBOL (all); CHECK_SYMBOL (all);
CHECK_SYMBOL (backward); CHECK_SYMBOL (backward);
/* We use a default limit to 1000. See bug#59426 for the /* We use a default limit of 1000. See bug#59426 for the
discussion. */ discussion. */
ptrdiff_t the_limit = 1000; ptrdiff_t the_limit = 1000;
if (!NILP (limit)) if (!NILP (limit))
@ -2901,13 +2955,17 @@ Return the first matched node, or nil if none matches. */)
treesit_initialize (); treesit_initialize ();
TSNode treesit_node = XTS_NODE (node)->node;
Lisp_Object parser = XTS_NODE (node)->parser; Lisp_Object parser = XTS_NODE (node)->parser;
if (treesit_search_dfs (&treesit_node, predicate, parser, NILP (all), Lisp_Object return_value = Qnil;
NILP (backward), the_limit, false)) TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
return make_treesit_node (parser, treesit_node); if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
else NILP (all), the_limit, false))
return Qnil; {
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
ts_tree_cursor_delete (&cursor);
return return_value;
} }
DEFUN ("treesit-search-forward", DEFUN ("treesit-search-forward",
@ -2951,13 +3009,17 @@ always traverse leaf nodes first, then upwards. */)
treesit_initialize (); treesit_initialize ();
TSNode treesit_start = XTS_NODE (start)->node;
Lisp_Object parser = XTS_NODE (start)->parser; Lisp_Object parser = XTS_NODE (start)->parser;
if (treesit_search_forward (&treesit_start, predicate, parser, NILP (all), Lisp_Object return_value = Qnil;
NILP (backward))) TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
return make_treesit_node (parser, treesit_start); if (treesit_search_forward (&cursor, predicate, parser,
else NILP (backward), NILP (all)))
return Qnil; {
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
ts_tree_cursor_delete (&cursor);
return return_value;
} }
/* Recursively traverse the tree under CURSOR, and append the result /* Recursively traverse the tree under CURSOR, and append the result
@ -2969,13 +3031,12 @@ treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
Lisp_Object pred, Lisp_Object process_fn, Lisp_Object pred, Lisp_Object process_fn,
ptrdiff_t limit, Lisp_Object parser) ptrdiff_t limit, Lisp_Object parser)
{ {
bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
TSNode node = ts_tree_cursor_current_node (cursor);
bool match = treesit_traverse_match_predicate (node, pred, parser);
if (match) if (match)
{ {
/* If this node matches pred, add a new node to the parent's /* If this node matches pred, add a new node to the parent's
children list. */ children list. */
TSNode node = ts_tree_cursor_current_node (cursor);
Lisp_Object lisp_node = make_treesit_node (parser, node); Lisp_Object lisp_node = make_treesit_node (parser, node);
if (!NILP (process_fn)) if (!NILP (process_fn))
lisp_node = CALLN (Ffuncall, process_fn, lisp_node); lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
@ -3056,7 +3117,7 @@ a regexp. */)
if (!NILP (process_fn)) if (!NILP (process_fn))
CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
/* We use a default limit to 1000. See bug#59426 for the /* We use a default limit of 1000. See bug#59426 for the
discussion. */ discussion. */
ptrdiff_t the_limit = 1000; ptrdiff_t the_limit = 1000;
if (!NILP (limit)) if (!NILP (limit))
@ -3067,11 +3128,12 @@ a regexp. */)
treesit_initialize (); treesit_initialize ();
TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
Lisp_Object parser = XTS_NODE (root)->parser; Lisp_Object parser = XTS_NODE (root)->parser;
Lisp_Object parent = Fcons (Qnil, Qnil); Lisp_Object parent = Fcons (Qnil, Qnil);
TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
treesit_build_sparse_tree (&cursor, parent, predicate, process_fn, treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
the_limit, parser); the_limit, parser);
ts_tree_cursor_delete (&cursor);
Fsetcdr (parent, Fnreverse (Fcdr (parent))); Fsetcdr (parent, Fnreverse (Fcdr (parent)));
if (NILP (Fcdr (parent))) if (NILP (Fcdr (parent)))
return Qnil; return Qnil;

View file

@ -198,6 +198,141 @@
(kill-buffer base) (kill-buffer base)
(kill-buffer indirect)))) (kill-buffer indirect))))
;;; Tree traversal
(ert-deftest treesit-search-subtree ()
"Test `treesit-search-subtree'."
(skip-unless (treesit-language-available-p 'json))
(with-temp-buffer
(let (parser root array)
(progn
(insert "[[1,2,3], [1,2,3], [1,2,3]]")
(setq parser (treesit-parser-create 'json))
(setq root (treesit-parser-root-node parser))
(setq array (treesit-node-child root 0)))
(dolist (subarray (treesit-node-children array t))
;; Find named node forward.
(should (equal "1" (treesit-node-text
(treesit-search-subtree
subarray "number"))))
;; Find named node backward.
(should (equal "3" (treesit-node-text
(treesit-search-subtree
subarray "number" t))))
;; Find anonymous node forward.
(should (equal "[" (treesit-node-text
(treesit-search-subtree
subarray "\\[" nil t))))
;; Find anonymous node backward.
(should (equal "]" (treesit-node-text
(treesit-search-subtree
subarray "\\]" t t))))
;; If ALL=nil, it shouldn't find anonymous node.
(should (eq nil (treesit-node-text
(treesit-search-subtree
subarray "\\["))))
;; If ALL=nil, searching for number should still find the
;; numbers.
(should (equal "1" (treesit-node-text
(treesit-search-subtree
subarray "number" nil t))))
;; Find named node backward.
(should (equal "3" (treesit-node-text
(treesit-search-subtree
subarray "number" t t))))
))))
(defmacro treesit--ert-search-setup (&rest body)
"Setup macro used by `treesit-search-forward' and friends.
BODY is the test body."
`(with-temp-buffer
(let (parser root array)
(progn
(insert "[[1,2,3], [4,5,6], [7,8,9]]")
(setq parser (treesit-parser-create 'json))
(setq root (treesit-parser-root-node
parser))
(setq array (treesit-node-child root 0))
;; First bracket.
(setq cursor (treesit-node-child array 0)))
,@body)))
(ert-deftest treesit-search-forward ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child array 0)
then (treesit-search-forward cursor "" nil t)
for text in '("[" "[" "1" "," "2" "," "3" "]"
"[1,2,3]" ","
"[" "4" "," "5" "," "6" "]"
"[4,5,6]" ","
"[" "7" "," "8" "," "9" "]"
"[7,8,9]" "]"
"[[1,2,3], [4,5,6], [7,8,9]]")
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-forward-named-only ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child
(treesit-node-child array 1) 1)
then (treesit-search-forward cursor "")
for text in '("1" "2" "3" "[1,2,3]"
"4" "5" "6" "[4,5,6]"
"7" "8" "9" "[7,8,9]"
"[[1,2,3], [4,5,6], [7,8,9]]")
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-backward ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child array -1)
then (treesit-search-forward cursor "" t t)
for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
"[" "[1,2,3]"
"[" "1" "," "2" "," "3" "]"
"," "[4,5,6]"
"[" "4" "," "5" "," "6" "]"
"," "[7,8,9]"
"[" "7" "," "8" "," "9" "]"
"]"))
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-backward-named-only ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child
(treesit-node-child array -1 t) -1 t)
then (treesit-search-forward cursor "" t)
for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
"[1,2,3]" "1" "2" "3"
"[4,5,6]" "4" "5" "6"
"[7,8,9]" "7" "8" "9"))
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-cursor-helper-with-missing-node ()
"Test treesit_cursor_helper with a missing node."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(delete-char -1)
(setq root (treesit-buffer-root-node))
(setq array (treesit-node-child root 0))
;; If everything works, this should not hang.
(let ((missing-bracket (treesit-node-child array -1)))
(treesit-search-forward missing-bracket "" t))))
;;; Query ;;; Query
(ert-deftest treesit-query-api () (ert-deftest treesit-query-api ()