Switch to use cursor API in treesit.c

ts_node_parent has bugs (bug#60054), using cursor API avoids that.
Tree-sitter's author might remove ts_node_parent in the future, so
might as well switch to use cursors now.  We are basically
reimplementing some of the logic of ts_node_prev_sibling and
ts_node_parent in the sibling helper and cursor helper functions.

See also https://github.com/tree-sitter/tree-sitter/issues/1992

* src/treesit.c (treesit_traverse_sibling_helper)
(treesit_traverse_child_helper)
(treesit_traverse_match_predicate): Reimplemented to use the cursor API.
(treesit_search_dfs)
(treesit_search_forward): Use the new cursor helper functions.
(Ftreesit_search_subtree)
(Ftreesit_search_forward)
(Ftreesit_induce_sparse_tree): Use cursors.

* test/src/treesit-tests.el (treesit-search-subtree): New test.
(treesit--ert-search-setup): New macro.
(treesit-search-forward)
(treesit-search-forward-named-only)
(treesit-search-backward)
(treesit-search-backward-named-only)
(treesit-cursor-helper-with-missing-node): New tests.
This commit is contained in:
Yuan Fu 2022-12-17 15:01:57 -08:00
parent a275e436df
commit 5f0286c0af
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
2 changed files with 343 additions and 146 deletions

View file

@ -288,7 +288,7 @@ init_treesit_functions (void)
slow enough to make insignificant any performance advantages from
using the cursor. Not exposing the cursor also minimizes the
number of new types this adds to Emacs Lisp; currently, this adds
only the parser and node types.
only the parser, node, and compiled query types.
- Because updating the change is handled on the C level as each
change is made in the buffer, there is no way for Lisp to update
@ -2687,63 +2687,143 @@ treesit_cursor_helper (TSNode node, Lisp_Object parser)
return cursor;
}
/* Return the next/previous named/unnamed sibling of NODE. FORWARD
controls the direction and NAMED controls the nameness. */
static TSNode
treesit_traverse_sibling_helper (TSNode node, bool forward, bool named)
{
if (forward)
{
if (named)
return ts_node_next_named_sibling (node);
else
return ts_node_next_sibling (node);
}
else
{
if (named)
return ts_node_prev_named_sibling (node);
else
return ts_node_prev_sibling (node);
}
}
/* Return the first/last named/unnamed child of NODE. FORWARD controls
the direction and NAMED controls the nameness. */
static TSNode
treesit_traverse_child_helper (TSNode node, bool forward, bool named)
{
if (forward)
{
if (named)
return ts_node_named_child (node, 0);
else
return ts_node_child (node, 0);
}
else
{
if (named)
{
uint32_t count = ts_node_named_child_count (node);
uint32_t idx = count == 0 ? 0 : count - 1;
return ts_node_named_child (node, idx);
}
else
{
uint32_t count = ts_node_child_count (node);
uint32_t idx = count == 0 ? 0 : count - 1;
return ts_node_child (node, idx);
}
}
}
/* Return true if NODE matches PRED. PRED can be a string or a
function. This function assumes PRED is either a string or a
function. */
/* Move CURSOR to the next/previous sibling. FORWARD controls the
direction. NAMED controls the namedness. If there is a valid
sibling, move CURSOR to it and return true, otherwise return false.
When false is returned, CURSOR points to a sibling node of the node
we started at, but exactly which is undefined. */
static bool
treesit_traverse_match_predicate (TSNode node, Lisp_Object pred,
Lisp_Object parser)
treesit_traverse_sibling_helper (TSTreeCursor *cursor,
bool forward, bool named)
{
if (forward)
{
if (!named)
return ts_tree_cursor_goto_next_sibling (cursor);
/* Else named... */
while (ts_tree_cursor_goto_next_sibling (cursor))
{
if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
return true;
}
return false;
}
else /* Backward. */
{
/* Go to first child and go through each sibling, until we find
the one just before the starting node. */
TSNode start = ts_tree_cursor_current_node (cursor);
if (!ts_tree_cursor_goto_parent (cursor))
return false;
treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
/* Now CURSOR is at the first child. If we started at the first
child, then there is no further siblings. */
TSNode first_child = ts_tree_cursor_current_node (cursor);
if (ts_node_eq (first_child, start))
return false;
/* PROBE is always DELTA siblings ahead of CURSOR. */
TSTreeCursor probe = ts_tree_cursor_copy (cursor);
/* This is position of PROBE minus position of CURSOR. */
ptrdiff_t delta = 0;
TSNode probe_node;
TSNode cursor_node;
while (ts_tree_cursor_goto_next_sibling (&probe))
{
/* Move PROBE forward, if it equals to the starting node,
CURSOR points to the node we want (prev valid sibling of
the starting node). */
delta++;
probe_node = ts_tree_cursor_current_node (&probe);
/* PROBE matched, depending on NAMED, return true/false. */
if (ts_node_eq (probe_node, start))
{
ts_tree_cursor_delete (&probe);
cursor_node = ts_tree_cursor_current_node (cursor);
ts_tree_cursor_delete (&probe);
return (!named || (named && ts_node_is_named (cursor_node)));
}
/* PROBE didn't match, move CURSOR forward to PROBE's
position, but if we are looking for named nodes, only
move CURSOR to PROBE if PROBE is at a named node. */
if (!named || (named && ts_node_is_named (probe_node)))
for (; delta > 0; delta--)
treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
}
ts_tree_cursor_delete (&probe);
return false;
}
}
/* Move CURSOR to the first/last child. FORWARD controls the
direction. NAMED controls the namedness. If there is a valid
child, move CURSOR to it and return true, otherwise don't move
CURSOR and return false. */
static bool
treesit_traverse_child_helper (TSTreeCursor *cursor,
bool forward, bool named)
{
if (forward)
{
if (!named)
return ts_tree_cursor_goto_first_child (cursor);
else
{
if (!ts_tree_cursor_goto_first_child (cursor))
return false;
/* After this point, if you return false, make sure to go
back to parent. */
TSNode first_child = ts_tree_cursor_current_node (cursor);
if (ts_node_is_named (first_child))
return true;
if (treesit_traverse_sibling_helper (cursor, true, true))
return true;
else
{
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
}
}
}
else /* Backward. */
{
if (!ts_tree_cursor_goto_first_child (cursor))
return false;
/* After this point, if you return false, make sure to go
back to parent. */
/* First go to the last child. */
while (ts_tree_cursor_goto_next_sibling (cursor));
if (!named)
return true;
/* Else named... */
if (treesit_traverse_sibling_helper(cursor, false, true))
return true;
else
{
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
}
}
}
/* Return true if the node at CURSOR matches PRED. PRED can be a
string or a function. This function assumes PRED is either a
string or a function. If NAMED is true, also check that the node
is named. */
static bool
treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
Lisp_Object parser, bool named)
{
TSNode node = ts_tree_cursor_current_node (cursor);
if (named && !ts_node_is_named (node))
return false;
if (STRINGP (pred))
{
const char *type = ts_node_type (node);
@ -2754,73 +2834,60 @@ treesit_traverse_match_predicate (TSNode node, Lisp_Object pred,
Lisp_Object lisp_node = make_treesit_node (parser, node);
return !NILP (CALLN (Ffuncall, pred, lisp_node));
}
}
/* Traverse the parse tree starting from ROOT (but ROOT is not
matches against PRED). PRED can be a function (takes a node and
returns nil/non-nil),or a string (treated as regexp matching the
node's type, ignores case, must be all single byte characters). If
the node satisfies PRED , terminate, set ROOT to that node, and
return true. If no node satisfies PRED, return FALSE. PARSER is
the parser of ROOT.
/* Traverse the parse tree starting from CURSOR. PRED can be a
function (takes a node and returns nil/non-nil), or a string
(treated as regexp matching the node's type, must be all single
byte characters). If the node satisfies PRED, leave CURSOR on that
node and return true. If no node satisfies PRED, move CURSOR back
to starting position and return false.
LIMIT is the number of levels we descend in the tree. FORWARD
controls the direction in which we traverse the tree, true means
forward, false backward. If NAMED is true, only traverse named
nodes, if false, all nodes. If SKIP_ROOT is true, don't match
ROOT. */
forward, false backward. If SKIP_ROOT is true, don't match ROOT.
*/
static bool
treesit_search_dfs (TSNode *root, Lisp_Object pred, Lisp_Object parser,
bool named, bool forward, ptrdiff_t limit,
treesit_search_dfs (TSTreeCursor *cursor,
Lisp_Object pred, Lisp_Object parser,
bool forward, bool named, ptrdiff_t limit,
bool skip_root)
{
/* TSTreeCursor doesn't allow us to move backward, so we can't use
it. */
TSNode node = *root;
if (!skip_root
&& treesit_traverse_match_predicate (cursor, pred, parser, named))
return true;
if (!skip_root && treesit_traverse_match_predicate (node, pred, parser))
{
*root = node;
return true;
}
if (limit <= 0)
if (limit == 0)
return false;
else
{
int count = (named
? ts_node_named_child_count (node)
: ts_node_child_count (node));
for (int offset = 0; offset < count; offset++)
{
uint32_t idx = forward ? offset : count - offset - 1;
TSNode child = (named
? ts_node_named_child (node, idx)
: ts_node_child (node, idx));
if (!ts_node_is_null (child)
&& treesit_search_dfs (&child, pred, parser, named,
forward, limit - 1, false))
{
*root = child;
return true;
}
}
return false;
if (!treesit_traverse_child_helper (cursor, forward, named))
return false;
/* After this point, if you return false, make sure to go back to
parent. */
do /* Iterate through each child. */
{
if (treesit_search_dfs (cursor, pred, parser, forward,
named, limit - 1, false))
return true;
}
while (treesit_traverse_sibling_helper (cursor, forward, false));
/* No match in any child's subtree, go back to starting node. */
treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
return false;
}
/* Go through the whole tree linearly, leaf-first, starting from
START. PRED, PARSER, NAMED, FORWARD are the same as in
ts_search_subtre. If UP_ONLY is true, never go to children, only
sibling and parents. */
ts_search_subtree. If a match is found, leave CURSOR at that node,
and return true, if no match is found, return false, and CURSOR's
position is undefined. */
static bool
treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser,
bool named, bool forward)
treesit_search_forward (TSTreeCursor *cursor,
Lisp_Object pred, Lisp_Object parser,
bool forward, bool named)
{
TSNode node = *start;
/* We don't search for subtree and always search from the leaf
nodes. This way repeated call of this function traverses each
node in the tree once and only once:
@ -2830,39 +2897,26 @@ treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser,
bool initial = true;
while (true)
{
if (!initial /* We don't match START. */
&& treesit_traverse_match_predicate (node, pred, parser))
{
*start = node;
return true;
}
if (!initial /* We don't match the starting node. */
&& treesit_traverse_match_predicate (cursor, pred, parser, named))
return true;
initial = false;
TSNode next = treesit_traverse_sibling_helper (node, forward, named);
while (ts_node_is_null (next))
/* Try going to the next sibling, if there is no next sibling,
go to parent and try again. */
while (!treesit_traverse_sibling_helper (cursor, forward, named))
{
/* There is no next sibling, go to parent. */
node = ts_node_parent (node);
if (ts_node_is_null (node))
if (!ts_tree_cursor_goto_parent (cursor))
return false;
if (treesit_traverse_match_predicate (node, pred, parser))
{
*start = node;
if (treesit_traverse_match_predicate (cursor, pred, parser, named))
return true;
}
next = treesit_traverse_sibling_helper (node, forward, named);
}
/* We are at the next sibling, deep dive into the first leaf
node. */
TSNode next_next = treesit_traverse_child_helper (next, forward, named);
while (!ts_node_is_null (next_next))
{
next = next_next;
next_next = treesit_traverse_child_helper (next, forward, named);
}
/* At this point NEXT is a leaf node. */
node = next;
while (treesit_traverse_child_helper (cursor, forward, false));
/* At this point CURSOR is at a leaf node. */
}
}
@ -2890,7 +2944,7 @@ Return the first matched node, or nil if none matches. */)
CHECK_SYMBOL (all);
CHECK_SYMBOL (backward);
/* We use a default limit to 1000. See bug#59426 for the
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
ptrdiff_t the_limit = 1000;
if (!NILP (limit))
@ -2901,13 +2955,17 @@ Return the first matched node, or nil if none matches. */)
treesit_initialize ();
TSNode treesit_node = XTS_NODE (node)->node;
Lisp_Object parser = XTS_NODE (node)->parser;
if (treesit_search_dfs (&treesit_node, predicate, parser, NILP (all),
NILP (backward), the_limit, false))
return make_treesit_node (parser, treesit_node);
else
return Qnil;
Lisp_Object return_value = Qnil;
TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
NILP (all), the_limit, false))
{
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
ts_tree_cursor_delete (&cursor);
return return_value;
}
DEFUN ("treesit-search-forward",
@ -2951,13 +3009,17 @@ always traverse leaf nodes first, then upwards. */)
treesit_initialize ();
TSNode treesit_start = XTS_NODE (start)->node;
Lisp_Object parser = XTS_NODE (start)->parser;
if (treesit_search_forward (&treesit_start, predicate, parser, NILP (all),
NILP (backward)))
return make_treesit_node (parser, treesit_start);
else
return Qnil;
Lisp_Object return_value = Qnil;
TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
if (treesit_search_forward (&cursor, predicate, parser,
NILP (backward), NILP (all)))
{
TSNode node = ts_tree_cursor_current_node (&cursor);
return_value = make_treesit_node (parser, node);
}
ts_tree_cursor_delete (&cursor);
return return_value;
}
/* Recursively traverse the tree under CURSOR, and append the result
@ -2969,13 +3031,12 @@ treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
Lisp_Object pred, Lisp_Object process_fn,
ptrdiff_t limit, Lisp_Object parser)
{
TSNode node = ts_tree_cursor_current_node (cursor);
bool match = treesit_traverse_match_predicate (node, pred, parser);
bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
if (match)
{
/* If this node matches pred, add a new node to the parent's
children list. */
TSNode node = ts_tree_cursor_current_node (cursor);
Lisp_Object lisp_node = make_treesit_node (parser, node);
if (!NILP (process_fn))
lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
@ -3056,7 +3117,7 @@ a regexp. */)
if (!NILP (process_fn))
CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
/* We use a default limit to 1000. See bug#59426 for the
/* We use a default limit of 1000. See bug#59426 for the
discussion. */
ptrdiff_t the_limit = 1000;
if (!NILP (limit))
@ -3067,11 +3128,12 @@ a regexp. */)
treesit_initialize ();
TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
Lisp_Object parser = XTS_NODE (root)->parser;
Lisp_Object parent = Fcons (Qnil, Qnil);
TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
the_limit, parser);
ts_tree_cursor_delete (&cursor);
Fsetcdr (parent, Fnreverse (Fcdr (parent)));
if (NILP (Fcdr (parent)))
return Qnil;

View file

@ -198,6 +198,141 @@
(kill-buffer base)
(kill-buffer indirect))))
;;; Tree traversal
(ert-deftest treesit-search-subtree ()
"Test `treesit-search-subtree'."
(skip-unless (treesit-language-available-p 'json))
(with-temp-buffer
(let (parser root array)
(progn
(insert "[[1,2,3], [1,2,3], [1,2,3]]")
(setq parser (treesit-parser-create 'json))
(setq root (treesit-parser-root-node parser))
(setq array (treesit-node-child root 0)))
(dolist (subarray (treesit-node-children array t))
;; Find named node forward.
(should (equal "1" (treesit-node-text
(treesit-search-subtree
subarray "number"))))
;; Find named node backward.
(should (equal "3" (treesit-node-text
(treesit-search-subtree
subarray "number" t))))
;; Find anonymous node forward.
(should (equal "[" (treesit-node-text
(treesit-search-subtree
subarray "\\[" nil t))))
;; Find anonymous node backward.
(should (equal "]" (treesit-node-text
(treesit-search-subtree
subarray "\\]" t t))))
;; If ALL=nil, it shouldn't find anonymous node.
(should (eq nil (treesit-node-text
(treesit-search-subtree
subarray "\\["))))
;; If ALL=nil, searching for number should still find the
;; numbers.
(should (equal "1" (treesit-node-text
(treesit-search-subtree
subarray "number" nil t))))
;; Find named node backward.
(should (equal "3" (treesit-node-text
(treesit-search-subtree
subarray "number" t t))))
))))
(defmacro treesit--ert-search-setup (&rest body)
"Setup macro used by `treesit-search-forward' and friends.
BODY is the test body."
`(with-temp-buffer
(let (parser root array)
(progn
(insert "[[1,2,3], [4,5,6], [7,8,9]]")
(setq parser (treesit-parser-create 'json))
(setq root (treesit-parser-root-node
parser))
(setq array (treesit-node-child root 0))
;; First bracket.
(setq cursor (treesit-node-child array 0)))
,@body)))
(ert-deftest treesit-search-forward ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child array 0)
then (treesit-search-forward cursor "" nil t)
for text in '("[" "[" "1" "," "2" "," "3" "]"
"[1,2,3]" ","
"[" "4" "," "5" "," "6" "]"
"[4,5,6]" ","
"[" "7" "," "8" "," "9" "]"
"[7,8,9]" "]"
"[[1,2,3], [4,5,6], [7,8,9]]")
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-forward-named-only ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child
(treesit-node-child array 1) 1)
then (treesit-search-forward cursor "")
for text in '("1" "2" "3" "[1,2,3]"
"4" "5" "6" "[4,5,6]"
"7" "8" "9" "[7,8,9]"
"[[1,2,3], [4,5,6], [7,8,9]]")
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-backward ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child array -1)
then (treesit-search-forward cursor "" t t)
for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
"[" "[1,2,3]"
"[" "1" "," "2" "," "3" "]"
"," "[4,5,6]"
"[" "4" "," "5" "," "6" "]"
"," "[7,8,9]"
"[" "7" "," "8" "," "9" "]"
"]"))
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-search-backward-named-only ()
"Test `treesit-search-forward'."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(cl-loop for cursor = (treesit-node-child
(treesit-node-child array -1 t) -1 t)
then (treesit-search-forward cursor "" t)
for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
"[1,2,3]" "1" "2" "3"
"[4,5,6]" "4" "5" "6"
"[7,8,9]" "7" "8" "9"))
while cursor
do (should (equal (treesit-node-text cursor)
text)))))
(ert-deftest treesit-cursor-helper-with-missing-node ()
"Test treesit_cursor_helper with a missing node."
(skip-unless (treesit-language-available-p 'json))
(treesit--ert-search-setup
(delete-char -1)
(setq root (treesit-buffer-root-node))
(setq array (treesit-node-child root 0))
;; If everything works, this should not hang.
(let ((missing-bracket (treesit-node-child array -1)))
(treesit-search-forward missing-bracket "" t))))
;;; Query
(ert-deftest treesit-query-api ()