Switch to use cursor API in treesit.c

ts_node_parent has bugs (bug#60054), using cursor API avoids that. Tree-sitter's author might remove ts_node_parent in the future, so might as well switch to use cursors now. We are basically reimplementing some of the logic of ts_node_prev_sibling and ts_node_parent in the sibling helper and cursor helper functions. See also https://github.com/tree-sitter/tree-sitter/issues/1992 * src/treesit.c (treesit_traverse_sibling_helper) (treesit_traverse_child_helper) (treesit_traverse_match_predicate): Reimplemented to use the cursor API. (treesit_search_dfs) (treesit_search_forward): Use the new cursor helper functions. (Ftreesit_search_subtree) (Ftreesit_search_forward) (Ftreesit_induce_sparse_tree): Use cursors. * test/src/treesit-tests.el (treesit-search-subtree): New test. (treesit--ert-search-setup): New macro. (treesit-search-forward) (treesit-search-forward-named-only) (treesit-search-backward) (treesit-search-backward-named-only) (treesit-cursor-helper-with-missing-node): New tests.
2022-12-17 15:01:57 -08:00 · 2022-12-17 15:01:57 -08:00 · 5f0286c0af
commit 5f0286c0af
parent a275e436df
2 changed files with 343 additions and 146 deletions
--- a/src/treesit.c
+++ b/src/treesit.c
@ -288,7 +288,7 @@ init_treesit_functions (void)
     slow enough to make insignificant any performance advantages from
     using the cursor.  Not exposing the cursor also minimizes the
     number of new types this adds to Emacs Lisp; currently, this adds
-     only the parser and node types.
+     only the parser, node, and compiled query types.
   - Because updating the change is handled on the C level as each
     change is made in the buffer, there is no way for Lisp to update
@ -2687,63 +2687,143 @@ treesit_cursor_helper (TSNode node, Lisp_Object parser)
  return cursor;
 }
-/* Return the next/previous named/unnamed sibling of NODE.  FORWARD
+/* Move CURSOR to the next/previous sibling.  FORWARD controls the
-   controls the direction and NAMED controls the nameness.  */
+   direction.  NAMED controls the namedness.  If there is a valid
-static TSNode
+   sibling, move CURSOR to it and return true, otherwise return false.
-treesit_traverse_sibling_helper (TSNode node, bool forward, bool named)
+   When false is returned, CURSOR points to a sibling node of the node
-{
+   we started at, but exactly which is undefined.  */
  if (forward)
    {
      if (named)
 	return ts_node_next_named_sibling (node);
      else
 	return ts_node_next_sibling (node);
    }
  else
    {
      if (named)
 	return ts_node_prev_named_sibling (node);
      else
 	return ts_node_prev_sibling (node);
    }
 }
 /* Return the first/last named/unnamed child of NODE.  FORWARD controls
   the direction and NAMED controls the nameness.  */
 static TSNode
 treesit_traverse_child_helper (TSNode node, bool forward, bool named)
 {
  if (forward)
    {
      if (named)
 	return ts_node_named_child (node, 0);
      else
 	return ts_node_child (node, 0);
    }
  else
    {
      if (named)
 	{
 	  uint32_t count = ts_node_named_child_count (node);
 	  uint32_t idx = count == 0 ? 0 : count - 1;
 	  return ts_node_named_child (node, idx);
 	}
      else
 	{
 	  uint32_t count = ts_node_child_count (node);
 	  uint32_t idx = count == 0 ? 0 : count - 1;
 	  return ts_node_child (node, idx);
 	}
    }
 }
 /* Return true if NODE matches PRED.  PRED can be a string or a
   function.  This function assumes PRED is either a string or a
   function.  */
 static bool
-treesit_traverse_match_predicate (TSNode node, Lisp_Object pred,
+treesit_traverse_sibling_helper (TSTreeCursor *cursor,
-				  Lisp_Object parser)
+				 bool forward, bool named)
 {
  if (forward)
    {
      if (!named)
 	return ts_tree_cursor_goto_next_sibling (cursor);
      /* Else named...  */
      while (ts_tree_cursor_goto_next_sibling (cursor))
 	{
 	  if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
 	    return true;
 	}
      return false;
    }
  else /* Backward.  */
    {
      /* Go to first child and go through each sibling, until we find
 	 the one just before the starting node.  */
      TSNode start = ts_tree_cursor_current_node (cursor);
      if (!ts_tree_cursor_goto_parent (cursor))
 	return false;
      treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
      /* Now CURSOR is at the first child.  If we started at the first
 	 child, then there is no further siblings.  */
      TSNode first_child = ts_tree_cursor_current_node (cursor);
      if (ts_node_eq (first_child, start))
 	return false;
      /* PROBE is always DELTA siblings ahead of CURSOR. */
      TSTreeCursor probe = ts_tree_cursor_copy (cursor);
      /* This is position of PROBE minus position of CURSOR.  */
      ptrdiff_t delta = 0;
      TSNode probe_node;
      TSNode cursor_node;
      while (ts_tree_cursor_goto_next_sibling (&probe))
 	{
 	  /* Move PROBE forward, if it equals to the starting node,
 	     CURSOR points to the node we want (prev valid sibling of
 	     the starting node).  */
 	  delta++;
 	  probe_node = ts_tree_cursor_current_node (&probe);
 	  /* PROBE matched, depending on NAMED, return true/false.  */
 	  if (ts_node_eq (probe_node, start))
 	    {
 	      ts_tree_cursor_delete (&probe);
 	      cursor_node = ts_tree_cursor_current_node (cursor);
 	      ts_tree_cursor_delete (&probe);
 	      return (!named || (named && ts_node_is_named (cursor_node)));
 	    }
 	  /* PROBE didn't match, move CURSOR forward to PROBE's
 	     position, but if we are looking for named nodes, only
 	     move CURSOR to PROBE if PROBE is at a named node.  */
 	  if (!named || (named && ts_node_is_named (probe_node)))
 	    for (; delta > 0; delta--)
 	      treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
 	}
      ts_tree_cursor_delete (&probe);
      return false;
    }
 }
 /* Move CURSOR to the first/last child.  FORWARD controls the
   direction.  NAMED controls the namedness.  If there is a valid
   child, move CURSOR to it and return true, otherwise don't move
   CURSOR and return false.  */
 static bool
 treesit_traverse_child_helper (TSTreeCursor *cursor,
 			       bool forward, bool named)
 {
  if (forward)
    {
      if (!named)
 	return ts_tree_cursor_goto_first_child (cursor);
      else
 	{
 	  if (!ts_tree_cursor_goto_first_child (cursor))
 	    return false;
 	  /* After this point, if you return false, make sure to go
 	     back to parent.  */
 	  TSNode first_child = ts_tree_cursor_current_node (cursor);
 	  if (ts_node_is_named (first_child))
 	    return true;
 	  if (treesit_traverse_sibling_helper (cursor, true, true))
 	    return true;
 	  else
 	    {
 	      treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
 	      return false;
 	    }
 	}
    }
  else /* Backward.  */
    {
      if (!ts_tree_cursor_goto_first_child (cursor))
 	return false;
      /* After this point, if you return false, make sure to go
 	 back to parent.  */
      /* First go to the last child.  */
      while (ts_tree_cursor_goto_next_sibling (cursor));
      if (!named)
 	return true;
      /* Else named... */
      if (treesit_traverse_sibling_helper(cursor, false, true))
 	return true;
      else
 	{
 	  treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
 	  return false;
 	}
    }
 }
 /* Return true if the node at CURSOR matches PRED.  PRED can be a
   string or a function.  This function assumes PRED is either a
   string or a function.  If NAMED is true, also check that the node
   is named.  */
 static bool
 treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
 				  Lisp_Object parser, bool named)
 {
  TSNode node = ts_tree_cursor_current_node (cursor);
  if (named && !ts_node_is_named (node))
    return false;
  if (STRINGP (pred))
    {
      const char *type = ts_node_type (node);
@ -2754,73 +2834,60 @@ treesit_traverse_match_predicate (TSNode node, Lisp_Object pred,
      Lisp_Object lisp_node = make_treesit_node (parser, node);
      return !NILP (CALLN (Ffuncall, pred, lisp_node));
    }
 }
-/* Traverse the parse tree starting from ROOT (but ROOT is not
+/* Traverse the parse tree starting from CURSOR.  PRED can be a
-   matches against PRED).  PRED can be a function (takes a node and
+   function (takes a node and returns nil/non-nil), or a string
-   returns nil/non-nil),or a string (treated as regexp matching the
+   (treated as regexp matching the node's type, must be all single
-   node's type, ignores case, must be all single byte characters).  If
+   byte characters).  If the node satisfies PRED, leave CURSOR on that
-   the node satisfies PRED , terminate, set ROOT to that node, and
+   node and return true.  If no node satisfies PRED, move CURSOR back
-   return true.  If no node satisfies PRED, return FALSE.  PARSER is
+   to starting position and return false.
   the parser of ROOT.
   LIMIT is the number of levels we descend in the tree.  FORWARD
   controls the direction in which we traverse the tree, true means
-   forward, false backward.  If NAMED is true, only traverse named
+   forward, false backward.  If SKIP_ROOT is true, don't match ROOT.
-   nodes, if false, all nodes.  If SKIP_ROOT is true, don't match
+   */
   ROOT.  */
 static bool
-treesit_search_dfs (TSNode *root, Lisp_Object pred, Lisp_Object parser,
+treesit_search_dfs (TSTreeCursor *cursor,
-		    bool named, bool forward, ptrdiff_t limit,
+		    Lisp_Object pred, Lisp_Object parser,
 		    bool forward, bool named, ptrdiff_t limit,
 		    bool skip_root)
 {
-  /* TSTreeCursor doesn't allow us to move backward, so we can't use
+  if (!skip_root
-     it.  */
+      && treesit_traverse_match_predicate (cursor, pred, parser, named))
-  TSNode node = *root;
+    return true;
-  if (!skip_root && treesit_traverse_match_predicate (node, pred, parser))
+  if (limit == 0)
    {
      *root = node;
      return true;
    }
  if (limit <= 0)
    return false;
  else
    {
      int count = (named
 		   ? ts_node_named_child_count (node)
 		   : ts_node_child_count (node));
      for (int offset = 0; offset < count; offset++)
 	{
 	  uint32_t idx = forward ? offset : count - offset - 1;
 	  TSNode child = (named
 			  ? ts_node_named_child (node, idx)
 			  : ts_node_child (node, idx));
-	  if (!ts_node_is_null (child)
+  if (!treesit_traverse_child_helper (cursor, forward, named))
-	      && treesit_search_dfs (&child, pred, parser, named,
+    return false;
-				     forward, limit - 1, false))
+  /* After this point, if you return false, make sure to go back to
-	    {
+     parent.  */
-	      *root = child;
+
-	      return true;
+  do /* Iterate through each child.  */
-	    }
+    {
-	}
+      if (treesit_search_dfs (cursor, pred, parser, forward,
-      return false;
+			      named, limit - 1, false))
 	return true;
    }
  while (treesit_traverse_sibling_helper (cursor, forward, false));
  /* No match in any child's subtree, go back to starting node.  */
  treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  return false;
 }
 /* Go through the whole tree linearly, leaf-first, starting from
   START.  PRED, PARSER, NAMED, FORWARD are the same as in
-   ts_search_subtre.  If UP_ONLY is true, never go to children, only
+   ts_search_subtree.  If a match is found, leave CURSOR at that node,
-   sibling and parents.  */
+   and return true, if no match is found, return false, and CURSOR's
   position is undefined.  */
 static bool
-treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser,
+treesit_search_forward (TSTreeCursor *cursor,
-			bool named, bool forward)
+			Lisp_Object pred, Lisp_Object parser,
 			bool forward, bool named)
 {
  TSNode node = *start;
  /* We don't search for subtree and always search from the leaf
     nodes.  This way repeated call of this function traverses each
     node in the tree once and only once:
@ -2830,39 +2897,26 @@ treesit_search_forward (TSNode *start, Lisp_Object pred, Lisp_Object parser,
  bool initial = true;
  while (true)
    {
-      if (!initial /* We don't match START.  */
+      if (!initial /* We don't match the starting node.  */
-	  && treesit_traverse_match_predicate (node, pred, parser))
+	  && treesit_traverse_match_predicate (cursor, pred, parser, named))
-	{
+	return true;
 	  *start = node;
 	  return true;
 	}
      initial = false;
-      TSNode next = treesit_traverse_sibling_helper (node, forward, named);
+      /* Try going to the next sibling, if there is no next sibling,
-      while (ts_node_is_null (next))
+	 go to parent and try again.  */
      while (!treesit_traverse_sibling_helper (cursor, forward, named))
 	{
 	  /* There is no next sibling, go to parent.  */
-	  node = ts_node_parent (node);
+	  if (!ts_tree_cursor_goto_parent (cursor))
 	  if (ts_node_is_null (node))
 	    return false;
-	  if (treesit_traverse_match_predicate (node, pred, parser))
+	  if (treesit_traverse_match_predicate (cursor, pred, parser, named))
 	    {
 	      *start = node;
 	      return true;
 	    }
 	  next = treesit_traverse_sibling_helper (node, forward, named);
 	}
      /* We are at the next sibling, deep dive into the first leaf
 	 node.  */
-      TSNode next_next = treesit_traverse_child_helper (next, forward, named);
+      while (treesit_traverse_child_helper (cursor, forward, false));
-      while (!ts_node_is_null (next_next))
+      /* At this point CURSOR is at a leaf node.  */
 	{
 	  next = next_next;
 	  next_next = treesit_traverse_child_helper (next, forward, named);
 	}
      /* At this point NEXT is a leaf node.  */
      node = next;
    }
 }
@ -2890,7 +2944,7 @@ Return the first matched node, or nil if none matches.  */)
  CHECK_SYMBOL (all);
  CHECK_SYMBOL (backward);
-  /* We use a default limit to 1000.  See bug#59426 for the
+  /* We use a default limit of 1000.  See bug#59426 for the
     discussion.  */
  ptrdiff_t the_limit = 1000;
  if (!NILP (limit))
@ -2901,13 +2955,17 @@ Return the first matched node, or nil if none matches.  */)
  treesit_initialize ();
  TSNode treesit_node = XTS_NODE (node)->node;
  Lisp_Object parser = XTS_NODE (node)->parser;
-  if (treesit_search_dfs (&treesit_node, predicate, parser, NILP (all),
+  Lisp_Object return_value = Qnil;
-			  NILP (backward), the_limit, false))
+  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
-    return make_treesit_node (parser, treesit_node);
+  if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
-  else
+			  NILP (all), the_limit, false))
-    return Qnil;
+    {
      TSNode node = ts_tree_cursor_current_node (&cursor);
      return_value = make_treesit_node (parser, node);
    }
  ts_tree_cursor_delete (&cursor);
  return return_value;
 }
 DEFUN ("treesit-search-forward",
@ -2951,13 +3009,17 @@ always traverse leaf nodes first, then upwards.  */)
  treesit_initialize ();
  TSNode treesit_start = XTS_NODE (start)->node;
  Lisp_Object parser = XTS_NODE (start)->parser;
-  if (treesit_search_forward (&treesit_start, predicate, parser, NILP (all),
+  Lisp_Object return_value = Qnil;
-			      NILP (backward)))
+  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
-    return make_treesit_node (parser, treesit_start);
+  if (treesit_search_forward (&cursor, predicate, parser,
-  else
+			      NILP (backward), NILP (all)))
-    return Qnil;
+    {
      TSNode node = ts_tree_cursor_current_node (&cursor);
      return_value = make_treesit_node (parser, node);
    }
  ts_tree_cursor_delete (&cursor);
  return return_value;
 }
 /* Recursively traverse the tree under CURSOR, and append the result
@ -2969,13 +3031,12 @@ treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
 			   Lisp_Object pred, Lisp_Object process_fn,
 			   ptrdiff_t limit, Lisp_Object parser)
 {
-
+  bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
  TSNode node = ts_tree_cursor_current_node (cursor);
  bool match = treesit_traverse_match_predicate (node, pred, parser);
  if (match)
    {
      /* If this node matches pred, add a new node to the parent's
 	 children list.  */
      TSNode node = ts_tree_cursor_current_node (cursor);
      Lisp_Object lisp_node = make_treesit_node (parser, node);
      if (!NILP (process_fn))
 	lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
@ -3056,7 +3117,7 @@ a regexp.  */)
  if (!NILP (process_fn))
    CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
-  /* We use a default limit to 1000.  See bug#59426 for the
+  /* We use a default limit of 1000.  See bug#59426 for the
     discussion.  */
  ptrdiff_t the_limit = 1000;
  if (!NILP (limit))
@ -3067,11 +3128,12 @@ a regexp.  */)
  treesit_initialize ();
  TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
  Lisp_Object parser = XTS_NODE (root)->parser;
  Lisp_Object parent = Fcons (Qnil, Qnil);
  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
  treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
 			     the_limit, parser);
  ts_tree_cursor_delete (&cursor);
  Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  if (NILP (Fcdr (parent)))
    return Qnil;
--- a/test/src/treesit-tests.el
+++ b/test/src/treesit-tests.el
@ -198,6 +198,141 @@
      (kill-buffer base)
      (kill-buffer indirect))))
 ;;; Tree traversal
 (ert-deftest treesit-search-subtree ()
  "Test `treesit-search-subtree'."
  (skip-unless (treesit-language-available-p 'json))
  (with-temp-buffer
    (let (parser root array)
      (progn
        (insert "[[1,2,3], [1,2,3], [1,2,3]]")
        (setq parser (treesit-parser-create 'json))
        (setq root (treesit-parser-root-node parser))
        (setq array (treesit-node-child root 0)))
      (dolist (subarray (treesit-node-children array t))
        ;; Find named node forward.
        (should (equal "1" (treesit-node-text
                            (treesit-search-subtree
                             subarray "number"))))
        ;; Find named node backward.
        (should (equal "3" (treesit-node-text
                            (treesit-search-subtree
                             subarray "number" t))))
        ;; Find anonymous node forward.
        (should (equal "[" (treesit-node-text
                            (treesit-search-subtree
                             subarray "\\[" nil t))))
        ;; Find anonymous node backward.
        (should (equal "]" (treesit-node-text
                            (treesit-search-subtree
                             subarray "\\]" t t))))
        ;; If ALL=nil, it shouldn't find anonymous node.
        (should (eq nil (treesit-node-text
                         (treesit-search-subtree
                          subarray "\\["))))
        ;; If ALL=nil, searching for number should still find the
        ;; numbers.
        (should (equal "1" (treesit-node-text
                            (treesit-search-subtree
                             subarray "number" nil t))))
        ;; Find named node backward.
        (should (equal "3" (treesit-node-text
                            (treesit-search-subtree
                             subarray "number" t t))))
        ))))
 (defmacro treesit--ert-search-setup (&rest body)
  "Setup macro used by `treesit-search-forward' and friends.
 BODY is the test body."
  `(with-temp-buffer
     (let (parser root array)
       (progn
         (insert "[[1,2,3], [4,5,6], [7,8,9]]")
         (setq parser (treesit-parser-create 'json))
         (setq root (treesit-parser-root-node
                     parser))
         (setq array (treesit-node-child root 0))
         ;; First bracket.
         (setq cursor (treesit-node-child array 0)))
       ,@body)))
 (ert-deftest treesit-search-forward ()
  "Test `treesit-search-forward'."
  (skip-unless (treesit-language-available-p 'json))
  (treesit--ert-search-setup
   (cl-loop for cursor = (treesit-node-child array 0)
            then (treesit-search-forward cursor "" nil t)
            for text in '("[" "[" "1" "," "2" "," "3" "]"
                          "[1,2,3]" ","
                          "[" "4" "," "5" "," "6" "]"
                          "[4,5,6]" ","
                          "[" "7" "," "8" "," "9" "]"
                          "[7,8,9]" "]"
                          "[[1,2,3], [4,5,6], [7,8,9]]")
            while cursor
            do (should (equal (treesit-node-text cursor)
                              text)))))
 (ert-deftest treesit-search-forward-named-only ()
  "Test `treesit-search-forward'."
  (skip-unless (treesit-language-available-p 'json))
  (treesit--ert-search-setup
   (cl-loop for cursor = (treesit-node-child
                          (treesit-node-child array 1) 1)
            then (treesit-search-forward cursor "")
            for text in '("1" "2"  "3" "[1,2,3]"
                          "4" "5" "6" "[4,5,6]"
                          "7" "8"  "9" "[7,8,9]"
                          "[[1,2,3], [4,5,6], [7,8,9]]")
            while cursor
            do (should (equal (treesit-node-text cursor)
                              text)))))
 (ert-deftest treesit-search-backward ()
  "Test `treesit-search-forward'."
  (skip-unless (treesit-language-available-p 'json))
  (treesit--ert-search-setup
   (cl-loop for cursor = (treesit-node-child array -1)
            then (treesit-search-forward cursor "" t t)
            for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
                                   "[" "[1,2,3]"
                                   "[" "1" "," "2" "," "3" "]"
                                   "," "[4,5,6]"
                                   "[" "4" "," "5" "," "6" "]"
                                   "," "[7,8,9]"
                                   "[" "7" "," "8" "," "9" "]"
                                   "]"))
            while cursor
            do (should (equal (treesit-node-text cursor)
                              text)))))
 (ert-deftest treesit-search-backward-named-only ()
  "Test `treesit-search-forward'."
  (skip-unless (treesit-language-available-p 'json))
  (treesit--ert-search-setup
   (cl-loop for cursor = (treesit-node-child
                          (treesit-node-child array -1 t) -1 t)
            then (treesit-search-forward cursor "" t)
            for text in (reverse '("[[1,2,3], [4,5,6], [7,8,9]]"
                                   "[1,2,3]" "1" "2"  "3"
                                   "[4,5,6]" "4" "5" "6"
                                   "[7,8,9]" "7" "8"  "9"))
            while cursor
            do (should (equal (treesit-node-text cursor)
                              text)))))
 (ert-deftest treesit-cursor-helper-with-missing-node ()
  "Test treesit_cursor_helper with a missing node."
  (skip-unless (treesit-language-available-p 'json))
  (treesit--ert-search-setup
   (delete-char -1)
   (setq root (treesit-buffer-root-node))
   (setq array (treesit-node-child root 0))
   ;; If everything works, this should not hang.
   (let ((missing-bracket (treesit-node-child array -1)))
     (treesit-search-forward missing-bracket "" t))))
 ;;; Query
 (ert-deftest treesit-query-api ()