Fix treesit_cursor_helper (bug#60267)

The cause of that bug is that in a particular parse tree, the node treesit_cursor_helper tries to go to is a missing node, not only is it a missing node, it is the first node of a subtree. So when treesit_cursor_helper follows the algorithm and goes down the tree, it goes down the previous subtree (because that subtree's end = end_pos, because the target node has zero width). o | o--+-o | | +-+ +-+-+ | | | | | o x t o o (We ended up in x when the target is t, because t has zero width.) One way to solve it is to go back up the tree if we are at a leaf node and still haven't matched the target node. That's too ugly and finicky so I resorted to recursion. Now one more functions will return give up (treesit_node_parent) if we are in a werid parse tree that is super deep. But since we already kind of give up on this kind of parse trees (bug#59426), it doesn't really hurt. * src/treesit.c (treesit_cursor_helper_1): New function. (treesit_cursor_helper): Use the new function. Change return type to bool, and accept a cursor pointer. (Ftreesit_node_parent) (Ftreesit_search_subtree) (Ftreesit_search_forward) (Ftreesit_induce_sparse_tree): Use the new signature.
2022-12-23 15:22:31 -08:00 · 2022-12-23 15:22:31 -08:00 · e492c21e81
commit e492c21e81
parent 4437dbedf7
1 changed files with 76 additions and 53 deletions
--- a/src/treesit.c
+++ b/src/treesit.c
@ -1762,7 +1762,7 @@ If NODE is nil, return nil.  */)
  return build_string (string);
 }

-static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object);
+static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);

 DEFUN ("treesit-node-parent",
       Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
@ -1778,7 +1778,10 @@ Return nil if NODE has no parent.  If NODE is nil, return nil.  */)

  TSNode treesit_node = XTS_NODE (node)->node;
  Lisp_Object parser = XTS_NODE (node)->parser;
-  TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, treesit_node, parser))
+    return return_value;
+
  if (ts_tree_cursor_goto_parent (&cursor))
  {
    TSNode parent = ts_tree_cursor_current_node (&cursor);
@ -2637,8 +2640,59 @@ treesit_assume_true (bool val)
  eassert (val == true);
 }

+/* Tries to move CURSOR to point to TARGET.  END_POS is the end of
+   TARGET.  If success, return true, otherwise move CURSOR back to
+   starting position and return false.  LIMIT is the recursion
+   limit.  */
+static bool
+treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
+			 uint32_t end_pos, ptrdiff_t limit)
+{
+  if (limit <= 0)
+    return false;
+
+  TSNode cursor_node = ts_tree_cursor_current_node (cursor);
+  if (ts_node_eq (cursor_node, *target))
+    return true;
+
+  if (!ts_tree_cursor_goto_first_child (cursor))
+    return false;
+
+  /* Skip nodes that definitely don't contain TARGET.  */
+  while (ts_node_end_byte (cursor_node) < end_pos)
+    {
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+	break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Go through each sibling that could contain TARGET.  Because of
+     missing nodes (their width is 0), there could be multiple
+     siblings that could contain TARGET.  */
+  while (ts_node_start_byte (cursor_node) <= end_pos)
+    {
+      if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
+	return true;
+
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+	break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Couldn't find TARGET, must be not in this subtree, move cursor
+     back and pray that other brothers and sisters can succeed.  */
+  treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
+  return false;
+}
+
 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
-   that produced NODE.
+   that produced NODE.  If success, return true, otherwise return
+   false.  This function should almost always succeed, but if the parse
+   tree is strangely too deep and exceeds the recursion limit, this
+   function will fail and return false.
+
+   If this function returns true, caller needs to free CURSOR; if
+   returns false, caller don't need to free CURSOR.

   The reason we need this instead of simply using ts_tree_cursor_new
   is that we have to create the cursor on the root node and traverse
@ -2646,56 +2700,16 @@ treesit_assume_true (bool val)
   Otherwise going to sibling or parent of NODE wouldn't work.

   (Wow perfect filling.)  */
-static TSTreeCursor
-treesit_cursor_helper (TSNode node, Lisp_Object parser)
+static bool
+treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
 {
  uint32_t end_pos = ts_node_end_byte (node);
  TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
-  TSTreeCursor cursor = ts_tree_cursor_new (root);
-  TSNode cursor_node = ts_tree_cursor_current_node (&cursor);
-  /* This is like treesit-node-at.  We go down from the root node,
-     either to first child or next sibling, repeatedly, and finally
-     arrive at NODE.  */
-  while (!ts_node_eq (node, cursor_node))
-    {
-      treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
-      cursor_node = ts_tree_cursor_current_node (&cursor);
-      /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so
-	 we just go through each sibling.  */
-      while (ts_node_is_missing (cursor_node)
-	     || ts_node_end_byte (cursor_node) < end_pos)
-	{
-	  /* A "missing" node has zero width, so it's possible that
-	     its end = NODE.end but it's not NODE, so we skip them.
-	     But we need to make sure this missing node is not the
-	     node we are looking for before skipping it.  */
-	  if (ts_node_is_missing (cursor_node)
-	      && ts_node_eq (node, cursor_node))
-	    return cursor;
-	  treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor));
-	  cursor_node = ts_tree_cursor_current_node (&cursor);
-	}
-      /* Right now CURSOR.end >= NODE.end.  But what if CURSOR.end =
-	 NODE.end, and there are missing nodes after CURSOR, and the
-	 missing node after CURSOR is the NODE we are looking for??
-	 Well, create a probe and look ahead.  (This is tested by
-	 treesit-cursor-helper-with-missing-node.)  */
-      TSTreeCursor probe = ts_tree_cursor_copy (&cursor);
-      TSNode probe_node;
-      while (ts_tree_cursor_goto_next_sibling (&probe))
-	{
-	  probe_node = ts_tree_cursor_current_node (&probe);
-	  if (!ts_node_is_missing (probe_node))
-	    break;
-	  if (ts_node_eq (probe_node, node))
-	    {
-	      ts_tree_cursor_delete (&cursor);
-	      return probe;
-	    }
-	}
-      ts_tree_cursor_delete (&probe);
-    }
-  return cursor;
+  *cursor = ts_tree_cursor_new (root);
+  bool success = treesit_cursor_helper_1 (cursor, &node, end_pos, 1000);
+  if (!success)
+    ts_tree_cursor_delete (cursor);
+  return success;
 }

 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
@ -2968,7 +2982,10 @@ Return the first matched node, or nil if none matches.  */)

  Lisp_Object parser = XTS_NODE (node)->parser;
  Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
+    return return_value;
+
  if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
 			  NILP (all), the_limit, false))
    {
@ -3022,7 +3039,10 @@ always traverse leaf nodes first, then upwards.  */)

  Lisp_Object parser = XTS_NODE (start)->parser;
  Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
+    return return_value;
+
  if (treesit_search_forward (&cursor, predicate, parser,
 			      NILP (backward), NILP (all)))
    {
@ -3141,7 +3161,10 @@ a regexp.  */)

  Lisp_Object parser = XTS_NODE (root)->parser;
  Lisp_Object parent = Fcons (Qnil, Qnil);
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser))
+    return Qnil;
+
  treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
 			     the_limit, parser);
  ts_tree_cursor_delete (&cursor);