From 5cd3db73bed06e394ea8e7b0e332b1b1e5bd9571 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Fri, 11 Nov 2022 20:04:38 -0800 Subject: [PATCH] Improve treesit-node-at * doc/lispref/parsing.texi (Retrieving Node): Update manual. * lisp/treesit.el (treesit-node-at): Change semantic. It tries to return the node that a user would expect in various circumstances. * test/src/treesit-tests.el (treesit-node-at): New test. --- doc/lispref/parsing.texi | 28 ++++++++++++++------- lisp/treesit.el | 53 ++++++++++++++++++++++++++------------- test/src/treesit-tests.el | 31 +++++++++++++++++++++++ 3 files changed, 85 insertions(+), 27 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index bc2f0dda91c..f690e6a6047 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -492,9 +492,22 @@ Using an outdated node signals the @code{treesit-node-outdated} error. @cindex syntax tree, retrieving nodes @defun treesit-node-at pos &optional parser-or-lang named -This function returns the @emph{smallest} node that starts at or after -the buffer position @var{pos}. In other words, the start of the node -is greater or equal to @var{pos}. +This function returns a @emph{leaf} node at buffer position @var{pos}. +A leaf node is a node that doesn't have any child nodes. + +This function tries to return a node whose span covers @var{pos}: the +node's beginning is less than or equal to @var{pos}, and the node's end is +greater than or equal to @var{pos}. + +But if no leaf node's span covers @var{pos} (e.g., @var{pos} is on the +whitespace between two leaf nodes), this function returns the first +leaf node after @var{pos}. + +Finally, if there is no leaf node after @var{pos}, return the first +leaf node before @var{pos}. + +If @var{pos} is in between two adjacent nodes, this function returns +the one after @var{pos}. 
When @var{parser-or-lang} is @code{nil} or omitted, this function uses the first parser in @code{(treesit-parser-list)} of the current @@ -503,15 +516,12 @@ parser; if @var{parser-or-lang} is a language, it finds the first parser using that language in @code{(treesit-parser-list)}, and uses that. +If this function cannot find a suitable node to return, it returns +@code{nil}. + If @var{named} is non-@code{nil}, this function looks for a named node only (@pxref{tree-sitter named node, named node}). -When @var{pos} is after all the text in the buffer, technically there -is no node after @var{pos}. But for convenience, this function will -return the last leaf node in the parse tree. If @var{strict} is -non-@code{nil}, this function will strictly comply to the semantics and -return @var{nil}. - Example: @example diff --git a/lisp/treesit.el b/lisp/treesit.el index 1c61b1efebf..796b85ef74b 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -168,44 +168,61 @@ parser in `treesit-parser-list', or nil if there is no parser." (treesit-parser-language (treesit-node-parser node))) -(defun treesit-node-at (pos &optional parser-or-lang named strict) - "Return the smallest node that starts at or after buffer position POS. +(defun treesit-node-at (pos &optional parser-or-lang named) + "Return the leaf node at position POS. -\"Starts at or after POS\" means the start of the node is greater -than or equal to POS. +A leaf node is a node that doesn't have any child nodes. -Return nil if none was found. If NAMED is non-nil, only look for -named node. +The returned node's span covers POS: the node's beginning is less +than or equal to POS, and the node's end is greater than or equal to POS. + +If no leaf node's span covers POS (e.g., POS is on whitespace +between two leaf nodes), return the first leaf node after POS. + +If there is no leaf node after POS, return the first leaf node +before POS. + +If POS is in between two adjacent nodes, return the one after +POS. 
+ +Return nil if no leaf node can be returned. If NAMED is non-nil, +only look for named nodes. If PARSER-OR-LANG is nil, use the first parser in `treesit-parser-list'; if PARSER-OR-LANG is a parser, use that parser; if PARSER-OR-LANG is a language, find a parser using -that language in the current buffer, and use that. - -If POS is after all the text in the buffer, i.e., there is no -node after POS, return the last leaf node in the parse tree, even -though that node is before POS. If STRICT is non-nil, return nil -in this case." +that language in the current buffer, and use that." (let* ((root (if (treesit-parser-p parser-or-lang) (treesit-parser-root-node parser-or-lang) (treesit-buffer-root-node parser-or-lang))) (node root) + (node-before root) + (pos-1 (max (1- pos) (point-min))) next) (when node ;; This is very fast so no need for C implementation. (while (setq next (treesit-node-first-child-for-pos node pos named)) (setq node next)) - ;; If we are at the end of buffer or after all the text, we will - ;; end up with NODE = root node. For convenience, return the last - ;; leaf node in the tree. + ;; If POS is at the end of buffer, after all the text, we will + ;; end up with NODE = root node. Instead of returning nil, + ;; return the last leaf node in the tree for convenience. (if (treesit-node-eq node root) - (if strict - nil + (progn (while (setq next (treesit-node-child node -1 named)) (setq node next)) node) - node)))) + ;; Normal case, where we found a node. + (if (<= (treesit-node-start node) pos) + node + ;; So the node we found is completely after POS, try to find + ;; a node whose end equals POS. + (while (setq next (treesit-node-first-child-for-pos + node-before pos-1 named)) + (setq node-before next)) + (if (eq (treesit-node-end node-before) pos) + node-before + node)))))) (defun treesit-node-on (beg end &optional parser-or-lang named) "Return the smallest node covering BEG to END. 
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 5e4aea3ad41..7fc810492bc 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -474,6 +474,37 @@ visible_end.)" ;; `treesit-search-forward-goto' )) +(ert-deftest treesit-node-at () + "Test `treesit-node-at'." + (skip-unless (treesit-language-available-p 'json)) + (let (parser root-node) + (progn + (insert "[1, 2, 3,4] ") + (setq parser (treesit-parser-create 'json)) + (setq root-node (treesit-parser-root-node + parser))) + ;; Point at ",", should return ",". + (goto-char (point-min)) + (search-forward "1") + (should (equal (treesit-node-text + (treesit-node-at (point))) + ",")) + ;; Point behind ",", should still return the ",". + (search-forward ",") + (should (equal (treesit-node-text + (treesit-node-at (point))) + ",")) + ;; Point between "," and "2", should return 2. + (forward-char) + (should (equal (treesit-node-text + (treesit-node-at (point))) + "2")) + ;; EOF, should return the last leaf node "]". + (goto-char (point-max)) + (should (equal (treesit-node-text + (treesit-node-at (point))) + "]")))) + (ert-deftest treesit-misc () "Misc helper functions." (let ((settings '((t 0 t)