Improve treesit-node-at

* doc/lispref/parsing.texi (Retrieving Node): Update manual.
* lisp/treesit.el (treesit-node-at): Change semantics.  It now tries to
return the node that a user would expect in various circumstances.
* test/src/treesit-tests.el (treesit-node-at): New test.
This commit is contained in:
Yuan Fu 2022-11-11 20:04:38 -08:00
parent 4489450f37
commit 5cd3db73be
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
3 changed files with 85 additions and 27 deletions

View file

@ -492,9 +492,22 @@ Using an outdated node signals the @code{treesit-node-outdated} error.
@cindex syntax tree, retrieving nodes
@defun treesit-node-at pos &optional parser-or-lang named
This function returns the @emph{smallest} node that starts at or after
the buffer position @var{pos}. In other words, the start of the node
is greater or equal to @var{pos}.
This function returns a @emph{leaf} node at buffer position @var{pos}.
A leaf node is a node that doesn't have any child nodes.
This function tries to return a node whose span covers @var{pos}: the
node's beginning is less than or equal to @var{pos}, and the node's end
is greater than or equal to @var{pos}.
But if no leaf node's span covers @var{pos} (e.g., @var{pos} is on the
whitespace between two leaf nodes), this function returns the first
leaf node after @var{pos}.
Finally, if there is no leaf node after @var{pos}, return the first
leaf node before @var{pos}.
If @var{pos} is in between two adjacent nodes, this function returns
the one after @var{pos}.
When @var{parser-or-lang} is @code{nil} or omitted, this function uses
the first parser in @code{(treesit-parser-list)} of the current
@ -503,15 +516,12 @@ parser; if @var{parser-or-lang} is a language, it finds the first
parser using that language in @code{(treesit-parser-list)}, and uses
that.
If this function cannot find a suitable node to return, it returns
@code{nil}.
If @var{named} is non-@code{nil}, this function looks for a named node
only (@pxref{tree-sitter named node, named node}).
When @var{pos} is after all the text in the buffer, technically there
is no node after @var{pos}. But for convenience, this function will
return the last leaf node in the parse tree. If @var{strict} is
non-@code{nil}, this function will strictly comply with the semantics and
return @code{nil}.
Example:
@example

View file

@ -168,44 +168,61 @@ parser in `treesit-parser-list', or nil if there is no parser."
(treesit-parser-language
(treesit-node-parser node)))
;; NOTE(review): this span appears to interleave the pre-change and
;; post-change text of `treesit-node-at' (two `defun' headers, two
;; docstrings, and the old STRICT branch next to the new fallback
;; logic).  Read it as a diff, not as a single definition.
(defun treesit-node-at (pos &optional parser-or-lang named strict)
"Return the smallest node that starts at or after buffer position POS.
(defun treesit-node-at (pos &optional parser-or-lang named)
"Return the leaf node at position POS.
\"Starts at or after POS\" means the start of the node is greater
than or equal to POS.
A leaf node is a node that doesn't have any child nodes.
Return nil if none was found. If NAMED is non-nil, only look for
named node.
The returned node's span covers POS: the node's beginning is less
or equal to POS, and the node's end is greater or equal to POS.
If no leaf node's span covers POS (e.g., POS is on whitespace
between two leaf nodes), return the first leaf node after POS.
If there is no leaf node after POS, return the first leaf node
before POS.
If POS is in between two adjacent nodes, return the one after
POS.
Return nil if no leaf node can be returned. If NAMED is non-nil,
only look for named nodes.
If PARSER-OR-LANG is nil, use the first parser in
`treesit-parser-list'; if PARSER-OR-LANG is a parser, use
that parser; if PARSER-OR-LANG is a language, find a parser using
that language in the current buffer, and use that.
If POS is after all the text in the buffer, i.e., there is no
node after POS, return the last leaf node in the parse tree, even
though that node is before POS. If STRICT is non-nil, return nil
in this case."
that language in the current buffer, and use that."
;; ROOT comes straight from the parser when PARSER-OR-LANG is a
;; parser object; otherwise `treesit-buffer-root-node' resolves it.
(let* ((root (if (treesit-parser-p parser-or-lang)
(treesit-parser-root-node parser-or-lang)
(treesit-buffer-root-node parser-or-lang)))
(node root)
(node-before root)
;; POS-1 is the position just before POS, clamped to `point-min'.
;; It drives the second descent that looks for a node ending at POS.
(pos-1 (max (1- pos) (point-min)))
next)
(when node
;; This is very fast so no need for C implementation.
;; Descend from ROOT, one child at a time, until
;; `treesit-node-first-child-for-pos' returns nil.
(while (setq next (treesit-node-first-child-for-pos
node pos named))
(setq node next))
;; If we are at the end of buffer or after all the text, we will
;; end up with NODE = root node. For convenience, return the last
;; leaf node in the tree.
;; If POS is at the end of buffer, after all the text, we will
;; end up with NODE = root node. Instead of returning nil,
;; return the last leaf node in the tree for convenience.
(if (treesit-node-eq node root)
(if strict
nil
(progn
;; Follow the last child (index -1) at each level until there
;; are no more children.
(while (setq next (treesit-node-child node -1 named))
(setq node next))
node)
node))))
;; Normal case, where we found a node.
;; A node starting at or before POS is returned as is.
(if (<= (treesit-node-start node) pos)
node
;; So the node we found is completely after POS, try to find
;; a node whose end equals to POS.
;; Same descent as above, but starting again from ROOT and aiming
;; at POS-1 instead of POS.
(while (setq next (treesit-node-first-child-for-pos
node-before pos-1 named))
(setq node-before next))
;; Prefer the node that ends exactly at POS; otherwise fall back to
;; the node found after POS.
;; NOTE(review): `eq' compares the node end with POS by identity.
;; If callers may pass a marker as POS this presumably never
;; matches -- consider `=' (confirm with callers).
(if (eq (treesit-node-end node-before) pos)
node-before
node))))))
(defun treesit-node-on (beg end &optional parser-or-lang named)
"Return the smallest node covering BEG to END.

View file

@ -474,6 +474,37 @@ visible_end.)"
;; `treesit-search-forward-goto'
))
(ert-deftest treesit-node-at ()
  "Test `treesit-node-at'.

Check which leaf node is returned when point is directly on a
token, right after a token, just before the next token, and at the
end of the buffer after all the text.

The test is skipped when the JSON grammar is not available."
  (skip-unless (treesit-language-available-p 'json))
  ;; Run in a temporary buffer so the test neither modifies the buffer
  ;; that happens to be current nor has its searches match text that
  ;; was already there.
  (with-temp-buffer
    (insert "[1, 2, 3,4] ")
    ;; `treesit-node-at' is called without a parser argument below, so
    ;; it uses the first parser of this buffer, i.e. the one created
    ;; here.
    (treesit-parser-create 'json)
    ;; Point at ",", should return ",".
    (goto-char (point-min))
    (search-forward "1")
    (should (equal (treesit-node-text
                    (treesit-node-at (point)))
                   ","))
    ;; Point behind ",", should still return the ",".
    (search-forward ",")
    (should (equal (treesit-node-text
                    (treesit-node-at (point)))
                   ","))
    ;; One character further, past the "," and before "2": should
    ;; return "2".
    (forward-char)
    (should (equal (treesit-node-text
                    (treesit-node-at (point)))
                   "2"))
    ;; EOF, should return the last leaf node "]".
    (goto-char (point-max))
    (should (equal (treesit-node-text
                    (treesit-node-at (point)))
                   "]"))))
(ert-deftest treesit-misc ()
"Misc helper functions."
(let ((settings '((t 0 t)