Add improved tree-sitter navigation

This new set of functions (and tests) should eliminate
defun-navigation bugs and limitations we currently have.  This commit
doesn't change any existing bahavior: treesit-beginning/end-of-defun
and friends are unchanged.  The plan is to later switch gear and
replace the current functions with the new ones introduced in this
change.

This is a relatively big change, but I've setup a comprehensive test,
and it should fix current bugs, so I think it's ok to put it on the
release branch.

The gist of the new navigation is to use treesit--defuns-around to
find the previous sibling defun, next sibling defun, and the parent
defun, then use this information to move to previous/next
beginning/end of defun in treesit--navigate-defun.

I also added comprehensive testing that tests all four possible
operations (prev-beg, next-beg, prev-end, next-end) starting at all
possible positions (between two sibling defuns, inside a sibling
defun, etc).

* lisp/treesit.el (treesit-defun-type-regexp): Expand definition to
allow (REGEXP . FILTER).  Old functions don't support this, but it
should be fine since we are soon replacing them.

(treesit-defun-tactic)
(treesit-defun-skipper): New variables.

(treesit-default-defun-skipper)
(treesit--defuns-around)
(treesit--top-level-defun)
(treesit--navigate-defun): New functions.

* test/src/treesit-tests.el (treesit--ert-insert-and-parse-marker)
(treesit--ert-collect-positions)
(treesit--ert-test-defun-navigation): New helper functions.

(treesit--ert-defun-navigation-python-program)
(treesit--ert-defun-navigation-js-program)
(treesit--ert-defun-navigation-bash-program)
(treesit--ert-defun-navigation-nested-master): New variables.

(treesit-defun-navigation-nested-1)
(treesit-defun-navigation-nested-2)
(treesit-defun-navigation-nested-3)
(treesit-defun-navigation-top-level): New tests.
This commit is contained in:
Yuan Fu 2022-12-12 20:25:53 -08:00
parent a5272e2a7c
commit 03ad1a92a2
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
2 changed files with 489 additions and 0 deletions

View file

@ -1569,8 +1569,25 @@ BACKWARD and ALL are the same as in `treesit-search-forward'."
"A regexp that matches the node type of defun nodes.
For example, \"(function|class)_definition\".
Sometimes not all nodes matched by the regexp are valid defuns.
In that case, set this variable to a cons cell of the
form (REGEXP . FILTER), where FILTER is a function that takes a
node (the matched node) and returns t if node is valid, or nil
for invalid node.
This is used by `treesit-beginning-of-defun' and friends.")
(defvar-local treesit-defun-tactic 'nested
"Determines how does Emacs treat nested defuns.
If the value is `top-level', Emacs only move across top-level
defuns, if the value is `nested', Emacs recognizes nested defuns.")
(defvar-local treesit-defun-skipper #'treesit-default-defun-skipper
"A function called after tree-sitter navigation moved a step.
It is called with no arguments. By default, this function tries
to move to the beginning of a line, either by moving to the empty
newline after a defun, or the beginning of a defun.")
(defvar-local treesit-defun-prefer-top-level nil
"When non-nil, Emacs prefers top-level defun.
@ -1639,6 +1656,196 @@ ARG is the same as in `beginning-of-defun'."
(when top
(goto-char (treesit-node-end top)))))
(defun treesit-default-defun-skipper ()
"Skips spaces after navigating a defun.
This fucntion tries to move to the beginning of a line, either by
moving to the empty newline after a defun, or to the beginning of
the current line if the beginning of the defun is indented."
(cond ((and (looking-at (rx (* (or " " "\\t")) "\n"))
(not (looking-at (rx bol))))
(goto-char (match-end 0)))
((save-excursion
(skip-chars-backward " \t")
(eq (point) (line-beginning-position)))
(goto-char (line-beginning-position)))))
;; prev-sibling:
;; 1. end-of-node before pos
;; 2. highest such node
;;
;; next-sibling:
;; 1. beg-of-node after pos
;; 2. highest such node
;;
;; parent:
;; 1. node covers pos
;; 2. smallest such node
(defun treesit--defuns-around (pos regexp &optional pred)
"Return the previous, next, and parent defun around POS.
Return a list of (PREV NEXT PARENT), where PREV and NEXT are
previous and next sibling defuns around POS, and PARENT is the
parent defun surrouding POS. All of three could be nil if no
sound defun exists.
REGEXP and PRED are the same as in `treesit-defun-type-regexp'."
(let* ((node (treesit-node-at pos))
;; NODE-BEFORE/AFTER = NODE when POS is completely in NODE,
;; but if not, that means point could be in between two
;; defun, in that case we want to use a node that's actually
;; before/after point.
(node-before (if (>= (treesit-node-start node) pos)
(treesit-search-forward-goto node "" t t t)
node))
(node-after (if (<= (treesit-node-end node) pos)
(treesit-search-forward-goto node "" nil nil t)
node))
(result (list nil nil nil))
(pred (or pred (lambda (_) t))))
;; 1. Find previous and next sibling defuns.
(cl-loop
for idx from 0 to 1
for node in (list node-before node-after)
for backward in '(t nil)
for pos-pred in (list (lambda (n) (<= (treesit-node-end n) pos))
(lambda (n) (>= (treesit-node-start n) pos)))
;; If point is inside a defun, our process below will never
;; return a next/prev sibling outside of that defun, effectively
;; any prev/next sibling is locked inside the smallest defun
;; covering point, which is the correct behavior. That's because
;; when there exists a defun that covers point,
;; `treesit-search-forward' will first reach that defun, after
;; that we only go upwards in the tree, so other defuns outside
;; of the covering defun is never reached. (Don't use
;; `treesit-search-forward-goto' as it breaks when NODE-AFTER is
;; the last token of a parent defun: it will skip the parent
;; defun because it wants to ensure progress.)
do (cl-loop for cursor = (when node
(save-excursion
(treesit-search-forward
node regexp backward backward)))
then (treesit-node-parent cursor)
while cursor
if (and (string-match-p
regexp (treesit-node-type cursor))
(funcall pred cursor)
(funcall pos-pred cursor))
do (setf (nth idx result) cursor)))
;; 2. Find the parent defun.
(setf (nth 2 result)
(cl-loop for cursor = (or (nth 0 result)
(nth 1 result)
node)
then (treesit-node-parent cursor)
while cursor
if (and (string-match-p
regexp (treesit-node-type cursor))
(funcall pred cursor)
(not (member cursor result)))
return cursor))
result))
(defun treesit--top-level-defun (node regexp &optional pred)
"Return the top-level parent defun of NODE.
REGEXP and PRED are the same as in `treesit-defun-type-regexp'."
(let* ((pred (or pred (lambda (_) t))))
;; `treesit-search-forward-goto' will make sure the matched node
;; is before POS.
(cl-loop for cursor = node
then (treesit-node-parent cursor)
while cursor
if (and (string-match-p
regexp (treesit-node-type cursor))
(funcall pred cursor))
do (setq node cursor))
node))
(defun treesit--navigate-defun (pos arg side &optional recursing)
"Navigate defun ARG steps from POS.
If ARG is positive, move forward that many steps, if negative,
move backward. If SIDE is `beg', stop at the beginning of a
defun, if SIDE is `end', stop at the end.
This function doesn't actaully move point, it just returns the
position it would move to. If there aren't enough defuns to move
across, return nil.
RECURSING is an internal parameter, if non-nil, it means this
function is called recursively."
(pcase-let*
((counter (abs arg))
(`(,regexp . ,pred)
(if (consp treesit-defun-type-regexp)
treesit-defun-type-regexp
(cons treesit-defun-type-regexp nil)))
;; Move POS to the beg/end of NODE. If NODE is nil, terminate.
;; Return the position we moved to.
(advance (lambda (node)
(let ((dest (pcase side
('beg (treesit-node-start node))
('end (treesit-node-end node)))))
(if (null dest)
(throw 'term nil)
dest)))))
(catch 'term
(while (> counter 0)
(pcase-let
((`(,prev ,next ,parent)
(treesit--defuns-around pos regexp pred)))
;; When PARENT is nil, nested and top-level are the same,
;; there there is a PARENT, make PARENT to be the top-level
;; parent and pretend there is no nested PREV and NEXT.
(when (and (eq treesit-defun-tactic 'top-level)
parent)
(setq parent (treesit--top-level-defun
parent regexp pred)
prev nil
next nil))
;; Move...
(if (> arg 0)
;; ...forward.
(if (and (eq side 'beg)
;; Should we skip the defun (recurse)?
(cond (next (not recursing)) ; [1] (see below)
(parent t) ; [2]
(t nil)))
;; Special case: go to next beg-of-defun. Set POS
;; to the end of next/parent defun, and run one more
;; step. If there is a next defun, step over it, so
;; we only need to recurse once, so we don't need to
;; recurse if we are already recursing [1]. If there
;; is no next but a parent, keep stepping out
;; (recursing) until we got out of the parents until
;; (1) there is a next sibling defun, or (2) no more
;; parents [2].
(setq pos
(or (treesit--navigate-defun
(treesit-node-end (or next parent))
1 'beg t)
(throw 'term nil)))
;; Normal case.
(setq pos (funcall advance (or next parent))))
;; ...backward.
(if (and (eq side 'end)
(cond (prev (not recursing))
(parent t)
(t nil)))
;; Special case: go to prev end-of-defun.
(setq pos
(or (treesit--navigate-defun
(treesit-node-start (or prev parent))
-1 'end t)
(throw 'term nil)))
;; Normal case.
(setq pos (funcall advance (or prev parent)))))
;; A successful step! Decrement counter.
(cl-decf counter))))
;; Counter equal to 0 means we successfully stepped ARG steps.
(if (eq counter 0)
pos
nil)))
;;; Activating tree-sitter
(defun treesit-ready-p (language &optional quiet)

View file

@ -607,6 +607,288 @@ visible_end.)"
(insert "]")
(should (treesit-node-check array-node 'outdated))))
;;; Defun navigation
;;
;; I've setup a framework for easier testing of defun navigation.
;;
;; To use it for a particular langauge, first write a test program
;; similar to `treesit--ert-defun-navigation-python-program', and
;; insert markers. Markers that marks BOLs are defined as follows:
;;
;; 100 Before 1st parent
;; 101 Beg of 1st parent
;; 102 End of 1st parent
;; 103 Beg of 2nd parent
;; 104 Beg of 1st method
;; 105 End of 1st method
;; 106 Beg of 2nd method
;; 107 End of 2nd method
;; 108 End of 2nd parent
;; 109 Beg of 3rd parent
;; 110 End of 3rd parent
;; 999 Dummy markers
;;
;; Then add marker 0-9 following the definition given in
;; `treesit--ert-defun-navigation-nested-master'. Then you can use
;; `treesit--ert-test-defun-navigation', pass the test program you
;; just wrote, and the appropriate master:
;;
;; - `treesit--ert-defun-navigation-nested-master' for nested defun
;; - `treesit--ert-defun-navigation-top-level-master' for top-level
(defun treesit--ert-insert-and-parse-marker (opening closing text)
"Insert TEXT and parse the marker positions in it.
TEXT should be a string in which contains position markings
like (1). OPENING and CLOSING are position marking's delimiters,
for (1), OPENING and CLOSING should be \"(\" and \")\",
respectively.
This function inserts TEXT, parses and removes all the markings,
and returns an alist of (NUMBER . POS), where number is each
marking's number, and POS is each marking's position."
(declare (indent 2))
(let (result)
(insert text)
(goto-char (point-min))
(while (re-search-forward
(rx-to-string `(seq ,opening (group (+ digit)) ,closing))
nil t)
(let ((idx (string-to-number (match-string 1))))
(push (cons idx (match-beginning 0)) result)
(delete-region (match-beginning 0) (match-end 0))))
(nreverse result)))
(defun treesit--ert-collect-positions (positions functions)
"Collect posifions after caling each function in FUNCTIONS.
POSITIONS should be a list of buffer positions, FUNCTIONS should
be a list of functions. This function collects the return value
of each function in FUNCTIONS starting at each position in
POSITIONS.
Return a list of (POS...) where each POS corresponds to a
function in FUNCTIONS. For example, if buffer content is
\"123\", POSITIONS is (2 3), FUNCTIONS is (point-min point-max),
the return value is ((1 3) (1 3))."
(cl-loop for pos in positions
collect (cl-loop for fn in functions
collect (progn
(goto-char pos)
(funcall fn)))))
(defun treesit--ert-test-defun-navigation
(init program master &optional opening closing)
"Run defun navigation tests on PROGRAM and MASTER.
INIT is a setup function that runs right after this function
creates a temporary buffer. It should take no argument.
PROGRAM is a program source in string, MASTER is a list of
\(START PREV-BEG NEXT-END PREV-END NEXT-BEG), where START is the
starting marker position, and the rest are marker positions the
corresponding navigation should stop at (after running
`treesit-defun-skipper').
OPENING and CLOSING are the same as in
`treesit--ert-insert-and-parse-marker', by default they are \"[\"
and \"]\"."
(with-temp-buffer
(funcall init)
(let* ((opening (or opening "["))
(closing (or closing "]"))
;; Insert program and parse marker positions.
(marker-alist (treesit--ert-insert-and-parse-marker
opening closing program))
;; Translate marker positions into buffer positions.
(decoded-master
(cl-loop for record in master
collect
(cl-loop for pos in record
collect (alist-get pos marker-alist))))
;; Collect positions each function returns.
(positions
(treesit--ert-collect-positions
;; The first columnn of DECODED-MASTER.
(mapcar #'car decoded-master)
;; Four functions: next-end, prev-beg, next-beg, prev-end.
(mapcar (lambda (conf)
(lambda ()
(if-let ((pos (funcall
#'treesit--navigate-defun
(point) (car conf) (cdr conf))))
(save-excursion
(goto-char pos)
(funcall treesit-defun-skipper)
(point)))))
'((-1 . beg)
(1 . end)
(-1 . end)
(1 . beg))))))
;; Verify each position.
(cl-loop for record in decoded-master
for orig-record in master
for poss in positions
for name = (format "marker %d" (car orig-record))
do (should (equal (cons name (cdr record))
(cons name poss)))))))
(defvar treesit--ert-defun-navigation-python-program
"[100]
[101]class Class1():
[999] prop = 0
[102]
[103]class Class2():[0]
[104] [1]def method1():
[999] [2]return 0[3]
[105] [4]
[106] [5]def method2():
[999] [6]return 0[7]
[107] [8]
[999] prop = 0[9]
[108]
[109]class Class3():
[999] prop = 0[10]
[110]
"
"Python source for navigation test.")
(defvar treesit--ert-defun-navigation-js-program
"[100]
[101]class Class1 {
[999]}
[102]
[103]class Class2 {[0]
[104] [1]method1() {
[999] [2]return 0;
[999] }[3]
[105] [4]
[106] [5]method2() {
[999] [6]return 0;
[999] }[7]
[107][8]
[999]}[9]
[108]
[109]class class3 {
[999]}[10]
[110]
"
"Javascript source for navigation test.")
(defvar treesit--ert-defun-navigation-bash-program
"[100]
[101]parent1 () {
[999]}
[102]
[103]parent2 () {[0]
[104] [1]sibling1 () {
[999] [2]echo hi
[999] }[3]
[105] [4]
[106] [5]sibling2 () {
[999] [6]echo hi
[999] }[7]
[107][8]
[999]}[9]
[108]
[109]parent3 () {
[999]}
[110]
"
"Javascript source for navigation test.")
(defvar treesit--ert-defun-navigation-nested-master
;; START PREV-BEG NEXT-END PREV-END NEXT-BEG
'((0 103 105 102 106) ; Between Beg of parent & 1st sibling.
(1 103 105 102 106) ; Beg of 1st sibling.
(2 104 105 102 106) ; Inside 1st sibling.
(3 104 107 102 109) ; End of 1st sibling.
(4 104 107 102 109) ; Between 1st sibling & 2nd sibling.
(5 104 107 102 109) ; Beg of 2nd sibling.
(6 106 107 105 109) ; Inside 2nd sibling.
(7 106 108 105 109) ; End of 2nd sibling.
(8 106 108 105 109) ; Between 2nd sibling & end of parent.
(9 103 110 102 nil) ; End of parent.
(100 nil 102 nil 103) ; Before 1st parent.
(101 nil 102 nil 103) ; Beg of 1st parent.
(102 101 108 nil 109) ; Between 1st & 2nd parent.
(103 101 108 nil 109) ; Beg of 2nd parent.
(110 109 nil 108 nil) ; After 3rd parent.
)
"Master of nested navigation test.
This basically says, e.g., \"start with point on marker 0, go to
the prev-beg, now point should be at marker 103\", etc.")
(defvar treesit--ert-defun-navigation-top-level-master
;; START PREV-BEG NEXT-END NEXT-BEG PREV-END
'((0 103 108 102 109) ; Between Beg of parent & 1st sibling.
(1 103 108 102 109) ; Beg of 1st sibling.
(2 103 108 102 109) ; Inside 1st sibling.
(3 103 108 102 109) ; End of 1st sibling.
(4 103 108 102 109) ; Between 1st sibling & 2nd sibling.
(5 103 108 102 109) ; Beg of 2nd sibling.
(6 103 108 102 109) ; Inside 2nd sibling.
(7 103 108 102 109) ; End of 2nd sibling.
(8 103 108 102 109) ; Between 2nd sibling & end of parent.
(9 103 110 102 nil) ; End of parent.
;; Top-level defuns should be identical to the nested test.
(100 nil 102 nil 103) ; Before 1st parent.
(101 nil 102 nil 103) ; Beg of 1st parent.
(102 101 108 nil 109) ; Between 1st & 2nd parent.
(103 101 108 nil 109) ; Beg of 2nd parent.
(110 109 nil 108 nil) ; After 3rd parent.
)
"Master of top-level navigation test.")
(ert-deftest treesit-defun-navigation-nested-1 ()
"Test defun navigation."
(skip-unless (treesit-language-available-p 'python))
;; Nested defun navigation
(let ((treesit-defun-tactic 'nested))
(require 'python)
(treesit--ert-test-defun-navigation
'python-ts-mode
treesit--ert-defun-navigation-python-program
treesit--ert-defun-navigation-nested-master)))
(ert-deftest treesit-defun-navigation-nested-2 ()
"Test defun navigation using `js-ts-mode'."
(skip-unless (treesit-language-available-p 'javascript))
;; Nested defun navigation
(let ((treesit-defun-tactic 'nested))
(require 'js)
(treesit--ert-test-defun-navigation
'js-ts-mode
treesit--ert-defun-navigation-js-program
treesit--ert-defun-navigation-nested-master)))
(ert-deftest treesit-defun-navigation-nested-3 ()
"Test defun navigation using `bash-ts-mode'."
(skip-unless (treesit-language-available-p 'bash))
;; Nested defun navigation
(let ((treesit-defun-tactic 'nested))
(treesit--ert-test-defun-navigation
(lambda ()
(treesit-parser-create 'bash)
(setq-local treesit-defun-type-regexp "function_definition"))
treesit--ert-defun-navigation-bash-program
treesit--ert-defun-navigation-nested-master)))
(ert-deftest treesit-defun-navigation-top-level ()
"Test top-level only defun navigation."
(skip-unless (treesit-language-available-p 'python))
;; Nested defun navigation
(let ((treesit-defun-tactic 'top-level))
(require 'python)
(treesit--ert-test-defun-navigation
'python-ts-mode
treesit--ert-defun-navigation-python-program
treesit--ert-defun-navigation-top-level-master)))
;; TODO
;; - Functions in treesit.el
;; - treesit-load-name-override-list