Support tree-sitter local parsers

* doc/lispref/parsing.texi (Multiple Languages): Update manual.
* lisp/treesit.el (treesit-range-settings): Add LOCAL-P to range
setting.
(treesit-range-rules): Support :local keyword.
(treesit-local-parsers-at)
(treesit-local-parsers-in)
(treesit--update-ranges-local): New functions.
(treesit-update-ranges)
(treesit-font-lock-fontify-region)
(treesit--indent-1): Support local parsers and prioritize them over
global parsers.
This commit is contained in:
Yuan Fu 2023-09-05 19:57:34 -07:00
parent cf0986401c
commit d05494a9ff
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
2 changed files with 186 additions and 68 deletions

View file

@ -1714,6 +1714,19 @@ If @var{query} is a tree-sitter query, it should be preceded by two
specifies the embedded language, and the @code{:host} keyword
specifies the host language.
@cindex local parser
If the query is given a @code{:local} keyword, and the value is
@code{t}, the range set by this query has a dedicated local parser;
otherwise the range shares a parser with other ranges for the same
language.
A parser views its ranges as one continuous piece of text, rather than
as independent segments. Therefore, if the embedded ranges are
semantically independent segments, use local parsers for them.
The local parsers set for a range can be retrieved with
@code{treesit-local-parsers-at} and @code{treesit-local-parsers-in}.
@code{treesit-update-ranges} uses @var{query} to figure out how to set
the ranges for parsers for the embedded language. It queries
@var{query} in a host language parser, computes the ranges which the
@ -1749,6 +1762,23 @@ language of the buffer text at @var{pos}. This variable is used by
@code{treesit-language-at}.
@end defvar
@defun treesit-local-parsers-at &optional pos language
This function returns all the local parsers at @var{pos}.
Local parsers are those that only parse a limited region marked by an
overlay. If @var{language} is non-@code{nil}, only return parsers for
that language.
@var{pos} defaults to point.
@end defun
@defun treesit-local-parsers-in &optional beg end language
This function is the same as @code{treesit-local-parsers-at}, but gets
the local parsers in a range instead of at a point.
@var{beg} and @var{end} default to cover the whole buffer.
@end defun
@node Tree-sitter Major Modes
@section Developing major modes with tree-sitter
@cindex major mode, developing with tree-sitter
@ -1843,6 +1873,8 @@ add-log functions used by @code{add-log-current-defun}.
If @code{treesit-simple-imenu-settings} (@pxref{Imenu}) is
non-@code{nil}, it sets up Imenu.
@end itemize
@c TODO: Add treesit-thing-settings stuff once we finalize it.
@end defun
For more information on these built-in tree-sitter features,

View file

@ -442,11 +442,13 @@ are ignored."
(defvar-local treesit-range-settings nil
"A list of range settings.
Each element of the list is of the form (QUERY LANGUAGE).
Each element of the list is of the form (QUERY LANGUAGE LOCAL-P).
When updating the range of each parser in the buffer,
`treesit-update-ranges' queries each QUERY, and sets LANGUAGE's
range to the range spanned by captured nodes. QUERY must be a
compiled query.
compiled query. If LOCAL-P is t, give each range a separate
local parser rather than using a single parser for all the
ranges.
Capture names generally don't matter, but names that starts with
an underscore are ignored.
@ -487,15 +489,21 @@ this way: Emacs queries QUERY in the host language's parser,
computes the ranges spanned by the captured nodes, and applies
these ranges to parsers for the embedded language.
If there's a `:local' keyword with value t, the range computed by
this QUERY is given a dedicated local parser. Otherwise, the
range shares the same parser with other ranges.
QUERY can also be a function that takes two arguments, START and
END. If QUERY is a function, it doesn't need the :KEYWORD VALUE
pair preceding it. This function should set the ranges for
parsers in the current buffer in the region between START and
END. It is OK for this function to set ranges in a larger region
that encompasses the region between START and END."
(let (host embed result)
(let (host embed result local)
(while query-specs
(pcase (pop query-specs)
(:local (when (eq t (pop query-specs))
(setq local t)))
(:host (let ((host-lang (pop query-specs)))
(unless (symbolp host-lang)
(signal 'treesit-error (list "Value of :host option should be a symbol" host-lang)))
@ -511,7 +519,7 @@ that encompasses the region between START and END."
(when (null host)
(signal 'treesit-error (list "Value of :host option cannot be omitted")))
(push (list (treesit-query-compile host query)
embed host)
embed local)
result))
(setq host nil embed nil))))
(nreverse result)))
@ -562,6 +570,72 @@ those inside are kept."
if (<= start (car range) (cdr range) end)
collect range))
(defun treesit-local-parsers-at (&optional pos language)
  "Return a list of the local parsers covering POS.
A local parser is one that parses only a limited region; that
region is marked by an overlay which stores the parser in its
`treesit-parser' property.  If LANGUAGE is non-nil, return only
the parsers for that language.

POS defaults to point."
  (delq nil
        (mapcar
         (lambda (overlay)
           ;; Overlays without a `treesit-parser' property are not
           ;; ours; they map to nil and are dropped by `delq'.
           (let ((candidate (overlay-get overlay 'treesit-parser)))
             (and candidate
                  (or (not language)
                      (eq language
                          (treesit-parser-language candidate)))
                  candidate)))
         (overlays-at (or pos (point))))))
(defun treesit-local-parsers-in (&optional beg end language)
  "Return a list of the local parsers between BEG and END.
A local parser is one that parses only a limited region; that
region is marked by an overlay which stores the parser in its
`treesit-parser' property.  If LANGUAGE is non-nil, return only
the parsers for that language.

BEG and END default to the beginning and end of the accessible
portion of the buffer, respectively."
  (let ((region-start (or beg (point-min)))
        (region-end (or end (point-max)))
        (found nil))
    (dolist (overlay (overlays-in region-start region-end))
      (let ((candidate (overlay-get overlay 'treesit-parser)))
        ;; Skip overlays that carry no parser, and parsers for other
        ;; languages when LANGUAGE is given.
        (when (and candidate
                   (or (not language)
                       (eq language
                           (treesit-parser-language candidate))))
          (setq found (cons candidate found)))))
    (nreverse found)))
(defun treesit--update-ranges-local
    (query embedded-lang &optional beg end)
  "Update the ranges of local parsers between BEG and END.
Use QUERY to get the ranges, and make sure each range has a local
parser for EMBEDDED-LANG.

Each local parser is stored in the `treesit-parser' property of
an overlay covering the range it parses.  Overlays between BEG
and END that have shrunk to zero length are deleted together with
their parsers.  When BEG or END is nil, the cleanup covers the
accessible portion of the buffer."
  ;; Clean up: an empty overlay means the text its local parser was
  ;; responsible for is gone, so drop both the overlay and the parser.
  (dolist (ov (overlays-in (or beg (point-min)) (or end (point-max))))
    (when-let ((parser (overlay-get ov 'treesit-parser)))
      (when (= (overlay-start ov) (overlay-end ov))
        (delete-overlay ov)
        (treesit-parser-delete parser))))
  ;; Update range.
  (let* ((host-lang (treesit-query-language query))
         (ranges (treesit-query-range host-lang query beg end)))
    (pcase-dolist (`(,beg . ,end) ranges)
      (let ((has-parser nil))
        (dolist (ov (overlays-in beg end))
          ;; Update range of local parser.
          (let ((embedded-parser (overlay-get ov 'treesit-parser)))
            (when (and embedded-parser
                       (eq (treesit-parser-language embedded-parser)
                           embedded-lang))
              (treesit-parser-set-included-ranges
               embedded-parser `((,beg . ,end)))
              ;; Keep the overlay in sync with the parser's range, so
              ;; that overlay lookups find the parser exactly where
              ;; it parses.
              (move-overlay ov beg end)
              (setq has-parser t))))
        ;; Create overlay and local parser.
        (when (not has-parser)
          (let ((embedded-parser (treesit-parser-create
                                  embedded-lang nil t 'embedded))
                (ov (make-overlay beg end nil nil t)))
            (overlay-put ov 'treesit-parser embedded-parser)
            ;; A fresh parser has no included ranges yet; restrict it
            ;; to this range right away instead of waiting for a
            ;; later update.
            (treesit-parser-set-included-ranges
             embedded-parser `((,beg . ,end)))))))))
(defun treesit-update-ranges (&optional beg end)
"Update the ranges for each language in the current buffer.
If BEG and END are non-nil, only update parser ranges in that
@ -574,9 +648,14 @@ region."
(dolist (setting treesit-range-settings)
(let ((query (nth 0 setting))
(language (nth 1 setting))
(local (nth 2 setting))
(beg (or beg (point-min)))
(end (or end (point-max))))
(if (functionp query) (funcall query beg end)
(cond
((functionp query) (funcall query beg end))
(local
(treesit--update-ranges-local query language beg end))
(t
(let* ((host-lang (treesit-query-language query))
(parser (treesit-parser-create language))
(old-ranges (treesit-parser-included-ranges parser))
@ -586,11 +665,9 @@ region."
(treesit--merge-ranges
old-ranges new-ranges beg end)
(point-min) (point-max))))
(dolist (parser (treesit-parser-list))
(when (eq (treesit-parser-language parser)
language)
(treesit-parser-set-included-ranges
parser set-ranges))))))))
(dolist (parser (treesit-parser-list language))
(treesit-parser-set-included-ranges
parser set-ranges))))))))
(defun treesit-parser-range-on (parser beg &optional end)
"Check if PARSER's range covers the portion between BEG and END.
@ -1042,70 +1119,77 @@ If LOUDLY is non-nil, display some debugging information."
(message "Fontifying region: %s-%s" start end))
(treesit-update-ranges start end)
(font-lock-unfontify-region start end)
(dolist (setting treesit-font-lock-settings)
(let* ((query (nth 0 setting))
(enable (nth 1 setting))
(override (nth 3 setting))
(language (treesit-query-language query)))
(let* ((local-parsers (treesit-local-parsers-in start end))
(global-parsers (treesit-parser-list))
(root-nodes
(mapcar (lambda (parser)
(cons (treesit-parser-language parser)
(treesit-parser-root-node parser)))
(append local-parsers global-parsers))))
(dolist (setting treesit-font-lock-settings)
(let* ((query (nth 0 setting))
(enable (nth 1 setting))
(override (nth 3 setting))
(language (treesit-query-language query))
(root (alist-get language root-nodes)))
;; Use deterministic way to decide whether to turn on "fast
;; mode". (See bug#60691, bug#60223.)
(when (eq treesit--font-lock-fast-mode 'unspecified)
(pcase-let ((`(,max-depth ,max-width)
(treesit-subtree-stat
(treesit-buffer-root-node language))))
(if (or (> max-depth 100) (> max-width 4000))
(setq treesit--font-lock-fast-mode t)
(setq treesit--font-lock-fast-mode nil))))
;; Use deterministic way to decide whether to turn on "fast
;; mode". (See bug#60691, bug#60223.)
(when (eq treesit--font-lock-fast-mode 'unspecified)
(pcase-let ((`(,max-depth ,max-width)
(treesit-subtree-stat
(treesit-buffer-root-node language))))
(if (or (> max-depth 100) (> max-width 4000))
(setq treesit--font-lock-fast-mode t)
(setq treesit--font-lock-fast-mode nil))))
(when-let* ((root (treesit-buffer-root-node language))
(nodes (if (eq t treesit--font-lock-fast-mode)
(treesit--children-covering-range-recurse
root start end (* 4 jit-lock-chunk-size))
(list (treesit-buffer-root-node language))))
;; Only activate if ENABLE flag is t.
(activate (eq t enable)))
(ignore activate)
;; Only activate if ENABLE flag is t.
(when-let ((activate (eq t enable))
(nodes (if (eq t treesit--font-lock-fast-mode)
(treesit--children-covering-range-recurse
root start end (* 4 jit-lock-chunk-size))
(list root))))
(ignore activate)
;; Query each node.
(dolist (sub-node nodes)
(let* ((delta-start (car treesit--font-lock-query-expand-range))
(delta-end (cdr treesit--font-lock-query-expand-range))
(captures (treesit-query-capture
sub-node query
(max (- start delta-start) (point-min))
(min (+ end delta-end) (point-max)))))
;; Query each node.
(dolist (sub-node nodes)
(let* ((delta-start (car treesit--font-lock-query-expand-range))
(delta-end (cdr treesit--font-lock-query-expand-range))
(captures (treesit-query-capture
sub-node query
(max (- start delta-start) (point-min))
(min (+ end delta-end) (point-max)))))
;; For each captured node, fontify that node.
(with-silent-modifications
(dolist (capture captures)
(let* ((face (car capture))
(node (cdr capture))
(node-start (treesit-node-start node))
(node-end (treesit-node-end node)))
;; For each captured node, fontify that node.
(with-silent-modifications
(dolist (capture captures)
(let* ((face (car capture))
(node (cdr capture))
(node-start (treesit-node-start node))
(node-end (treesit-node-end node)))
;; If node is not in the region, take them out. See
;; comment #3 above for more detail.
(if (and (facep face)
(or (>= start node-end) (>= node-start end)))
;; If node is not in the region, take them out. See
;; comment #3 above for more detail.
(if (and (facep face)
(or (>= start node-end) (>= node-start end)))
(when (or loudly treesit--font-lock-verbose)
(message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end))
(cond
((facep face)
(treesit-fontify-with-override
(max node-start start) (min node-end end)
face override))
((functionp face)
(funcall face node override start end)))
;; Don't raise an error if FACE is neither a face nor
;; a function. This is to allow intermediate capture
;; names used for #match and #eq.
(when (or loudly treesit--font-lock-verbose)
(message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end))
(cond
((facep face)
(treesit-fontify-with-override
(max node-start start) (min node-end end)
face override))
((functionp face)
(funcall face node override start end)))
;; Don't raise an error if FACE is neither a face nor
;; a function. This is to allow intermediate capture
;; names used for #match and #eq.
(when (or loudly treesit--font-lock-verbose)
(message "Fontifying text from %d to %d, Face: %s, Node: %s"
(max node-start start) (min node-end end)
face (treesit-node-type node))))))))))))
(message "Fontifying text from %d to %d, Face: %s, Node: %s"
(max node-start start) (min node-end end)
face (treesit-node-type node)))))))))))))
`(jit-lock-bounds ,start . ,end))
(defun treesit--font-lock-notifier (ranges parser)
@ -1522,8 +1606,10 @@ Return (ANCHOR . OFFSET). This function is used by
(forward-line 0)
(skip-chars-forward " \t")
(point)))
(local-parsers (treesit-local-parsers-at bol))
(smallest-node
(cond ((null (treesit-parser-list)) nil)
(local-parsers (car local-parsers))
((eq 1 (length (treesit-parser-list)))
(treesit-node-at bol))
((treesit-language-at (point))