Support tree-sitter local parsers
* doc/lispref/parsing.texi (Multiple Languages): Update manual. * lisp/treesit.el (treesit-range-settings): Add LOCAL-P to range setting. (treesit-range-rules): Support :local keyword. (treesit-local-parsers-at) (treesit-local-parsers-in) (treesit--update-ranges-local): New functions. (treesit-update-ranges) (treesit-font-lock-fontify-region) (treesit--indent-1): Support local parsers and prioritize it over global parsers.
This commit is contained in:
parent
cf0986401c
commit
d05494a9ff
2 changed files with 186 additions and 68 deletions
|
@ -1714,6 +1714,19 @@ If @var{query} is a tree-sitter query, it should be preceded by two
|
|||
specifies the embedded language, and the @code{:host} keyword
|
||||
specifies the host language.
|
||||
|
||||
@cindex local parser
|
||||
If the query is given a @code{:local} keyword, and the value is
|
||||
@code{t}, the range set by this query has a dedicated local parser;
|
||||
otherwise the range shares a parser with other ranges for the same
|
||||
language.
|
||||
|
||||
A parser views its ranges as one continuous text, rather than viewing them
|
||||
as independent segments.  Therefore, if the embedded ranges are
|
||||
semantically independent segments, use local parsers for them.
|
||||
|
||||
The local parsers assigned to a range can be retrieved by
|
||||
@code{treesit-local-parsers-at} and @code{treesit-local-parsers-in}.
|
||||
|
||||
@code{treesit-update-ranges} uses @var{query} to figure out how to set
|
||||
the ranges for parsers for the embedded language. It queries
|
||||
@var{query} in a host language parser, computes the ranges which the
|
||||
|
@ -1749,6 +1762,23 @@ language of the buffer text at @var{pos}. This variable is used by
|
|||
@code{treesit-language-at}.
|
||||
@end defvar
|
||||
|
||||
@defun treesit-local-parsers-at &optional pos language
|
||||
This function returns all the local parsers at @var{pos}.
|
||||
|
||||
Local parsers are those that only parse a limited region marked by an
|
||||
overlay. If @var{language} is non-@code{nil}, only return parsers for
|
||||
that language.
|
||||
|
||||
@var{pos} defaults to point.
|
||||
@end defun
|
||||
|
||||
@defun treesit-local-parsers-in &optional beg end language
|
||||
This function is the same as @code{treesit-local-parsers-at}, but gets
|
||||
the local parsers in a range instead of at a point.
|
||||
|
||||
@var{beg} and @var{end} default to cover the whole buffer.
|
||||
@end defun
|
||||
|
||||
@node Tree-sitter Major Modes
|
||||
@section Developing major modes with tree-sitter
|
||||
@cindex major mode, developing with tree-sitter
|
||||
|
@ -1843,6 +1873,8 @@ add-log functions used by @code{add-log-current-defun}.
|
|||
If @code{treesit-simple-imenu-settings} (@pxref{Imenu}) is
|
||||
non-@code{nil}, it sets up Imenu.
|
||||
@end itemize
|
||||
|
||||
@c TODO: Add treesit-thing-settings stuff once we finalize it.
|
||||
@end defun
|
||||
|
||||
For more information on these built-in tree-sitter features,
|
||||
|
|
222
lisp/treesit.el
222
lisp/treesit.el
|
@ -442,11 +442,13 @@ are ignored."
|
|||
(defvar-local treesit-range-settings nil
|
||||
"A list of range settings.
|
||||
|
||||
Each element of the list is of the form (QUERY LANGUAGE).
|
||||
Each element of the list is of the form (QUERY LANGUAGE LOCAL-P).
|
||||
When updating the range of each parser in the buffer,
|
||||
`treesit-update-ranges' queries each QUERY, and sets LANGUAGE's
|
||||
range to the range spanned by captured nodes. QUERY must be a
|
||||
compiled query.
|
||||
compiled query. If LOCAL-P is t, give each range a separate
|
||||
local parser rather than using a single parser for all the
|
||||
ranges.
|
||||
|
||||
Capture names generally don't matter, but names that start with
|
||||
an underscore are ignored.
|
||||
|
@ -487,15 +489,21 @@ this way: Emacs queries QUERY in the host language's parser,
|
|||
computes the ranges spanned by the captured nodes, and applies
|
||||
these ranges to parsers for the embedded language.
|
||||
|
||||
If there's a `:local' keyword with value t, the range computed by
|
||||
this QUERY is given a dedicated local parser. Otherwise, the
|
||||
range shares the same parser with other ranges.
|
||||
|
||||
QUERY can also be a function that takes two arguments, START and
|
||||
END. If QUERY is a function, it doesn't need the :KEYWORD VALUE
|
||||
pair preceding it. This function should set the ranges for
|
||||
parsers in the current buffer in the region between START and
|
||||
END. It is OK for this function to set ranges in a larger region
|
||||
that encompasses the region between START and END."
|
||||
(let (host embed result)
|
||||
(let (host embed result local)
|
||||
(while query-specs
|
||||
(pcase (pop query-specs)
|
||||
(:local (when (eq t (pop query-specs))
|
||||
(setq local t)))
|
||||
(:host (let ((host-lang (pop query-specs)))
|
||||
(unless (symbolp host-lang)
|
||||
(signal 'treesit-error (list "Value of :host option should be a symbol" host-lang)))
|
||||
|
@ -511,7 +519,7 @@ that encompasses the region between START and END."
|
|||
(when (null host)
|
||||
(signal 'treesit-error (list "Value of :host option cannot be omitted")))
|
||||
(push (list (treesit-query-compile host query)
|
||||
embed host)
|
||||
embed local)
|
||||
result))
|
||||
(setq host nil embed nil))))
|
||||
(nreverse result)))
|
||||
|
@ -562,6 +570,72 @@ those inside are kept."
|
|||
if (<= start (car range) (cdr range) end)
|
||||
collect range))
|
||||
|
||||
(defun treesit-local-parsers-at (&optional pos language)
  "Return a list of the local parsers at POS.

A local parser only parses a limited region, which is marked by
an overlay carrying the parser in its `treesit-parser' property.
If LANGUAGE is non-nil, only return the parsers for that
language.

POS defaults to point."
  (delq nil
        (mapcar
         (lambda (overlay)
           ;; Overlays without a `treesit-parser' property are not
           ;; ours and map to nil, which `delq' drops afterwards.
           (let ((candidate (overlay-get overlay 'treesit-parser)))
             (and candidate
                  (or (not language)
                      (eq language
                          (treesit-parser-language candidate)))
                  candidate)))
         (overlays-at (or pos (point))))))
|
||||
|
||||
(defun treesit-local-parsers-in (&optional beg end language)
  "Return a list of the local parsers between BEG and END.

A local parser has an `embedded' tag and only parses a limited
region, which is marked by an overlay carrying the parser in its
`treesit-parser' property.  If LANGUAGE is non-nil, only return
the parsers for that language.

BEG and END default to the beginning and end of the accessible
portion of the buffer."
  (let ((region-start (or beg (point-min)))
        (region-end (or end (point-max)))
        (found nil))
    (dolist (overlay (overlays-in region-start region-end))
      (let ((candidate (overlay-get overlay 'treesit-parser)))
        ;; Skip overlays that carry no parser, and parsers for other
        ;; languages when LANGUAGE is given.
        (if (and candidate
                 (or (not language)
                     (eq language
                         (treesit-parser-language candidate))))
            (setq found (cons candidate found)))))
    (nreverse found)))
|
||||
|
||||
(defun treesit--update-ranges-local
    (query embedded-lang &optional beg end)
  "Update the ranges of local parsers between BEG and END.

QUERY is a compiled query for the host language.  Each range it
captures between BEG and END gets a dedicated local parser for
EMBEDDED-LANG.  A local parser is attached to an overlay that
covers its range, through the overlay's `treesit-parser'
property.

When looking for stale overlays, BEG and END default to the
beginning and end of the accessible portion of the buffer."
  ;; Clean up: an overlay that has collapsed to zero length means the
  ;; text it covered is gone, so neither the overlay nor its parser
  ;; is needed anymore.
  (dolist (ov (overlays-in (or beg (point-min)) (or end (point-max))))
    (when-let ((parser (overlay-get ov 'treesit-parser)))
      (when (eq (overlay-start ov) (overlay-end ov))
        (delete-overlay ov)
        (treesit-parser-delete parser))))
  ;; Update range.
  (let* ((host-lang (treesit-query-language query))
         (ranges (treesit-query-range host-lang query beg end)))
    (pcase-dolist (`(,range-beg . ,range-end) ranges)
      (let ((has-parser nil))
        (dolist (ov (overlays-in range-beg range-end))
          ;; Update range of an existing local parser.
          (let ((embedded-parser (overlay-get ov 'treesit-parser)))
            (when (and embedded-parser
                       (eq (treesit-parser-language embedded-parser)
                           embedded-lang))
              (treesit-parser-set-included-ranges
               embedded-parser `((,range-beg . ,range-end)))
              ;; Keep the overlay in sync with the parser's range, so
              ;; that overlay lookups find this parser exactly where
              ;; it parses.
              (move-overlay ov range-beg range-end)
              (setq has-parser t))))
        ;; Create overlay and local parser.
        (when (not has-parser)
          (let ((embedded-parser (treesit-parser-create
                                  embedded-lang nil t 'embedded))
                (ov (make-overlay range-beg range-end nil nil t)))
            (overlay-put ov 'treesit-parser embedded-parser)
            ;; Restrict the fresh parser to its range right away;
            ;; otherwise it would cover the whole buffer until a
            ;; later update finds its overlay and sets the range.
            (treesit-parser-set-included-ranges
             embedded-parser `((,range-beg . ,range-end)))))))))
|
||||
|
||||
(defun treesit-update-ranges (&optional beg end)
|
||||
"Update the ranges for each language in the current buffer.
|
||||
If BEG and END are non-nil, only update parser ranges in that
|
||||
|
@ -574,9 +648,14 @@ region."
|
|||
(dolist (setting treesit-range-settings)
|
||||
(let ((query (nth 0 setting))
|
||||
(language (nth 1 setting))
|
||||
(local (nth 2 setting))
|
||||
(beg (or beg (point-min)))
|
||||
(end (or end (point-max))))
|
||||
(if (functionp query) (funcall query beg end)
|
||||
(cond
|
||||
((functionp query) (funcall query beg end))
|
||||
(local
|
||||
(treesit--update-ranges-local query language beg end))
|
||||
(t
|
||||
(let* ((host-lang (treesit-query-language query))
|
||||
(parser (treesit-parser-create language))
|
||||
(old-ranges (treesit-parser-included-ranges parser))
|
||||
|
@ -586,11 +665,9 @@ region."
|
|||
(treesit--merge-ranges
|
||||
old-ranges new-ranges beg end)
|
||||
(point-min) (point-max))))
|
||||
(dolist (parser (treesit-parser-list))
|
||||
(when (eq (treesit-parser-language parser)
|
||||
language)
|
||||
(treesit-parser-set-included-ranges
|
||||
parser set-ranges))))))))
|
||||
(dolist (parser (treesit-parser-list language))
|
||||
(treesit-parser-set-included-ranges
|
||||
parser set-ranges))))))))
|
||||
|
||||
(defun treesit-parser-range-on (parser beg &optional end)
|
||||
"Check if PARSER's range covers the portion between BEG and END.
|
||||
|
@ -1042,70 +1119,77 @@ If LOUDLY is non-nil, display some debugging information."
|
|||
(message "Fontifying region: %s-%s" start end))
|
||||
(treesit-update-ranges start end)
|
||||
(font-lock-unfontify-region start end)
|
||||
(dolist (setting treesit-font-lock-settings)
|
||||
(let* ((query (nth 0 setting))
|
||||
(enable (nth 1 setting))
|
||||
(override (nth 3 setting))
|
||||
(language (treesit-query-language query)))
|
||||
(let* ((local-parsers (treesit-local-parsers-in start end))
|
||||
(global-parsers (treesit-parser-list))
|
||||
(root-nodes
|
||||
(mapcar (lambda (parser)
|
||||
(cons (treesit-parser-language parser)
|
||||
(treesit-parser-root-node parser)))
|
||||
(append local-parsers global-parsers))))
|
||||
(dolist (setting treesit-font-lock-settings)
|
||||
(let* ((query (nth 0 setting))
|
||||
(enable (nth 1 setting))
|
||||
(override (nth 3 setting))
|
||||
(language (treesit-query-language query))
|
||||
(root (alist-get language root-nodes)))
|
||||
|
||||
;; Use deterministic way to decide whether to turn on "fast
|
||||
;; mode". (See bug#60691, bug#60223.)
|
||||
(when (eq treesit--font-lock-fast-mode 'unspecified)
|
||||
(pcase-let ((`(,max-depth ,max-width)
|
||||
(treesit-subtree-stat
|
||||
(treesit-buffer-root-node language))))
|
||||
(if (or (> max-depth 100) (> max-width 4000))
|
||||
(setq treesit--font-lock-fast-mode t)
|
||||
(setq treesit--font-lock-fast-mode nil))))
|
||||
;; Use deterministic way to decide whether to turn on "fast
|
||||
;; mode". (See bug#60691, bug#60223.)
|
||||
(when (eq treesit--font-lock-fast-mode 'unspecified)
|
||||
(pcase-let ((`(,max-depth ,max-width)
|
||||
(treesit-subtree-stat
|
||||
(treesit-buffer-root-node language))))
|
||||
(if (or (> max-depth 100) (> max-width 4000))
|
||||
(setq treesit--font-lock-fast-mode t)
|
||||
(setq treesit--font-lock-fast-mode nil))))
|
||||
|
||||
(when-let* ((root (treesit-buffer-root-node language))
|
||||
(nodes (if (eq t treesit--font-lock-fast-mode)
|
||||
(treesit--children-covering-range-recurse
|
||||
root start end (* 4 jit-lock-chunk-size))
|
||||
(list (treesit-buffer-root-node language))))
|
||||
;; Only activate if ENABLE flag is t.
|
||||
(activate (eq t enable)))
|
||||
(ignore activate)
|
||||
;; Only activate if ENABLE flag is t.
|
||||
(when-let ((activate (eq t enable))
|
||||
(nodes (if (eq t treesit--font-lock-fast-mode)
|
||||
(treesit--children-covering-range-recurse
|
||||
root start end (* 4 jit-lock-chunk-size))
|
||||
(list root))))
|
||||
(ignore activate)
|
||||
|
||||
;; Query each node.
|
||||
(dolist (sub-node nodes)
|
||||
(let* ((delta-start (car treesit--font-lock-query-expand-range))
|
||||
(delta-end (cdr treesit--font-lock-query-expand-range))
|
||||
(captures (treesit-query-capture
|
||||
sub-node query
|
||||
(max (- start delta-start) (point-min))
|
||||
(min (+ end delta-end) (point-max)))))
|
||||
;; Query each node.
|
||||
(dolist (sub-node nodes)
|
||||
(let* ((delta-start (car treesit--font-lock-query-expand-range))
|
||||
(delta-end (cdr treesit--font-lock-query-expand-range))
|
||||
(captures (treesit-query-capture
|
||||
sub-node query
|
||||
(max (- start delta-start) (point-min))
|
||||
(min (+ end delta-end) (point-max)))))
|
||||
|
||||
;; For each captured node, fontify that node.
|
||||
(with-silent-modifications
|
||||
(dolist (capture captures)
|
||||
(let* ((face (car capture))
|
||||
(node (cdr capture))
|
||||
(node-start (treesit-node-start node))
|
||||
(node-end (treesit-node-end node)))
|
||||
;; For each captured node, fontify that node.
|
||||
(with-silent-modifications
|
||||
(dolist (capture captures)
|
||||
(let* ((face (car capture))
|
||||
(node (cdr capture))
|
||||
(node-start (treesit-node-start node))
|
||||
(node-end (treesit-node-end node)))
|
||||
|
||||
;; If node is not in the region, take them out. See
|
||||
;; comment #3 above for more detail.
|
||||
(if (and (facep face)
|
||||
(or (>= start node-end) (>= node-start end)))
|
||||
;; If node is not in the region, take them out. See
|
||||
;; comment #3 above for more detail.
|
||||
(if (and (facep face)
|
||||
(or (>= start node-end) (>= node-start end)))
|
||||
(when (or loudly treesit--font-lock-verbose)
|
||||
(message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end))
|
||||
|
||||
(cond
|
||||
((facep face)
|
||||
(treesit-fontify-with-override
|
||||
(max node-start start) (min node-end end)
|
||||
face override))
|
||||
((functionp face)
|
||||
(funcall face node override start end)))
|
||||
|
||||
;; Don't raise an error if FACE is neither a face nor
|
||||
;; a function. This is to allow intermediate capture
|
||||
;; names used for #match and #eq.
|
||||
(when (or loudly treesit--font-lock-verbose)
|
||||
(message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end))
|
||||
|
||||
(cond
|
||||
((facep face)
|
||||
(treesit-fontify-with-override
|
||||
(max node-start start) (min node-end end)
|
||||
face override))
|
||||
((functionp face)
|
||||
(funcall face node override start end)))
|
||||
|
||||
;; Don't raise an error if FACE is neither a face nor
|
||||
;; a function. This is to allow intermediate capture
|
||||
;; names used for #match and #eq.
|
||||
(when (or loudly treesit--font-lock-verbose)
|
||||
(message "Fontifying text from %d to %d, Face: %s, Node: %s"
|
||||
(max node-start start) (min node-end end)
|
||||
face (treesit-node-type node))))))))))))
|
||||
(message "Fontifying text from %d to %d, Face: %s, Node: %s"
|
||||
(max node-start start) (min node-end end)
|
||||
face (treesit-node-type node)))))))))))))
|
||||
`(jit-lock-bounds ,start . ,end))
|
||||
|
||||
(defun treesit--font-lock-notifier (ranges parser)
|
||||
|
@ -1522,8 +1606,10 @@ Return (ANCHOR . OFFSET). This function is used by
|
|||
(forward-line 0)
|
||||
(skip-chars-forward " \t")
|
||||
(point)))
|
||||
(local-parsers (treesit-local-parsers-at bol))
|
||||
(smallest-node
|
||||
(cond ((null (treesit-parser-list)) nil)
|
||||
(local-parsers (car local-parsers))
|
||||
((eq 1 (length (treesit-parser-list)))
|
||||
(treesit-node-at bol))
|
||||
((treesit-language-at (point))
|
||||
|
|
Loading…
Add table
Reference in a new issue