From 5e0daa1ef77d2a5fe5b65b8f0fa6c4eab83a2498 Mon Sep 17 00:00:00 2001 From: Juri Linkov Date: Tue, 8 Apr 2025 20:40:10 +0300 Subject: [PATCH] New function treesit-parsers-at for treesit-language-at (bug#77256). * doc/lispref/parsing.texi (Multiple Languages): The variable 'treesit-language-at-point-function' is now optional for multi-language major modes. Add description of 'treesit-parsers-at'. * lisp/treesit.el (treesit-language-at-point-function): Change the docstring to remove the dissuasion against deriving the language from parser ranges. (treesit-language-at): Use the first parser from 'treesit-parsers-at' as the default return value when 'treesit-language-at-point-function' is nil. Adapt the docstring. (treesit-node-at): Use 'treesit-parsers-at'. (treesit-parsers-at): New function. (treesit-local-parsers-at): Use 'treesit-parsers-at' with most of the body moved to it. (treesit-local-parsers-on): Replace the overlay property 'treesit-parser' with 'treesit-parser-local-p' in the docstring. (treesit-up-list, treesit-simple-imenu, treesit-outline-level): Use 'treesit-parsers-at'. * lisp/progmodes/c-ts-mode.el (c-ts-mode): Don't set 'treesit-language-at-point-function'. * lisp/progmodes/elixir-ts-mode.el (elixir-ts--treesit-language-at-point): Remove. (elixir-ts-mode): Don't set 'treesit-language-at-point-function'. * lisp/progmodes/js.el (js--treesit-language-at-point): Remove. (js-ts-mode): Don't set 'treesit-language-at-point-function'. * lisp/progmodes/php-ts-mode.el (php-ts-mode--html-language-at-point) (php-ts-mode--language-at-point): Remove. (php-ts-mode): Don't set 'treesit-language-at-point-function'. * lisp/textmodes/mhtml-ts-mode.el (mhtml-ts-mode--language-at-point): Remove. (mhtml-ts-mode): Don't set 'treesit-language-at-point-function'. Use 'treesit-language-at' for mode-line lighter. 
--- doc/lispref/parsing.texi | 48 ++++++++++--- etc/NEWS | 29 ++++---- lisp/progmodes/c-ts-mode.el | 2 - lisp/progmodes/elixir-ts-mode.el | 15 ---- lisp/progmodes/js.el | 17 ----- lisp/progmodes/php-ts-mode.el | 28 -------- lisp/textmodes/mhtml-ts-mode.el | 21 +----- lisp/treesit.el | 118 +++++++++++++++++++------------ 8 files changed, 127 insertions(+), 151 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 3e8e0851f2c..f7be47dc044 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1814,12 +1814,12 @@ ranges for each parser are correct before using parsers in a buffer, and call @code{treesit-language-at} to figure out the language responsible for the text at some position. These two functions don't work by themselves; they need major modes to set @code{treesit-range-settings} -and @code{treesit-language-at-point-function}, which do the actual work. +and optionally @code{treesit-language-at-point-function}, which do the actual work. These functions and variables are explained in more detail towards the end of the section. In short, multi-language major modes should set -@code{treesit-primary-parser}, @code{treesit-range-settings}, and +@code{treesit-primary-parser}, @code{treesit-range-settings}, and optionally @code{treesit-language-at-point-function} before calling @code{treesit-major-mode-setup}. @@ -1921,9 +1921,9 @@ This function returns the language of the text at buffer position @var{pos}. Under the hood it calls @code{treesit-language-at-point-function} and returns its return value. If @code{treesit-language-at-point-function} is @code{nil}, -this function returns the language of the first parser in the returned -value of @code{treesit-parser-list}. If there is no parser in the -buffer, it returns @code{nil}. +this function returns the language of the deepest parser by embed level +among parsers returned by @code{treesit-parsers-at}. 
If there is no +parser at that buffer position, it returns @code{nil}. @end defun @heading Supporting multiple languages in major modes @@ -2011,7 +2011,7 @@ directly translate into operations shown above. @end group @group -;; Major modes with multiple languages should always set +;; Major modes with multiple languages can optionally set ;; `treesit-language-at-point-function' (which see). (setq treesit-language-at-point-function (lambda (pos) @@ -2094,17 +2094,45 @@ language of the buffer text at @var{pos}. This variable is used by @code{treesit-language-at}. @end defvar -@defun treesit-local-parsers-at &optional pos language +@defun treesit-parsers-at &optional pos language with-host only +This function returns all parsers at @var{pos} in the current buffer. +@var{pos} defaults to point. The returned parsers are sorted by the +decreasing embed level. + +If @var{language} is non-@code{nil}, return parsers only for that +language. + +If @var{with-host} is non-@code{nil}, return a list of +@w{@code{(@var{parser} . @var{host-parser})}} where @var{host-parser} +is the host parser which created the @var{parser}. + +If @var{only} is @code{nil}, return all parsers including the +primary parser. + +The argument @var{only} can be a list of symbols that specify what +parsers to include in the return value. + +If @var{only} contains the symbol @code{local}, include local parsers. +Local parsers are those which only parse a limited region marked by an +overlay with a non-@code{nil} @code{treesit-parser-local-p} property. + +If @var{only} contains the symbol @code{global}, include non-local parsers +excluding the primary parser. + +If @var{only} contains the symbol @code{primary}, include the primary parser. +@end defun + +@defun treesit-local-parsers-at &optional pos language with-host This function returns all the local parsers at @var{pos} in the current buffer. @var{pos} defaults to point. 
Local parsers are those which only parse a limited region marked by an -overlay with a non-@code{nil} @code{treesit-parser} property. If -@var{language} is non-@code{nil}, only return parsers for that +overlay with a non-@code{nil} @code{treesit-parser-local-p} property. +If @var{language} is non-@code{nil}, only return parsers for that language. @end defun -@defun treesit-local-parsers-on &optional beg end language +@defun treesit-local-parsers-on &optional beg end language with-host This function is the same as @code{treesit-local-parsers-at}, but it returns the local parsers in the range between @var{beg} and @var{end} instead of at point. diff --git a/etc/NEWS b/etc/NEWS index c08b0052639..fa8b0bf89a8 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1994,18 +1994,6 @@ in the old rules with new ones, then returns the modified rules. In a multi-language major mode it is sometimes necessary to modify rules from one of the major modes to better suit the new multilingual context. -+++ -*** New command 'treesit-explore'. -This command replaces 'treesit-explore-mode'. It turns on -'treesit-explore-mode' if it’s not on, and pops up the explorer buffer -if it’s already on. - -+++ -*** 'treesit-explore-mode' now supports local parsers. -Now 'treesit-explore-mode' (or 'treesit-explore') prompts for a parser -rather than a language, and it’s now possible to select a local parser -at point to explore. - +++ *** New variable 'treesit-aggregated-simple-imenu-settings'. This variable allows major modes to setup Imenu for multiple languages. @@ -2023,6 +2011,23 @@ tree-sitter modes. Users can customize this variable to add simple custom indentation rules for tree-sitter major modes. ++++ +*** 'treesit-language-at-point-function' is now optional. +Multi-language major modes can rely on the default return value from +'treesit-language-at' that uses the new function 'treesit-parsers-at'. + ++++ +*** New command 'treesit-explore'. +This command replaces 'treesit-explore-mode'. 
It turns on +'treesit-explore-mode' if it’s not on, and pops up the explorer buffer +if it’s already on. + ++++ +*** 'treesit-explore-mode' now supports local parsers. +Now 'treesit-explore-mode' (or 'treesit-explore') prompts for a parser +rather than a language, and it’s now possible to select a local parser +at point to explore. + +++ ** New optional BUFFER argument for 'string-pixel-width'. If supplied, 'string-pixel-width' will use any face remappings from diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index fa5f8567b60..c49e928884a 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -1488,8 +1488,6 @@ in your init files." (setq-local treesit-range-settings (treesit-range-rules 'c-ts-mode--emacs-set-ranges)) - (setq-local treesit-language-at-point-function - (lambda (_pos) 'c)) (treesit-font-lock-recompute-features '(emacs-devel))) ;; Inject doxygen parser for comment. diff --git a/lisp/progmodes/elixir-ts-mode.el b/lisp/progmodes/elixir-ts-mode.el index d50692d87c0..9a0418eaf29 100644 --- a/lisp/progmodes/elixir-ts-mode.el +++ b/lisp/progmodes/elixir-ts-mode.el @@ -596,18 +596,6 @@ With ARG, do it many times. Negative ARG means move backward." (back-to-indentation) (point))) -(defun elixir-ts--treesit-language-at-point (point) - "Return the language at POINT." - (let ((node (treesit-node-at point 'elixir))) - (if (and (equal (treesit-node-type node) "quoted_content") - (let ((prev-sibling (treesit-node-prev-sibling node t))) - (and (treesit-node-p prev-sibling) - (string-match-p - (rx bos (or "H" "F") eos) - (treesit-node-text prev-sibling))))) - 'heex - 'elixir))) - (defun elixir-ts--defun-p (node) "Return non-nil when NODE is a defun." (member (treesit-node-text @@ -702,9 +690,6 @@ Return nil if NODE is not a defun node or doesn't have a name." 
(setq-local treesit-primary-parser (treesit-parser-create 'elixir)) - (setq-local treesit-language-at-point-function - 'elixir-ts--treesit-language-at-point) - ;; Font-lock. (setq-local treesit-font-lock-settings elixir-ts--font-lock-settings) (setq-local treesit-font-lock-feature-list diff --git a/lisp/progmodes/js.el b/lisp/progmodes/js.el index f319d1d2fa9..69d05feb594 100644 --- a/lisp/progmodes/js.el +++ b/lisp/progmodes/js.el @@ -3730,22 +3730,6 @@ Return nil if there is no name or if NODE is not a defun node." eos)))) (_ t))) -(defun js--treesit-language-at-point (point) - "Return the language at POINT." - (let* ((node (treesit-node-at point 'javascript)) - (node-type (treesit-node-type node)) - (node-start (treesit-node-start node)) - (node-end (treesit-node-end node))) - (if (not (treesit-ready-p 'jsdoc t)) - 'javascript - (if (equal node-type "comment") - (save-excursion - (goto-char node-start) - (if (search-forward "/**" node-end t) - 'jsdoc - 'javascript)) - 'javascript)))) - ;;; Main Function ;;;###autoload @@ -4006,7 +3990,6 @@ See `treesit-thing-settings' for more information.") ;; Tree-sitter setup. (setq-local treesit-primary-parser (treesit-parser-create 'javascript)) - (setq-local treesit-language-at-point-function #'js--treesit-language-at-point) ;; Indent. (setq-local treesit-simple-indent-rules js--treesit-indent-rules) diff --git a/lisp/progmodes/php-ts-mode.el b/lisp/progmodes/php-ts-mode.el index fb5cf46f9e5..33c44693cf4 100644 --- a/lisp/progmodes/php-ts-mode.el +++ b/lisp/progmodes/php-ts-mode.el @@ -1150,32 +1150,6 @@ For NODE, OVERRIDE, START, and END, see `treesit-font-lock-rules'." 'font-lock-warning-face override start end)) -(defun php-ts-mode--html-language-at-point (point) - "Return the language at POINT assuming the point is within a HTML region." 
- (let* ((node (treesit-node-at point 'html)) - (parent (treesit-node-parent node)) - (node-query (format "(%s (%s))" - (treesit-node-type parent) - (treesit-node-type node)))) - (cond - ((string-equal "(script_element (raw_text))" node-query) 'javascript) - ((string-equal "(style_element (raw_text))" node-query) 'css) - (t 'html)))) - -(defun php-ts-mode--language-at-point (point) - "Return the language at POINT." - (let* ((node (treesit-node-at point 'php)) - (node-type (treesit-node-type node)) - (parent (treesit-node-parent node)) - (node-query (format "(%s (%s))" (treesit-node-type parent) node-type))) - (save-excursion - (goto-char (treesit-node-start node)) - (cond - ((not (member node-query '("(program (text))" - "(text_interpolation (text))"))) - 'php) - (t (php-ts-mode--html-language-at-point point)))))) - ;;; Imenu @@ -1466,8 +1440,6 @@ Depends on `c-ts-common-comment-setup'." (start_tag (tag_name)) (raw_text) @cap)))) - (setq-local treesit-language-at-point-function #'php-ts-mode--language-at-point) - ;; Navigation. (setq-local treesit-defun-type-regexp (regexp-opt '("class_declaration" diff --git a/lisp/textmodes/mhtml-ts-mode.el b/lisp/textmodes/mhtml-ts-mode.el index 25af6a0a1e0..0c98ec472b2 100644 --- a/lisp/textmodes/mhtml-ts-mode.el +++ b/lisp/textmodes/mhtml-ts-mode.el @@ -211,21 +211,6 @@ Optional ARGUMENTS to to be passed to it." "Menu bar for `mhtml-ts-mode'." css-mode--menu) -;; To enable some basic treesiter functionality, you should define -;; a function that recognizes which grammar is used at-point. -;; This function should be assigned to `treesit-language-at-point-function' -(defun mhtml-ts-mode--language-at-point (point) - "Return the language at POINT assuming the point is within a HTML buffer." 
- (let* ((node (treesit-node-at point 'html)) - (parent (treesit-node-parent node)) - (node-query (format "(%s (%s))" - (treesit-node-type parent) - (treesit-node-type node)))) - (cond - ((equal "(script_element (raw_text))" node-query) (js--treesit-language-at-point point)) - ((equal "(style_element (raw_text))" node-query) 'css) - (t 'html)))) - ;; Custom font-lock function that's used to apply color to css color ;; The signature of the function should be conforming to signature ;; QUERY-SPEC required by `treesit-font-lock-rules'. @@ -440,7 +425,7 @@ Calls REPORT-FN directly. Requires tidy." ;;;###autoload (define-derived-mode mhtml-ts-mode html-ts-mode - '("HTML+" (:eval (let ((lang (mhtml-ts-mode--language-at-point (point)))) + '("HTML+" (:eval (let ((lang (treesit-language-at (point)))) (cond ((eq lang 'html) "") ((eq lang 'javascript) "JS") ((eq lang 'css) "CSS"))))) @@ -514,10 +499,6 @@ Powered by tree-sitter." (setq-local c-ts-common--comment-regexp js--treesit-jsdoc-comment-regexp)) - - ;; Many treesit functions need to know the language at-point. - ;; So you should define such a function. - (setq-local treesit-language-at-point-function #'mhtml-ts-mode--language-at-point) (setq-local prettify-symbols-alist mhtml-ts-mode--prettify-symbols-alist) ;; Indent. diff --git a/lisp/treesit.el b/lisp/treesit.el index 07861603244..888f067e0cc 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -174,26 +174,19 @@ The function is called with one argument, the position of point. In general, this function should call `treesit-node-at' with an explicit language (usually the host language), and determine the -language at point using the type of the returned node. - -DO NOT derive the language at point from parser ranges. It's -cumbersome and can't deal with some edge cases.") +language at point using the type of the returned node.") (defun treesit-language-at (position) "Return the language at POSITION. -This function assumes that parser ranges are up-to-date. 
It -returns the return value of `treesit-language-at-point-function' -if it's non-nil, otherwise it returns the language of the first -parser in `treesit-parser-list', or nil if there is no parser. - -In a multi-language buffer, make sure -`treesit-language-at-point-function' is implemented! Otherwise -`treesit-language-at' wouldn't return the correct result." +When there are multiple parsers that cover POSITION, determine +the most relevant parser (hence language) by their embed level. +If `treesit-language-at-point-function' is non-nil, return +the return value of that function instead." (if treesit-language-at-point-function (funcall treesit-language-at-point-function position) - (when-let* ((parser (car (treesit-parser-list)))) - (treesit-parser-language parser)))) + (treesit-parser-language + (car (treesit-parsers-at position))))) ;;; Node API supplement @@ -247,8 +240,9 @@ language and doesn't match the language of the local parser." (parser-or-lang (let* ((local-parser (car (treesit-local-parsers-at pos parser-or-lang))) - (global-parser (car (treesit-parser-list - nil parser-or-lang))) + (global-parser (car (treesit-parsers-at + pos parser-or-lang nil + '(primary global)))) (parser (or local-parser global-parser))) (when parser (treesit-parser-root-node parser)))) @@ -267,13 +261,10 @@ language and doesn't match the language of the local parser." (local-parser ;; Find the local parser with highest ;; embed-level at point. - (car (seq-sort-by #'treesit-parser-embed-level - (lambda (a b) - (> (or a 0) (or b 0))) - (treesit-local-parsers-at - pos lang)))) - (global-parser (car (treesit-parser-list - nil lang))) + (car (treesit-local-parsers-at pos lang))) + (global-parser (car (treesit-parsers-at + pos lang nil + '(primary global)))) (parser (or local-parser global-parser))) (when parser (treesit-parser-root-node parser)))))) @@ -851,30 +842,68 @@ those inside are kept." 
if (<= start (car range) (cdr range) end) collect range)) +(defun treesit-parsers-at (&optional pos language with-host only) + "Return all parsers at POS. + +POS defaults to point. The returned parsers are sorted by +the decreasing embed level. + +If LANGUAGE is non-nil, only return parsers for LANGUAGE. + +If WITH-HOST is non-nil, return a list of (PARSER . HOST-PARSER) +instead. HOST-PARSER is the host parser which created the PARSER. + +If ONLY is nil, return all parsers including the primary parser. + +The argument ONLY can be a list of symbols that specify what +parsers to include in the return value. + +If ONLY contains the symbol `local', include local parsers. +Local parsers are those which only parse a limited region marked +by an overlay with non-nil `treesit-parser-local-p' property. + +If ONLY contains the symbol `global', include non-local parsers +excluding the primary parser. + +If ONLY contains the symbol `primary', include the primary parser." + (let ((res nil)) + ;; Refer to (ref:local-parser-overlay) for more explanation of local + ;; parser overlays. + (dolist (ov (overlays-at (or pos (point)))) + (when-let* ((parser (overlay-get ov 'treesit-parser)) + (host-parser (or (null with-host) + (overlay-get ov 'treesit-host-parser))) + (_ (or (null language) + (eq (treesit-parser-language parser) + language))) + (_ (or (null only) + (and (memq 'local only) (memq 'global only)) + (and (memq 'local only) + (overlay-get ov 'treesit-parser-local-p)) + (and (memq 'global only) + (not (overlay-get ov 'treesit-parser-local-p)))))) + (push (if with-host (cons parser host-parser) parser) res))) + (when (or (null only) (memq 'primary only)) + (setq res (cons treesit-primary-parser res))) + (seq-sort-by (lambda (p) + (treesit-parser-embed-level + (or (car-safe p) p))) + (lambda (a b) + (> (or a 0) (or b 0))) + res))) + (defun treesit-local-parsers-at (&optional pos language with-host) "Return all the local parsers at POS. POS defaults to point. 
Local parsers are those which only parse a limited region marked -by an overlay with non-nil `treesit-parser' property. +by an overlay with non-nil `treesit-parser-local-p' property. If LANGUAGE is non-nil, only return parsers for LANGUAGE. If WITH-HOST is non-nil, return a list of (PARSER . HOST-PARSER) instead. HOST-PARSER is the host parser which created the local PARSER." - (let ((res nil)) - ;; Refer to (ref:local-parser-overlay) for more explanation of local - ;; parser overlays. - (dolist (ov (overlays-at (or pos (point)))) - (let ((parser (overlay-get ov 'treesit-parser)) - (host-parser (overlay-get ov 'treesit-host-parser)) - (local-p (overlay-get ov 'treesit-parser-local-p))) - (when (and parser host-parser local-p - (or (null language) - (eq (treesit-parser-language parser) - language))) - (push (if with-host (cons parser host-parser) parser) res)))) - (nreverse res))) + (treesit-parsers-at pos language with-host '(local))) (defun treesit-local-parsers-on (&optional beg end language with-host) "Return the list of local parsers that cover the region between BEG and END. @@ -883,7 +912,7 @@ BEG and END default to the beginning and end of the buffer's accessible portion. Local parsers are those that have an `embedded' tag, and only parse a -limited region marked by an overlay with a non-nil `treesit-parser' +limited region marked by an overlay with a non-nil `treesit-parser-local-p' property. If LANGUAGE is non-nil, only return parsers for LANGUAGE. If WITH-HOST is non-nil, return a list of (PARSER . HOST-PARSER) @@ -3139,9 +3168,7 @@ ARG is described in the docstring of `up-list'." 
(setq parent (treesit-parent-until parent pred))) (unless parent - (let ((parsers (seq-keep (lambda (o) - (overlay-get o 'treesit-host-parser)) - (overlays-at (point) t)))) + (let ((parsers (mapcar #'cdr (treesit-parsers-at (point) nil t '(global local))))) (while (and (not parent) parsers) (setq parent (treesit-parent-until (treesit-node-at (point) (car parsers)) pred) @@ -3891,9 +3918,8 @@ by `treesit-simple-imenu-settings'." (lambda (entry) (let* ((lang (car entry)) (settings (cdr entry)) - (global-parser (car (treesit-parser-list nil lang))) - (local-parsers - (treesit-parser-list nil lang 'embedded))) + (global-parser (car (treesit-parsers-at nil lang nil '(primary global)))) + (local-parsers (treesit-local-parsers-at nil lang))) (cons (treesit-language-display-name lang) ;; No one says you can't have both global and local ;; parsers for the same language. E.g., Rust uses @@ -4033,9 +4059,7 @@ For BOUND, MOVE, BACKWARD, LOOKING-AT, see the descriptions in (setq level (1+ level))) ;; Continue counting the host nodes. - (dolist (parser (seq-keep (lambda (o) - (overlay-get o 'treesit-host-parser)) - (overlays-at (point) t))) + (dolist (parser (mapcar #'cdr (treesit-parsers-at (point) nil t '(global local)))) (let* ((node (treesit-node-at (point) parser)) (lang (treesit-parser-language parser)) (pred (alist-get lang treesit-aggregated-outline-predicate)))