Fix FOR_EACH_TAIL in c-ts-mode (bug#62951)

* lisp/progmodes/c-ts-mode.el
(c-ts-mode--indent-styles): New indent rule.

(c-ts-mode--for-each-tail-regexp)
(c-ts-mode--for-each-tail-body-matcher)
(c-ts-mode--emacs-c-range-query)
(c-ts-mode--for-each-tail-ranges)
(c-ts-mode--reverse-ranges)
(c-ts-mode--emacs-set-ranges): New functions and variables.

(c-ts-mode): Create an emacs-c parser.  More setup for Emacs source
support.

* lisp/treesit.el (treesit-query-range): Ignore underscore-prefixed
capture names.
This commit is contained in:
Yuan Fu 2023-04-26 20:09:42 -07:00
parent 0cf6e0998b
commit 31b58161bb
No known key found for this signature in database
GPG key ID: 56E19BC57664A442

View file

@ -357,7 +357,9 @@ PARENT, BOL, ARGS are the same as other anchor functions."
"Indent rules supported by `c-ts-mode'.
MODE is either `c' or `cpp'."
(let ((common
`(((parent-is "translation_unit") column-0 0)
`((c-ts-mode--for-each-tail-body-matcher prev-line c-ts-mode-indent-offset)
((parent-is "translation_unit") column-0 0)
((query "(ERROR (ERROR)) @indent") column-0 0)
((node-is ")") parent 1)
((node-is "]") parent-bol 0)
@ -969,6 +971,81 @@ if `c-ts-mode-emacs-sources-support' is non-nil."
(or (treesit-add-log-current-defun)
(c-ts-mode--defun-name (c-ts-mode--emacs-defun-at-point))))
;;; FOR_EACH_TAIL fix
;;
;; FOR_EACH_TAIL (and FOR_EACH_TAIL_SAFE) followed by an unbracketed
;; body will mess up the parser, which parses the thing as a function
;; declaration.  We "fix" it by adding a shadow parser, emacs-c (which
;; is just c but under a different name).  We use emacs-c to find each
;; FOR_EACH_TAIL with an unbracketed body, and set the ranges of the C
;; parser so that it skips those FOR_EACH_TAIL's.  Note that we only
;; ignore FOR_EACH_TAIL's with an unbracketed body.  Those with a
;; bracketed body parse more or less fine.
(defvar c-ts-mode--for-each-tail-regexp
  (rx (seq "FOR_EACH_"
           (or "TAIL" "TAIL_SAFE" "ALIST_VALUE"
               "LIVE_BUFFER" "FRAME")))
  "Regexp matching the name of a FOR_EACH_TAIL-style macro.
It matches \"FOR_EACH_\" followed by one of TAIL, TAIL_SAFE,
ALIST_VALUE, LIVE_BUFFER or FRAME.")
(defun c-ts-mode--for-each-tail-body-matcher (_n _p bol &rest _)
  "Match the line that directly follows a FOR_EACH_TAIL-style macro.
BOL is the beginning of the line being indented, see
`treesit-simple-indent-rules'; the other arguments are ignored.

Return non-nil only if `c-ts-mode-emacs-sources-support' is
non-nil and the line before BOL, ignoring leading spaces and
tabs, starts with a match for `c-ts-mode--for-each-tail-regexp'.
The first line of the accessible portion of the buffer never
matches, since there is no line before it.  The match is
case-sensitive and leaves the match data untouched."
  (when c-ts-mode-emacs-sources-support
    (save-excursion
      (goto-char bol)
      ;; `forward-line' returns non-zero when it could not move back a
      ;; full line.  Without this check a FOR_EACH_TAIL on the very
      ;; first line would be tested against itself.
      (and (zerop (forward-line -1))
           (progn
             (skip-chars-forward " \t")
             ;; C identifiers are case-sensitive, so don't let the
             ;; ambient `case-fold-search' loosen the match.
             (let ((case-fold-search nil))
               ;; The `-p' variant does not clobber match data.
               (looking-at-p c-ts-mode--for-each-tail-regexp)))))))
(defvar c-ts-mode--emacs-c-range-query
  ;; Without tree-sitter support the query cannot be compiled, so
  ;; leave the variable nil instead of signaling an error while this
  ;; file is being loaded.
  (when (treesit-available-p)
    (treesit-query-compile
     'emacs-c `(((declaration
                  type: (macro_type_specifier
                         name: (identifier) @_name)
                  @for-each-tail)
                 (:match ,c-ts-mode--for-each-tail-regexp
                         @_name)))))
  "Query that finds a FOR_EACH_TAIL with an unbracketed body.
The query is written for the emacs-c language.  It captures, as
@for-each-tail, each declaration whose type is a macro type
specifier named by something matching
`c-ts-mode--for-each-tail-regexp'.  The @_name capture exists
only to feed the :match predicate.

The value is nil when tree-sitter is not available.")
(defvar-local c-ts-mode--for-each-tail-ranges
  nil
  "Ranges that cover every FOR_EACH_TAIL in this buffer.
Each range is a cons (BEG . END).  The value is maintained by
`c-ts-mode--emacs-set-ranges'.")
(defun c-ts-mode--reverse-ranges (ranges beg end)
  "Return the complement of RANGES within the span from BEG to END.
RANGES is a list of conses (START . STOP) in ascending order.
The returned list has the same form and covers exactly the
positions between BEG and END that no element of RANGES covers.

If RANGES is nil, return nil rather than a single range from BEG
to END.  That way, handing the result to
`treesit-parser-set-included-ranges' makes the parser parse the
whole buffer."
  (when ranges
    (let ((cursor beg)      ; End of the last range seen so far.
          (gaps nil))       ; Complement ranges, newest first.
      (while ranges
        (let ((start (caar ranges))
              (stop (cdar ranges)))
          ;; Record the hole between the previous range and this one.
          (if (< cursor start)
              (setq gaps (cons (cons cursor start) gaps)))
          (setq cursor stop
                ranges (cdr ranges))))
      ;; Record whatever is left after the final range.
      (if (< cursor end)
          (setq gaps (cons (cons cursor end) gaps)))
      (nreverse gaps))))
(defun c-ts-mode--emacs-set-ranges (beg end)
  "Make the C parser skip the FOR_EACH_TAIL's found by the emacs-c parser.
BEG and END are described in `treesit-range-rules'.

Query the emacs-c parser between BEG and END with
`c-ts-mode--emacs-c-range-query', merge the result into
`c-ts-mode--for-each-tail-ranges', and clip it to the accessible
portion of the buffer.  Store the outcome back in that variable,
then set the C parser's included ranges to its complement."
  (let* ((parser (treesit-parser-create 'c))
         ;; Ranges recorded by earlier calls.
         (known c-ts-mode--for-each-tail-ranges)
         ;; Ranges the query finds in the region of interest.
         (found (treesit-query-range
                 'emacs-c c-ts-mode--emacs-c-range-query beg end))
         (merged (treesit--merge-ranges known found beg end))
         (skipped (treesit--clip-ranges
                   merged (point-min) (point-max)))
         ;; The C parser gets everything except the skipped ranges.
         (included (c-ts-mode--reverse-ranges
                    skipped (point-min) (point-max))))
    (setq-local c-ts-mode--for-each-tail-ranges skipped)
    (treesit-parser-set-included-ranges parser included)))
;;; Modes
(defvar-keymap c-ts-base-mode-map
@ -1072,6 +1149,17 @@ in your configuration."
:after-hook (c-ts-mode-set-modeline)
(when (treesit-ready-p 'c)
;; Add a fake "emacs-c" language which is just C. Used for
;; skipping FOR_EACH_TAIL, see `c-ts-mode--emacs-set-ranges'.
(setf (alist-get 'emacs-c treesit-load-name-override-list)
'("libtree-sitter-c" "tree_sitter_c"))
;; If Emacs source support is enabled, make sure emacs-c parser is
;; after c parser in the parser list. This way various tree-sitter
;; functions will automatically use the c parser rather than the
;; emacs-c parser.
(when c-ts-mode-emacs-sources-support
(treesit-parser-create 'emacs-c))
(treesit-parser-create 'c)
;; Comments.
(setq-local comment-start "/* ")
@ -1085,9 +1173,16 @@ in your configuration."
(setq-local treesit-defun-tactic 'top-level)
(treesit-major-mode-setup)
;; Emacs source support: handle DEFUN and FOR_EACH_TAIL gracefully.
(when c-ts-mode-emacs-sources-support
(setq-local add-log-current-defun-function
#'c-ts-mode--emacs-current-defun-name))))
#'c-ts-mode--emacs-current-defun-name)
(setq-local treesit-range-settings
(treesit-range-rules 'c-ts-mode--emacs-set-ranges))
(setq-local treesit-language-at-point-function
(lambda (_pos) 'c)))))
;;;###autoload
(define-derived-mode c++-ts-mode c-ts-base-mode "C++"