Move c-ts-mode--statement-offset to c-ts-common.el

Now it can be used by other C-like languages.

* lisp/progmodes/c-ts-common.el (c-ts-common-indent-offset):
(c-ts-common-indent-block-type-regexp):
(c-ts-common-indent-bracketless-type-regexp): New variables.
(c-ts-common-statement-offset):
(c-ts-mode--fix-bracketless-indent):
(c-ts-mode--close-bracket-offset): New functions.

* lisp/progmodes/c-ts-mode.el (c-ts-mode--indent-styles): Change
c-ts-mode--statement-offset to c-ts-common-statement-offset.
The (parent-is "if_statement") rules are now handled by (node-is
"compound_statement").

(c-ts-mode--statement-offset-post-processr):
(c-ts-mode--statement-offset):
(c-ts-mode--fix-bracketless-indent): Move to c-ts-common.el.

(c-ts-base-mode): Setup c-ts-common stuff.

* test/lisp/progmodes/c-ts-mode-resources/indent.erts: Make the test
more challenging.
This commit is contained in:
Yuan Fu 2023-01-28 17:08:53 -08:00
parent 4b1714571c
commit a3003492ac
No known key found for this signature in database
GPG key ID: 56E19BC57664A442
3 changed files with 140 additions and 111 deletions

View file

@ -2,7 +2,7 @@
;; Copyright (C) 2023 Free Software Foundation, Inc.
;; Author : 付禹安 (Yuan Fu) <casouri@gmail.com>
;; Maintainer : 付禹安 (Yuan Fu) <casouri@gmail.com>
;; Keywords : c c++ java javascript rust languages tree-sitter
;; This file is part of GNU Emacs.
@ -22,7 +22,10 @@
;;; Commentary:
;;
;; For C-like language major modes:
;; This file contains functions that can be shared by C-like language
;; major modes, like indenting and filling "/* */" block comments.
;;
;; For indenting and filling comments:
;;
;; - Use `c-ts-common-comment-setup' to set up comment variables and
;; filling.
@ -30,6 +33,14 @@
;; - Use simple-indent matcher `c-ts-common-looking-at-star' and
;; anchor `c-ts-common-comment-start-after-first-star' for indenting
;; block comments. See `c-ts-mode--indent-styles' for example.
;;
;; For indenting statements:
;;
;; - Set `c-ts-common-indent-offset',
;; `c-ts-common-indent-block-type-regexp', and
;; `c-ts-common-indent-bracketless-type-regexp', then use simple-indent
;; offset `c-ts-common-statement-offset' in
;; `treesit-simple-indent-rules'.
;;; Code:
@ -40,6 +51,8 @@
(declare-function treesit-node-end "treesit.c")
(declare-function treesit-node-type "treesit.c")
;;; Comment indentation and filling
(defun c-ts-common-looking-at-star (_n _p bol &rest _)
"A tree-sitter simple indent matcher.
Matches if there is a \"*\" after BOL."
@ -242,6 +255,107 @@ Set up:
(setq-local paragraph-separate paragraph-start)
(setq-local fill-paragraph-function #'c-ts-common--fill-paragraph))
;;; Statement indent
(defvar c-ts-common-indent-offset nil
"Indent offset used by `c-ts-common' indent functions.
The value should be the symbol of the indent offset variable for
the particular major mode, not the offset number itself: the
statement indent functions read the actual offset by calling
`symbol-value' on this symbol.  This cannot be nil for
`c-ts-common' statement indent functions to work.")
(defvar c-ts-common-indent-block-type-regexp nil
"Regexp matching types of block nodes (i.e., {} blocks).
The regexp is matched against tree-sitter node types with
`string-match-p'.  `c-ts-common-statement-offset' adds an indent
level for each matching node it meets while walking up the parse
tree.  This cannot be nil for `c-ts-common' statement indent
functions to work.")
(defvar c-ts-common-indent-bracketless-type-regexp nil
"A regexp matching types of bracketless constructs.
These constructs include if, while, do-while, for statements.  In
these statements, the body can omit the bracket, which requires
special handling from our bracket-counting indent algorithm: a
node that is not itself a block, but whose parent's type matches
this regexp, gets one extra indent level.
This can be nil, meaning such special handling is not needed.")
(defun c-ts-common-statement-offset (node parent &rest _)
  "Return the indent offset for a statement inside a block.
This is meant to be used as a simple-indent offset function in
`treesit-simple-indent-rules'.

This function basically counts the number of block nodes (i.e.,
brackets, as defined by `c-ts-common-indent-block-type-regexp')
between NODE and the root node (not counting NODE itself), and
multiplies that by the value of the variable named by
`c-ts-common-indent-offset'.

To support GNU style, on each block level, this function also
checks whether the opening bracket { is on its own line, if so,
it adds an extra level, except for the top-level.

NODE can be nil, which is the case when point is on an empty
line; counting then starts from PARENT.  PARENT is NODE's parent."
  (let ((level 0)
        ;; If point is on an empty line, NODE is nil; pretend there is
        ;; a statement node there by starting the walk at PARENT.
        ;; Otherwise the walk starts at NODE itself.
        (current (or node parent)))
    ;; If NODE is an opening bracket on its own line, take off one
    ;; level because the code below assumes NODE is a statement
    ;; _inside_ a {} block.  Guard against a nil NODE: there is no
    ;; node type to inspect on an empty line.
    (when (and node
               (string-match-p c-ts-common-indent-block-type-regexp
                               (treesit-node-type node)))
      (setq level (1- level)))
    ;; Go up the tree and compute indent level.
    (while current
      (when (string-match-p c-ts-common-indent-block-type-regexp
                            (treesit-node-type current))
        (setq level (1+ level))
        (save-excursion
          (goto-char (treesit-node-start current))
          ;; Add an extra level if the opening bracket is on its own
          ;; line, except (1) it's at top-level, or (2) its immediate
          ;; parent is another block.
          (cond ((bolp) nil)            ; Case (1).
                ((let ((parent-type (treesit-node-type
                                     (treesit-node-parent current))))
                   ;; Case (2).
                   (and parent-type
                        (string-match-p
                         c-ts-common-indent-block-type-regexp
                         parent-type)))
                 nil)
                ;; Only whitespace before the bracket on its line:
                ;; add a level.
                ((looking-back (rx bol (* whitespace))
                               (line-beginning-position))
                 (setq level (1+ level))))))
      ;; Account for bracketless bodies of if/for/while/do.
      (setq level (c-ts-mode--fix-bracketless-indent level current))
      ;; Go up the tree.
      (setq current (treesit-node-parent current)))
    (* level (symbol-value c-ts-common-indent-offset))))
(defun c-ts-mode--fix-bracketless-indent (level node)
  "Return LEVEL, adjusted for NODE being a bracketless body.
This fixes indentation for cases shown in bug#61026.  In C-like
syntax, statements like if, for, while may omit the brackets
around their body.  When NODE is not a block (per
`c-ts-common-indent-block-type-regexp') and its parent is such a
statement (per `c-ts-common-indent-bracketless-type-regexp'),
return LEVEL plus one; otherwise return LEVEL unchanged."
  (let* ((own-type (treesit-node-type node))
         (enclosing-type (treesit-node-type (treesit-node-parent node)))
         ;; Non-nil only when both regexps are set, both types are
         ;; known, NODE is not a block and its parent is a
         ;; bracketless construct.
         (bracketless-body-p
          (and c-ts-common-indent-block-type-regexp
               c-ts-common-indent-bracketless-type-regexp
               own-type
               enclosing-type
               (not (string-match-p
                     c-ts-common-indent-block-type-regexp
                     own-type))
               (string-match-p
                c-ts-common-indent-bracketless-type-regexp
                enclosing-type))))
    (if bracketless-body-p
        (1+ level)
      level)))
(defun c-ts-mode--close-bracket-offset (node parent &rest _)
  "Return the indent offset for the closing bracket NODE.
It is one level less than the offset that
`c-ts-common-statement-offset' computes for NODE and PARENT,
where one level is the value of the variable named by
`c-ts-common-indent-offset'.  PARENT is NODE's parent."
  (let* ((statement-offset (c-ts-common-statement-offset node parent))
         (one-level (symbol-value c-ts-common-indent-offset)))
    (- statement-offset one-level)))
(provide 'c-ts-common)
;;; c-ts-common.el ends here

View file

@ -63,11 +63,6 @@
;; will set up Emacs to use the C/C++ modes defined here for other
;; files, provided that you have the corresponding parser grammar
;; libraries installed.
;;
;; - Use variable `c-ts-mode-indent-block-type-regexp' with indent
;; offset c-ts-mode--statement-offset for indenting statements.
;; Again, see `c-ts-mode--indent-styles' for example.
;;
;;; Code:
@ -228,7 +223,7 @@ MODE is either `c' or `cpp'."
;; Labels.
((node-is "labeled_statement") parent-bol 0)
((parent-is "labeled_statement")
point-min c-ts-mode--statement-offset)
point-min c-ts-common-statement-offset)
((match "preproc_ifdef" "compound_statement") point-min 0)
((match "#endif" "preproc_ifdef") point-min 0)
@ -237,15 +232,6 @@ MODE is either `c' or `cpp'."
((match "preproc_function_def" "compound_statement") point-min 0)
((match "preproc_call" "compound_statement") point-min 0)
;; {} blocks.
((node-is "}") point-min c-ts-mode--close-bracket-offset)
((parent-is "compound_statement")
point-min c-ts-mode--statement-offset)
((parent-is "enumerator_list")
point-min c-ts-mode--statement-offset)
((parent-is "field_declaration_list")
point-min c-ts-mode--statement-offset)
((parent-is "function_definition") parent-bol 0)
((parent-is "conditional_expression") first-sibling 0)
((parent-is "assignment_expression") parent-bol c-ts-mode-indent-offset)
@ -266,13 +252,16 @@ MODE is either `c' or `cpp'."
;; Indent the body of namespace definitions.
((parent-is "declaration_list") parent-bol c-ts-mode-indent-offset)))
;; int[5] a = { 0, 0, 0, 0 };
((parent-is "initializer_list") parent-bol c-ts-mode-indent-offset)
((parent-is "if_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "for_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "while_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "switch_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "case_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "do_statement") parent-bol c-ts-mode-indent-offset)
((parent-is "enumerator_list") point-min c-ts-common-statement-offset)
((parent-is "field_declaration_list") point-min c-ts-common-statement-offset)
;; {} blocks.
((node-is "}") point-min c-ts-mode--close-bracket-offset)
((parent-is "compound_statement") point-min c-ts-common-statement-offset)
((node-is "compound_statement") point-min c-ts-common-statement-offset)
,@(when (eq mode 'cpp)
`(((node-is "field_initializer_list") parent-bol ,(* c-ts-mode-indent-offset 2)))))))
`((gnu
@ -311,90 +300,6 @@ NODE should be a labeled_statement."
"labeled_statement")
(not (treesit-node-top-level func "compound_statement")))))
(defvar c-ts-mode-indent-block-type-regexp
(rx (or "compound_statement"
"field_declaration_list"
"enumerator_list"))
"Regexp matching types of block nodes (i.e., {} blocks).
`c-ts-mode--statement-offset' matches this against tree-sitter
node types to decide which ancestors count as an indent level.")
(defvar c-ts-mode--statement-offset-post-processr nil
"A function that makes adjustments to `c-ts-mode--statement-offset'.
This is a function that takes two arguments, the current indent
level and the current node, and returns a new level.
When `c-ts-mode--statement-offset' runs and goes up the parse tree,
it increments the indent level when some conditions are met at
each level.  At each level, after (possibly) incrementing the
level, it calls this function, passing it the current indent
level and the current node, and uses the return value as the new
indent level.  If this is nil, no adjustment is made.")
(defun c-ts-mode--statement-offset (node parent &rest _)
  "Compute the indent offset for a statement nested inside blocks.
Count the block nodes (those whose type matches
`c-ts-mode-indent-block-type-regexp') among the ancestors of
NODE, not counting NODE itself, and multiply the count by
`c-ts-mode-indent-offset'.

To support GNU style, a block whose opening bracket { sits on its
own line counts for one extra level, unless the bracket is at the
beginning of the line or the block's parent is itself a block.

After each ancestor is examined, the running level is passed
through `c-ts-mode--statement-offset-post-processr' if that is
non-nil.

NODE is nil on an empty line, in which case the walk starts at
PARENT.  PARENT is NODE's parent."
  (let ((depth 0)
        ;; With a real NODE the walk starts at its parent; on an empty
        ;; line there is no NODE, so PARENT is the first ancestor.
        (ancestor (if node
                      (treesit-node-parent node)
                    parent)))
    (while ancestor
      (when (string-match-p c-ts-mode-indent-block-type-regexp
                            (treesit-node-type ancestor))
        (cl-incf depth)
        (save-excursion
          (goto-char (treesit-node-start ancestor))
          ;; One more level when the opening bracket is alone on its
          ;; line, but not (1) at top-level, nor (2) when the
          ;; immediate parent is another block.
          (when (and (not (bolp))       ; Case (1).
                     (not (let ((outer-type
                                 (treesit-node-type
                                  (treesit-node-parent ancestor))))
                            ;; Case (2).
                            (and outer-type
                                 (string-match-p
                                  c-ts-mode-indent-block-type-regexp
                                  outer-type))))
                     (looking-back (rx bol (* whitespace))
                                   (line-beginning-position)))
            (cl-incf depth))))
      ;; Let the post-processor adjust the level for this ancestor.
      (when c-ts-mode--statement-offset-post-processr
        (setq depth (funcall c-ts-mode--statement-offset-post-processr
                             depth ancestor)))
      (setq ancestor (treesit-node-parent ancestor)))
    (* depth c-ts-mode-indent-offset)))
(defun c-ts-mode--fix-bracketless-indent (level node)
  "Return LEVEL, adjusted for NODE being a bracketless body.
This fixes indentation for cases shown in bug#61026.  In C/C++,
constructs like if, for, while sometimes don't have brackets
around their body.  When NODE is not a compound_statement and its
parent is an if, while, do or for statement, return LEVEL plus
one; otherwise return LEVEL unchanged."
  (let ((bracketless-parents '("if_statement" "while_statement"
                               "do_statement" "for_statement")))
    (cond
     ;; A bracketed body is already counted as a block.
     ((equal (treesit-node-type node) "compound_statement")
      level)
     ;; A bare statement directly under if/while/do/for.
     ((member (treesit-node-type (treesit-node-parent node))
              bracketless-parents)
      (1+ level))
     (t level))))
(defun c-ts-mode--close-bracket-offset (node parent &rest _)
  "Return the indent offset for the closing bracket NODE.
It is `c-ts-mode-indent-offset' less than the offset that
`c-ts-mode--statement-offset' computes for NODE and PARENT, i.e.,
one level less than the statements in the block.  PARENT is
NODE's parent."
  (let ((body-offset (c-ts-mode--statement-offset node parent)))
    (- body-offset c-ts-mode-indent-offset)))
;;; Font-lock
(defvar c-ts-mode--preproc-keywords
@ -824,8 +729,14 @@ the semicolon. This function skips the semicolon."
;; Indent.
(when (eq c-ts-mode-indent-style 'linux)
(setq-local indent-tabs-mode t))
(setq-local c-ts-mode--statement-offset-post-processr
#'c-ts-mode--fix-bracketless-indent)
(setq-local c-ts-common-indent-offset 'c-ts-mode-indent-offset)
(setq-local c-ts-common-indent-block-type-regexp
(rx (or "compound_statement"
"field_declaration_list"
"enumerator_list")))
(setq-local c-ts-common-indent-bracketless-type-regexp
(rx (or "if_statement" "do_statement"
"for_statement" "while_statement")))
;; Comment
(c-ts-common-comment-setup)

View file

@ -114,7 +114,9 @@ int main() {
{
puts ("Hello");
}
for (int i=0; i<5; i++)
for (int i=0;
i<5;
i++)
if (true)
{
puts ("Hello");
@ -141,7 +143,9 @@ int main() {
if (true) {
puts ("Hello");
}
for (int i=0; i<5; i++)
for (int i=0;
i<5;
i++)
if (true) {
puts ("Hello");
}