Add 'case-symbols-as-words' to configure symbol case behavior

In some programming languages and styles, a symbol (or every
symbol in a sequence of symbols) might be capitalized, but the
individual words making up the symbol should never be capitalized.

For example, in OCaml, type names Look_like_this and variable names
look_like_this, but it is basically never correct for something to
Look_Like_This.  And one might have "aa_bb cc_dd ee_ff" or "Aa_bb
Cc_dd Ee_ff", but never "Aa_Bb Cc_Dd Ee_Ff".

To support this, the new variable 'case-symbols-as-words' causes
symbol constituents to be treated as part of words only for case
operations.

* src/casefiddle.c (case_ch_is_word): New function.
(case_character_impl, case_character): Use 'case_ch_is_word'.
(syms_of_casefiddle): Define 'case-symbols-as-words'.
* src/search.c (Freplace_match): Use 'case-symbols-as-words'
when calculating case pattern.
* test/src/casefiddle-tests.el (casefiddle-tests--check-syms)
(casefiddle-case-symbols-as-words): Test 'case-symbols-as-words'.
* etc/NEWS: Announce 'case-symbols-as-words'.
* doc/lispref/strings.texi (Case Conversion): Document
'case-symbols-as-words'.
(Bug#66614)
This commit is contained in:
Spencer Baugh 2023-10-21 11:09:39 -04:00 committed by Eli Zaretskii
parent 3dca52dd42
commit 5c8fc0b059
5 changed files with 56 additions and 8 deletions

View file

@ -1510,7 +1510,9 @@ case.
The definition of a word is any sequence of consecutive characters that
are assigned to the word constituent syntax class in the current syntax
table (@pxref{Syntax Class Table}).
table (@pxref{Syntax Class Table}); if @code{case-symbols-as-words}
is non-@code{nil}, characters assigned to the symbol constituent syntax
class are also treated as word constituents.
When @var{string-or-char} is a character, this function does the same
thing as @code{upcase}.
@ -1542,7 +1544,9 @@ had its initial letter converted to upper case.
The definition of a word is any sequence of consecutive characters that
are assigned to the word constituent syntax class in the current syntax
table (@pxref{Syntax Class Table}).
table (@pxref{Syntax Class Table}); if @code{case-symbols-as-words}
is non-@code{nil}, characters assigned to the symbol constituent syntax
class are also treated as word constituents.
When the argument to @code{upcase-initials} is a character,
@code{upcase-initials} has the same result as @code{upcase}.

View file

@ -1193,6 +1193,14 @@ instead of "ctags", "ebrowse", "etags", "hexl", "emacsclient", and
"rcs2log", when starting one of these built in programs in a
subprocess.
+++
** New variable 'case-symbols-as-words' affects case operations for symbols.
If non-nil, then case operations such as 'upcase-initials' or
'replace-match' (with nil FIXEDCASE) will treat the entire symbol name
as a single word. This is useful for programming languages and styles
where only the first letter of a symbol's name is ever capitalized.
It defaults to nil.
+++
** 'x-popup-menu' now understands touch screen events.
When a 'touchscreen-begin' or 'touchscreen-end' event is passed as the

View file

@ -92,6 +92,12 @@ prepare_casing_context (struct casing_context *ctx,
SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
}
/* Return true if a character whose syntax class is SYNTAX should be
   treated as part of a word by the case operations.  Word
   constituents always qualify; symbol constituents qualify only
   when 'case_symbols_as_words' is set.  */
static bool
case_ch_is_word (enum syntaxcode syntax)
{
  if (syntax == Sword)
    return true;

  return case_symbols_as_words && syntax == Ssymbol;
}
struct casing_str_buf
{
unsigned char data[max (6, MAX_MULTIBYTE_LENGTH)];
@ -115,7 +121,7 @@ case_character_impl (struct casing_str_buf *buf,
/* Update inword state */
bool was_inword = ctx->inword;
ctx->inword = SYNTAX (ch) == Sword &&
ctx->inword = case_ch_is_word (SYNTAX (ch)) &&
(!ctx->inbuffer || was_inword || !syntax_prefix_flag_p (ch));
/* Normalize flag so it's one of CASE_UP, CASE_DOWN or CASE_CAPITALIZE. */
@ -222,7 +228,7 @@ case_character (struct casing_str_buf *buf, struct casing_context *ctx,
has a word syntax (i.e. current character is end of word), use final
sigma. */
if (was_inword && ch == GREEK_CAPITAL_LETTER_SIGMA && changed
&& (!next || SYNTAX (STRING_CHAR (next)) != Sword))
&& (!next || !case_ch_is_word (SYNTAX (STRING_CHAR (next)))))
{
buf->len_bytes = CHAR_STRING (GREEK_SMALL_LETTER_FINAL_SIGMA, buf->data);
buf->len_chars = 1;
@ -720,6 +726,21 @@ Called with one argument METHOD which can be:
3rd argument. */);
Vregion_extract_function = Qnil; /* simple.el sets this. */
DEFVAR_BOOL ("case-symbols-as-words", case_symbols_as_words,
doc: /* If non-nil, case functions treat symbol syntax as part of words.
Functions such as `upcase-initials' and `replace-match' check or modify
the case pattern of sequences of characters. Normally, these operate on
sequences of characters whose syntax is word constituent. If this
variable is non-nil, then they operate on sequences of characters whose
syntax is either word constituent or symbol constituent.
This is useful for programming languages and styles where only the first
letter of a symbol's name is ever capitalized.*/);
case_symbols_as_words = 0;
DEFSYM (Qcase_symbols_as_words, "case-symbols-as-words");
Fmake_variable_buffer_local (Qcase_symbols_as_words);
defsubr (&Supcase);
defsubr (&Sdowncase);
defsubr (&Scapitalize);

View file

@ -2365,7 +2365,7 @@ text has only capital letters and has at least one multiletter word,
convert NEWTEXT to all caps. Otherwise if all words are capitalized
in the replaced text, capitalize each word in NEWTEXT. Note that
what exactly is a word is determined by the syntax tables in effect
in the current buffer.
in the current buffer, and the variable `case-symbols-as-words'.
If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
Otherwise treat `\\' as special:
@ -2479,7 +2479,8 @@ since only regular expressions have distinguished subexpressions. */)
/* Cannot be all caps if any original char is lower case */
some_lowercase = 1;
if (SYNTAX (prevc) != Sword)
if (SYNTAX (prevc) != Sword
&& !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
some_nonuppercase_initial = 1;
else
some_multiletter_word = 1;
@ -2487,7 +2488,8 @@ since only regular expressions have distinguished subexpressions. */)
else if (uppercasep (c))
{
some_uppercase = 1;
if (SYNTAX (prevc) != Sword)
if (SYNTAX (prevc) != Sword
&& !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
;
else
some_multiletter_word = 1;
@ -2496,7 +2498,8 @@ since only regular expressions have distinguished subexpressions. */)
{
/* If the initial is a caseless word constituent,
treat that like a lowercase initial. */
if (SYNTAX (prevc) != Sword)
if (SYNTAX (prevc) != Sword
&& !(case_symbols_as_words && SYNTAX (prevc) == Ssymbol))
some_nonuppercase_initial = 1;
}

View file

@ -294,4 +294,16 @@
;;(should (string-equal (capitalize "indIá") "İndıa"))
))
(defun casefiddle-tests--check-syms (init with-words with-symbols)
  "Check `upcase-initials' of INIT with `case-symbols-as-words' off and on.
WITH-WORDS is the expected result when `case-symbols-as-words' is
nil; WITH-SYMBOLS is the expected result when it is t."
  ;; Each entry pairs a value for `case-symbols-as-words' with the
  ;; result expected under that value; nil is checked first, then t.
  (dolist (expectation (list (cons nil with-words)
                             (cons t with-symbols)))
    (let ((case-symbols-as-words (car expectation)))
      (should (string-equal (upcase-initials init)
                            (cdr expectation))))))
(ert-deftest casefiddle-case-symbols-as-words ()
  "Test `upcase-initials' with and without `case-symbols-as-words'."
  ;; Each entry is (INIT WITH-WORDS WITH-SYMBOLS), passed in that
  ;; order to `casefiddle-tests--check-syms'.
  (dolist (entry '(("Aa_bb Cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd")
                   ("Aa_bb cc_DD" "Aa_Bb Cc_DD" "Aa_bb Cc_DD")
                   ("aa_bb cc_dd" "Aa_Bb Cc_Dd" "Aa_bb Cc_dd")
                   ("Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd" "Aa_Bb Cc_Dd")))
    (apply #'casefiddle-tests--check-syms entry)))
;;; casefiddle-tests.el ends here