Make downcasing unibyte strings in Turkish less wrong
* src/casefiddle.c (ascii_casify_character): New function. (do_casify_unibyte_string): Use it to make downcasing tr_TR.UTF-8 "I" less wrong. (Fdowncase): Mention caveats. (Fupcase): (Fcapitalize): (Fupcase_initials): Refer to details in `downcase'. (syms_of_casefiddle): Define more symbols.
This commit is contained in:
parent
4d4b401759
commit
53bea8796d
2 changed files with 51 additions and 9 deletions
12
etc/NEWS
12
etc/NEWS
|
@ -178,6 +178,18 @@ Emacs buffers, like indentation and the like. The new ert function
|
|||
|
||||
* Incompatible Lisp Changes in Emacs 29.1
|
||||
|
||||
---
|
||||
** 'downcase' details have changed slightly.
|
||||
In certain locales, changing the case of an ASCII-range character may
|
||||
turn it into a multibyte character, most notably with "I" in Turkish
|
||||
(the lowercase is "ı", 0x0131). Previously, 'downcase' on a unibyte
|
||||
string was buggy, and would mistakenly just return the lower byte of
|
||||
this, 0x31 (the digit "1"). 'downcase' on a unibyte string has now
|
||||
been changed to downcase such characters as if they were ASCII. To
|
||||
get proper locale-dependent downcasing, the string has to be converted
|
||||
to multibyte first. (This goes for the other case-changing functions,
|
||||
too.)
|
||||
|
||||
---
|
||||
** 'def' indentation changes.
|
||||
In 'emacs-lisp-mode', forms with a symbol with a name that starts with
|
||||
|
|
|
@ -297,6 +297,16 @@ do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
|
|||
return obj;
|
||||
}
|
||||
|
||||
/* Return the case-converted form of C taken from a uniprop char-table:
   the `lowercase' table when DOWNCASE is true, the `uppercase' table
   otherwise.  If the table entry for C is not a natural-number fixnum
   (i.e. FIXNATP fails), return C unchanged.  */
static int
|
||||
ascii_casify_character (bool downcase, int c)
|
||||
{
|
||||
Lisp_Object cased = CHAR_TABLE_REF (downcase?
|
||||
uniprop_table (Qlowercase) :
|
||||
uniprop_table (Quppercase),
|
||||
c);
|
||||
return FIXNATP (cased) ? XFIXNAT (cased) : c;
|
||||
}
|
||||
|
||||
static Lisp_Object
|
||||
do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
|
||||
{
|
||||
|
@ -310,11 +320,12 @@ do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
|
|||
cased = case_single_character (ctx, ch);
|
||||
if (ch == cased)
|
||||
continue;
|
||||
cased = make_char_unibyte (cased);
|
||||
/* If the char can't be converted to a valid byte, just don't
|
||||
change it. */
|
||||
if (SINGLE_BYTE_CHAR_P (cased))
|
||||
SSET (obj, i, cased);
|
||||
/* If down/upcasing changed an ASCII character into a non-ASCII
|
||||
character (this can happen in some locales, like the Turkish
|
||||
"I"), down/upcase it using the ASCII char table instead. */
|
||||
if (ASCII_CHAR_P (ch) && !SINGLE_BYTE_CHAR_P (cased))
|
||||
cased = ascii_casify_character (ctx->flag == CASE_DOWN, ch);
|
||||
SSET (obj, i, make_char_unibyte (cased));
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
@ -339,10 +350,13 @@ casify_object (enum case_action flag, Lisp_Object obj)
|
|||
|
||||
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
|
||||
doc: /* Convert argument to upper case and return that.
|
||||
The argument may be a character or string. The result has the same type.
|
||||
The argument may be a character or string. The result has the same
|
||||
type. (See `downcase' for further details about the type.)
|
||||
|
||||
The argument object is not altered--the value is a copy. If argument
|
||||
is a character, characters which map to multiple code points when
|
||||
cased, e.g. ﬁ, are returned unchanged.
|
||||
|
||||
See also `capitalize', `downcase' and `upcase-initials'. */)
|
||||
(Lisp_Object obj)
|
||||
{
|
||||
|
@ -351,7 +365,15 @@ See also `capitalize', `downcase' and `upcase-initials'. */)
|
|||
|
||||
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
|
||||
doc: /* Convert argument to lower case and return that.
|
||||
The argument may be a character or string. The result has the same type.
|
||||
The argument may be a character or string. The result has the same type,
|
||||
including the multibyteness of the string.
|
||||
|
||||
This means that if this function is called with a unibyte string
|
||||
argument, and downcasing it would turn it into a multibyte string
|
||||
(according to the current locale), the downcasing is done using ASCII
|
||||
\"C\" rules instead. To accurately downcase according to the current
|
||||
locale, the string must be converted into multibyte first.
|
||||
|
||||
The argument object is not altered--the value is a copy. */)
|
||||
(Lisp_Object obj)
|
||||
{
|
||||
|
@ -362,7 +384,10 @@ DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
|
|||
doc: /* Convert argument to capitalized form and return that.
|
||||
This means that each word's first character is converted to either
|
||||
title case or upper case, and the rest to lower case.
|
||||
The argument may be a character or string. The result has the same type.
|
||||
|
||||
The argument may be a character or string. The result has the same
|
||||
type. (See `downcase' for further details about the type.)
|
||||
|
||||
The argument object is not altered--the value is a copy. If argument
|
||||
is a character, characters which map to multiple code points when
|
||||
cased, e.g. ﬁ, are returned unchanged. */)
|
||||
|
@ -377,7 +402,10 @@ DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
|
|||
doc: /* Convert the initial of each word in the argument to upper case.
|
||||
This means that each word's first character is converted to either
|
||||
title case or upper case, and the rest are left unchanged.
|
||||
The argument may be a character or string. The result has the same type.
|
||||
|
||||
The argument may be a character or string. The result has the same
|
||||
type. (See `downcase' for further details about the type.)
|
||||
|
||||
The argument object is not altered--the value is a copy. If argument
|
||||
is a character, characters which map to multiple code points when
|
||||
cased, e.g. ﬁ, are returned unchanged. */)
|
||||
|
@ -651,6 +679,8 @@ syms_of_casefiddle (void)
|
|||
DEFSYM (Qbounds, "bounds");
|
||||
DEFSYM (Qidentity, "identity");
|
||||
DEFSYM (Qtitlecase, "titlecase");
|
||||
DEFSYM (Qlowercase, "lowercase");
|
||||
DEFSYM (Quppercase, "uppercase");
|
||||
DEFSYM (Qspecial_uppercase, "special-uppercase");
|
||||
DEFSYM (Qspecial_lowercase, "special-lowercase");
|
||||
DEFSYM (Qspecial_titlecase, "special-titlecase");
|
||||
|
|
Loading…
Add table
Reference in a new issue