Allow user control on char-width of "ambiguous" characters

* src/character.c (syms_of_character) <ambiguous-width-chars>: New
char-table.

* lisp/international/characters.el (ambiguous-width-chars): Fill
the table.
(update-cjk-ambiguous-char-widths): New function.
(cjk-ambiguous-chars-are-wide): New defcustom, uses
'update-cjk-ambiguous-char-widths' as its :set function.
(use-cjk-char-width-table): Obey 'cjk-ambiguous-chars-are-wide' by
adding another child char-table for ambiguous-width characters,
where the width is set according to the option.

* lisp/language/chinese.el ("Chinese-GB", "Chinese-BIG5")
("Chinese-CNS", "Chinese-EUC-TW", "Chinese-GBK"):
* lisp/language/japanese.el ("Japanese"):
* lisp/language/korean.el ("Korean"): Add new language-info slot
'cjk-locale-symbol'.

Bug#64420
This commit is contained in:
Eli Zaretskii 2023-08-05 17:55:56 +03:00
parent 60e5f21218
commit a06a2950e1
5 changed files with 231 additions and 1 deletions

View file

@ -1394,6 +1394,174 @@ with L, LRE, or LRO Unicode bidi character type.")
(dolist (elt l)
(set-char-table-range char-width-table elt 2)))
;; A: East Asian "Ambiguous" characters.
(let ((l '((#x00A1 . #x00A1)
(#x00A4 . #x00A4)
(#x00A7 . #x00A8)
(#x00AA . #x00AA)
(#x00AD . #x00AE)
(#x00B0 . #x00B4)
(#x00B6 . #x00BA)
(#x00BC . #x00BF)
(#x00C6 . #x00C6)
(#x00D0 . #x00D0)
(#x00D7 . #x00D8)
(#x00E0 . #x00E1)
(#x00E6 . #x00E6)
(#x00E8 . #x00EA)
(#x00EC . #x00ED)
(#x00F0 . #x00F0)
(#x00F2 . #x00F3)
(#x00F7 . #x00FA)
(#x00FC . #x00FC)
(#x00FE . #x00FE)
(#x0101 . #x0101)
(#x0111 . #x0111)
(#x0113 . #x0113)
(#x011B . #x011B)
(#x0126 . #x0127)
(#x012B . #x012B)
(#x0131 . #x0133)
(#x0138 . #x0138)
(#x013F . #x0142)
(#x0144 . #x0144)
(#x0148 . #x014B)
(#x014D . #x014D)
(#x0152 . #x0153)
(#x0166 . #x0167)
(#x016B . #x016B)
(#x01CE . #x01CE)
(#x01D0 . #x01D0)
(#x01D2 . #x01D2)
(#x01D4 . #x01D4)
(#x01D6 . #x01D6)
(#x01D8 . #x01D8)
(#x01DA . #x01DA)
(#x01DC . #x01DC)
(#x0251 . #x0251)
(#x0261 . #x0261)
(#x02C4 . #x02C4)
(#x02C7 . #x02C7)
(#x02C9 . #x02CB)
(#x02CD . #x02CD)
(#x02D0 . #x02D0)
(#x02D8 . #x02DB)
(#x02DD . #x02DD)
(#x02DF . #x02DF)
(#x0300 . #x036F)
(#x0391 . #x03A1)
(#x03A3 . #x03A9)
(#x03B1 . #x03C1)
(#x03C3 . #x03C9)
(#x0401 . #x0401)
(#x0410 . #x044F)
(#x0451 . #x0451)
(#x2010 . #x2010)
(#x2013 . #x2016)
(#x2018 . #x2019)
(#x201C . #x201D)
(#x2020 . #x2022)
(#x2024 . #x2027)
(#x2030 . #x2030)
(#x2032 . #x2033)
(#x2035 . #x2035)
(#x203E . #x203E)
(#x2074 . #x2074)
(#x207F . #x207F)
(#x2081 . #x2084)
(#x20AC . #x20AC)
(#x2103 . #x2103)
(#x2105 . #x2105)
(#x2109 . #x2109)
(#x2113 . #x2113)
(#x2116 . #x2116)
(#x2121 . #x2122)
(#x2126 . #x2126)
(#x212B . #x212B)
(#x2153 . #x2154)
(#x215B . #x215E)
(#x2160 . #x216B)
(#x2170 . #x2179)
(#x2189 . #x2189)
(#x2190 . #x2199)
(#x21B8 . #x21B9)
(#x21D2 . #x21D2)
(#x21D4 . #x21D4)
(#x21E7 . #x21E7)
(#x2200 . #x2200)
(#x2202 . #x2203)
(#x2207 . #x2208)
(#x220B . #x220B)
(#x220F . #x220F)
(#x2211 . #x2211)
(#x2215 . #x2215)
(#x221A . #x221A)
(#x221D . #x2220)
(#x2223 . #x2223)
(#x2225 . #x2225)
(#x2227 . #x222C)
(#x222E . #x222E)
(#x2234 . #x2237)
(#x223C . #x223D)
(#x2248 . #x2248)
(#x224C . #x224C)
(#x2252 . #x2252)
(#x2260 . #x2261)
(#x2264 . #x2267)
(#x226A . #x226B)
(#x226E . #x226F)
(#x2282 . #x2283)
(#x2286 . #x2287)
(#x2295 . #x2295)
(#x2299 . #x2299)
(#x22A5 . #x22A5)
(#x22BF . #x22BF)
(#x2312 . #x2312)
(#x2460 . #x24E9)
(#x24EB . #x254B)
(#x2550 . #x2573)
(#x2580 . #x258F)
(#x2592 . #x2595)
(#x25A0 . #x25A1)
(#x25A3 . #x25A9)
(#x25B2 . #x25B3)
(#x25B6 . #x25B7)
(#x25BC . #x25BD)
(#x25C0 . #x25C1)
(#x25C6 . #x25C8)
(#x25CE . #x25D1)
(#x25E2 . #x25E5)
(#x25EF . #x25EF)
(#x2605 . #x2606)
(#x260E . #x260F)
(#x261C . #x261C)
(#x261E . #x261E)
(#x2640 . #x2640)
(#x2642 . #x2642)
(#x2660 . #x2661)
(#x2663 . #x2665)
(#x2667 . #x266A)
(#x266C . #x266D)
(#x266F . #x266F)
(#x269E . #x269F)
(#x26BF . #x26BF)
(#x26C6 . #x26CD)
(#x26CF . #x26D3)
(#x26D5 . #x26E1)
(#x26E3 . #x26E3)
(#x26E8 . #x26E9)
(#x26EB . #x26F1)
(#x26F4 . #x26F4)
(#x26F6 . #x26F9)
(#x26FB . #x26FC)
(#x26FE . #x26FF)
(#x273D . #x273D)
(#x2776 . #x277F)
(#x2B56 . #x2B59)
(#x3248 . #x324F))))
(dolist (elt l)
(set-char-table-range ambiguous-width-chars elt t)))
;; Other double width
;;(map-charset-chars
;; (lambda (range ignore) (set-char-table-range char-width-table range 2))
@ -1427,6 +1595,45 @@ with L, LRE, or LRO Unicode bidi character type.")
(chinese-cns11643-1 (#x2121 . #x427E)))
(ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E)))))
(defun update-cjk-ambiguous-char-widths (locale-name)
"Update character widths for LOCALE-NAME using `ambiguous-width-chars'.
LOCALE-NAME is the symbol of a CJK locale, such as \\='zh_CN."
(let ((slot (assq locale-name cjk-char-width-table-list)))
(or slot (error "Unknown locale for CJK language environment: %s"
locale-name))
;; Force recomputation of child table in 'use-cjk-char-width-table'.
(setcar (cdr slot) nil)
(use-cjk-char-width-table locale-name)))
(defcustom cjk-ambiguous-chars-are-wide t
"Whether the \"ambiguous-width\" characters take 2 columns on display.
Some of the characters are defined by Unicode as being of \"ambiguous\"
width: the actual width, either 1 column or 2 columns, should be
determined at display time, depending on the language context.
If this variable is non-nil, Emacs will consider these characters as
full-width, i.e. taking 2 columns; otherwise they are narrow characters
taking 1 column on display. Which value is correct depends on the
fonts being used. In some CJK locales the fonts are set so that
these characters are displayed as full-width. This setting is most
important for text-mode frames, because there Emacs cannot access the
metrics of the fonts used by the console or the terminal emulator.
Do not set this directly via `setq'; instead, use `setopt' or the
Customize commands. Alternatively, call `update-cjk-ambiguous-char-widths'
passing it the symbol of the current locale environment, after changing
the value of the variable with `setq'."
:type 'boolean
:set (lambda (symbol value)
(set-default symbol value)
(let ((locsym (get-language-info current-language-environment
'cjk-locale-symbol)))
(when locsym
(update-cjk-ambiguous-char-widths locsym))))
:version "30.1"
:group 'display)
;; Internal use only.
;; Setup char-width-table appropriate for a language environment
;; corresponding to LOCALE-NAME (symbol).
@ -1448,7 +1655,15 @@ with L, LRE, or LRO Unicode bidi character type.")
(car code-range) (cdr code-range)))))
(optimize-char-table table)
(set-char-table-parent table char-width-table)
(setcar (cdr slot) table)))
(let ((tbl (make-char-table nil)))
(map-char-table
(lambda (range _val)
(set-char-table-range tbl range
(if cjk-ambiguous-chars-are-wide 2 1)))
ambiguous-width-chars)
(optimize-char-table tbl)
(set-char-table-parent tbl table)
(setcar (cdr slot) tbl))))
(setq char-width-table (nth 1 slot))))
(defun use-default-char-width-table ()

View file

@ -111,6 +111,7 @@
(set-language-info-alist
"Chinese-GB" '((charset chinese-gb2312 chinese-sisheng)
(iso639-language . zh)
(cjk-locale-symbol . zh_CN)
(setup-function . (lambda ()
(use-cjk-char-width-table 'zh_CN)))
(exit-function . use-default-char-width-table)
@ -142,6 +143,7 @@
(set-language-info-alist
"Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2)
(iso639-language . zh)
(cjk-locale-symbol . zh_HK)
(setup-function . (lambda ()
(use-cjk-char-width-table 'zh_HK)))
(exit-function . use-default-char-width-table)
@ -198,6 +200,7 @@
chinese-cns11643-5 chinese-cns11643-6
chinese-cns11643-7)
(iso639-language . zh)
(cjk-locale-symbol . zh_TW)
(setup-function . (lambda ()
(use-cjk-char-width-table 'zh_TW)))
(exit-function . use-default-char-width-table)
@ -218,6 +221,7 @@ accepts Big5 for input also (which is then converted to CNS)."))
chinese-cns11643-5 chinese-cns11643-6
chinese-cns11643-7 chinese-big5-1 chinese-big5-2)
(iso639-language . zh)
(cjk-locale-symbol . zh_TW)
(setup-function . (lambda ()
(use-cjk-char-width-table 'zh_TW)))
(exit-function . use-default-char-width-table)
@ -248,6 +252,7 @@ converted to CNS)."))
(set-language-info-alist
"Chinese-GBK" '((charset chinese-gbk)
(iso639-language . zh)
(cjk-locale-symbol . zh_CN)
(setup-function . (lambda ()
(use-cjk-char-width-table 'zh_CN)))
(exit-function . use-default-char-width-table)

View file

@ -208,6 +208,7 @@ eucJP-ms is defined in <http://www.opengroup.or.jp/jvc/cde/appendix.html>."
"Japanese" '((setup-function . setup-japanese-environment-internal)
(exit-function . use-default-char-width-table)
(iso639-language . ja)
(cjk-locale-symbol . ja_JP)
(tutorial . "TUTORIAL.ja")
(charset japanese-jisx0208
japanese-jisx0212 latin-jisx0201 katakana-jisx0201

View file

@ -68,6 +68,7 @@
(set-language-info-alist
"Korean" '((setup-function . setup-korean-environment-internal)
(exit-function . exit-korean-environment)
(cjk-locale-symbol . ko_KR)
(iso639-language . ko)
(tutorial . "TUTORIAL.ko")
(charset korean-ksc5601 cp949)

View file

@ -1117,6 +1117,14 @@ A char-table for width (columns) of each character. */);
char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
make_fixnum (4));
DEFVAR_LISP ("ambiguous-width-chars", Vambiguous_width_chars,
doc: /*
A char-table for characters whose width (columns) can be 1 or 2.
The actual width depends on the language-environment and on the
value of `cjk-ambiguous-chars-are-wide'. */);
Vambiguous_width_chars = Fmake_char_table (Qnil, Qnil);
DEFVAR_LISP ("printable-chars", Vprintable_chars,
doc: /* A char-table for each printable character. */);
Vprintable_chars = Fmake_char_table (Qnil, Qnil);