Assign correct general-category and names to surrogates

* admin/unidata/unidata-gen.el (unidata-setup-list): Don't ignore
surrogates.  This avoids assigning them the default
general-category of 'Cn', which denotes unassigned codepoints.
(unidata-get-name): Give surrogates synthetic names.
This commit is contained in:
Eli Zaretskii 2015-04-14 18:37:07 +03:00
parent b5919771ae
commit 8802474a21

View file

@ -102,7 +102,8 @@
(tail table)
(block-names '(("^<CJK Ideograph" . CJK\ IDEOGRAPH)
("^<Hangul Syllable" . HANGUL\ SYLLABLE)
("^<.*Surrogate" . nil)
("^<.*High Surrogate" . HIGH\ SURROGATE)
("^<.*Low Surrogate" . LOW\ SURROGATE)
("^<.*Private Use" . PRIVATE\ USE)))
val char name)
(setq unidata-text-file (expand-file-name unidata-text-file unidata-dir))
@ -137,11 +138,8 @@
(if (string-match (caar l) block-name)
(setq name (cdar l) l nil)
(setq l (cdr l))))
(if (not name)
;; As this is a surrogate pair range, ignore it.
(setq val nil)
(setcar val (cons first char))
(setcar (cdr val) name))))
(setcar val (cons first char))
(setcar (cdr val) name)))
(when val
(setcdr tail (list val))
@ -783,6 +781,10 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
(format "%s-%04X" sym char))
((eq sym 'CJK\ COMPATIBILITY\ IDEOGRAPH)
(format "%s-%04X" sym char))
((eq sym 'HIGH\ SURROGATE)
(format "%s-%04X" sym char))
((eq sym 'LOW\ SURROGATE)
(format "%s-%04X" sym char))
((eq sym 'VARIATION\ SELECTOR)
(format "%s-%d" sym (+ (- char #xe0100) 17))))))))