Update Emacs sources for Unicode 16.0

* lisp/international/ucs-normalize.el (check-range): Update ranges
of character codes with decompositions.
* lisp/international/mule-cmds.el (ucs-names): Update unused
ranges.
* lisp/international/fontset.el (script-representative-chars)
(otf-script-alist, setup-default-fontset): Add new scripts.
* lisp/international/characters.el:
* admin/unidata/blocks.awk:
* test/manual/BidiCharacterTest.txt:
* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--failing-lines-part2): Update for Unicode
16.0.

* etc/NEWS: Announce support for Unicode 16.0.
This commit is contained in:
Eli Zaretskii 2024-09-12 21:12:09 +03:00
parent 5e16d84689
commit 04e8ad6489
8 changed files with 72 additions and 32 deletions

View file

@ -57,6 +57,7 @@ BEGIN {
alias["block elements"] = "symbol"
alias["miscellaneous symbols"] = "symbol"
alias["symbols for legacy computing"] = "symbol"
alias["symbols for legacy computing supplement"] = "symbol"
alias["cjk strokes"] = "cjk-misc"
alias["cjk symbols and punctuation"] = "cjk-misc"
alias["halfwidth and fullwidth forms"] = "cjk-misc"

View file

@ -119,6 +119,9 @@ It is equivalent to running project-any-command with find-file.
** Internationalization
---
*** Emacs now supports Unicode version 16.0.
---
*** New language-environment and input method for Tifinagh.
The Tifinagh script is used to write the Berber languages.

View file

@ -1181,7 +1181,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x10A01 . #x10A0F)
(#x10A38 . #x10A3F)
(#x10AE5 . #x10AE6)
(#x10D69 . #x10D6D)
(#x10EAB . #x10EAC)
(#x10EFC . #x10EFF)
(#x11001 . #x11001)
(#x11038 . #x11046)
(#x1107F . #x11081)
@ -1207,6 +1209,11 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x11340 . #x11340)
(#x11366 . #x1136C)
(#x11370 . #x11374)
(#x113BB . #x113C0)
(#x113CE . #x113CE)
(#x113D0 . #x113D0)
(#x113D2 . #x113D2)
(#x113E1 . #x113E2)
(#x11438 . #x1143F)
(#x11442 . #x11444)
(#x11446 . #x11446)
@ -1236,12 +1243,18 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x11CAA . #x11CB0)
(#x11CB2 . #x11CB3)
(#x11CB5 . #x11CB6)
(#x11F5A . #x11F5A)
(#x13430 . #x13440)
(#x13447 . #x13455)
(#x1611E . #x16129)
(#x1612D . #x1612F)
(#x16AF0 . #x16AF4)
(#x16B30 . #x16B36)
(#x16F8F . #x16F92)
(#x16FE4 . #x16FE4)
(#x1BC9D . #x1BC9E)
(#x1BCA0 . #x1BCA3)
(#x1CF00 . #x1CF02)
(#x1D167 . #x1D169)
(#x1D173 . #x1D182)
(#x1D185 . #x1D18B)
@ -1258,6 +1271,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1E01B . #x1E021)
(#x1E023 . #x1E024)
(#x1E026 . #x1E02A)
(#x1E5EE . #x1E5EF)
(#x1E8D0 . #x1E8D6)
(#x1E944 . #x1E94A)
(#xE0001 . #xE01EF))))
@ -1273,8 +1287,10 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x23F3 . #x23F3)
(#x25FD . #x25FE)
(#x2614 . #x2615)
(#x2630 . #x2637)
(#x2648 . #x2653)
(#x267F . #x267F)
(#x268A . #x268F)
(#x2693 . #x2693)
(#x26A1 . #x26A1)
(#x26AA . #x26AB)
@ -1308,7 +1324,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x3041 . #x3096)
(#x3099 . #x30FF)
(#x3105 . #x312F)
(#x3131 . #x31E3)
(#x3131 . #x31E5)
(#x31EF . #x31EF)
(#x31F0 . #x3247)
(#x3250 . #x4DBF)
@ -1326,6 +1342,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x17000 . #x187F7)
(#x18800 . #x18AFF)
(#x18B00 . #x18CD5)
(#x18CFF . #x18CFF)
(#x18D00 . #x18D08)
(#x1AFF0 . #x1AFF3)
(#x1AFF5 . #x1AFFB)
@ -1336,6 +1353,8 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1B155 . #x1B155)
(#x1B164 . #x1B167)
(#x1B170 . #x1B2FB)
(#x1D300 . #x1D356)
(#x1D360 . #x1D376)
(#x1F004 . #x1F004)
(#x1F0CF . #x1F0CF)
(#x1F18E . #x1F18E)
@ -1383,11 +1402,10 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1FA60 . #x1FA6D)
(#x1FA70 . #x1FA74)
(#x1FA78 . #x1FA7C)
(#x1FA80 . #x1FA88)
(#x1FA90 . #x1FABD)
(#x1FABF . #x1FAC5)
(#x1FACE . #x1FADB)
(#x1FAE0 . #x1FAE8)
(#x1FA80 . #x1FA89)
(#x1FA8F . #x1FAC6)
(#x1FACE . #x1FADC)
(#x1FADF . #x1FAE9)
(#x1FAF0 . #x1FAF8)
(#x1FB00 . #x1FB92)
(#x20000 . #x2FFFF)

View file

@ -237,6 +237,7 @@
(elbasan #x10500)
(caucasian-albanian #x10530)
(vithkuqi #x10570)
(todhri #x105C0 #x105ED)
(linear-a #x10600)
(cypriot-syllabary #x10800)
(palmyrene #x10860)
@ -246,6 +247,7 @@
(kharoshthi #x10A00)
(manichaean #x10AC0)
(hanifi-rohingya #x10D00 #x10D24 #x10D39)
(garay #x10D50 #x10D70 #x10D4A #x10D41)
(yezidi #x10E80)
(old-sogdian #x10F00)
(sogdian #x10F30)
@ -259,6 +261,7 @@
(khojki #x11200)
(khudawadi #x112B0)
(grantha #x11315 #x1133E #x11374)
(tulu-tigalari #x11380 #x113B8)
(newa #x11400)
(tirhuta #x11481 #x1148F #x114D0)
(siddham #x1158E #x115AF #x115D4)
@ -271,6 +274,7 @@
(zanabazar-square #x11A00)
(soyombo #x11A50)
(pau-cin-hau #x11AC0)
(sunuwar #x11BC0 #x11BF1)
(bhaiksuki #x11C00)
(marchen #x11C72)
(masaram-gondi #x11D00)
@ -280,10 +284,12 @@
(cuneiform #x12000)
(cypro-minoan #x12F90)
(egyptian #x13000)
(gurung-khema #x16100 #x1611E #x16131)
(mro #x16A40)
(tangsa #x16A70 #x16AC0)
(bassa-vah #x16AD0)
(pahawh-hmong #x16B11)
(kirat-rai #x16D43 #x16D63 #x16D71)
(medefaidrin #x16E40)
(tangut #x17000)
(khitan-small-script #x18B00)
@ -300,6 +306,7 @@
(toto #x1E290 #x1E295 #x1E2AD)
(wancho #x1E2C0 #x1E2E8 #x1E2EF)
(nag-mundari #x1E4D0 #x1E4EB #x1E4F0)
(ol-onal #x1E5D0 #x1E5F2)
(mende-kikakui #x1E810 #x1E8A6)
(adlam #x1E900 #x1E943)
(indic-siyaq-number #x1EC71 #x1EC9F)
@ -311,7 +318,7 @@
(defvar otf-script-alist)
;; The below was synchronized with the latest Sep 12, 2021 version of
;; The below was synchronized with the latest May 31, 2024 version of
;; https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
(setq otf-script-alist
'((adlm . adlam)
@ -356,6 +363,7 @@
(elba . elbasan)
(elym . elymaic)
(ethi . ethiopic)
(gara . garay)
(geor . georgian)
(glag . glagolitic)
(goth . gothic)
@ -366,6 +374,7 @@
(gong . gunjala-gondi)
(guru . gurmukhi)
(gur2 . gurmukhi)
(gukh . gurung-khema)
(hani . han)
(hang . hangul)
(jamo . hangul) ; Not recommended; use 'hang' instead.
@ -388,6 +397,7 @@
(khmr . khmer)
(khoj . khojki)
(sind . khudawadi)
(krai . kirat-rai)
(lao\ . lao)
(latn . latin)
(lepc . lepcha)
@ -428,6 +438,7 @@
(hmnp . nyiakeng-puachue-hmong)
(ogam . ogham)
(olck . ol-chiki)
(omao . ol-onal)
(ital . old-italic)
(xpeo . old-persian)
(narb . old-north-arabian)
@ -461,6 +472,7 @@
(sora . sora-sompeng)
(soyo . soyombo)
(sund . sundanese)
(sunu . sunuwar)
(sylo . syloti-nagri)
(syrc . syriac)
(tglg . tagalog)
@ -481,7 +493,9 @@
(tibt . tibetan)
(tfng . tifinagh)
(tirh . tirhuta)
(todr . todhri)
(toto . toto)
(tutg . tulu-tigalari)
(ugar . ugaritic)
(vith . vithkuqi)
(vai\ . vai)
@ -872,7 +886,9 @@
mahjong-tile
domino-tile
emoji
chess-symbol))
chess-symbol
garay
sunuwar))
(set-fontset-font "fontset-default"
script (font-spec :registry "iso10646-1" :script script)
nil 'append))

View file

@ -3108,10 +3108,11 @@ on encoding."
;; (#x4E00 . #x9FFF) CJK Unified Ideographs
(#xA000 . #xD7FF)
;; (#xD800 . #xF8FF) Surrogate/Private
(#xFB00 . #x134FF)
;; (#x13500 . #x143FF) unused
(#xFB00 . #x143FA)
(#x14400 . #x14646)
;; (#x14647 . #x167FF) unused
;; (#x14647 . #x160FF) unused
(#x16100 . #x16139)
;; (#x1613A . #x167FF) unused
(#x16800 . #x16F9F)
(#x16FE0 . #x16FF1)
;; (#x17000 . #x187FF) Tangut Ideographs
@ -3127,8 +3128,8 @@ on encoding."
(#x1B170 . #x1B2FF)
;; (#x1B300 . #x1BBFF) unused
(#x1BC00 . #x1BCAF)
;; (#x1BCB0 . #x1CEFF) unused
(#x1CF00 . #x1FFFF)
;; (#x1BCB0 . #x1CBFF) unused
(#x1CC00 . #x1FFFF)
;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
(#xE0000 . #xE01FF)))
(gc-cons-threshold (max gc-cons-threshold 10000000))

View file

@ -142,7 +142,8 @@
(defvar check-range nil)
(setq check-range
'((#x00A0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x11100)
(#x11100 . #x11A00) (#x1D000 . #x1E100) (#x1EE00 . #x1F300)
(#x11100 . #x11A00) (#x16100 . #x16DFF) (#x1CCD0 . #x1CCFF)
(#x1D000 . #x1E100) (#x1EE00 . #x1F300)
(#x1FBF0 . #x1FC00) (#x2F800 . #x2FB00)))
;; Basic normalization functions

View file

@ -247,20 +247,19 @@ Must be called with `ucs-normalize-tests--norm-buf' as current buffer."
ucs-normalize-tests--failing-lines-part1)))
(defconst ucs-normalize-tests--failing-lines-part2
(list 17789 17790 17801 17802 17807 17808 17811 17812
17815 17816 17821 17822 17829 17830 17907 17908
18023 18024 18049 18050 18055 18056 18459 18460
18605 18606 18617 18618 18621 18622 18625 18626
18627 18628 18631 18632 18633 18634 18663 18664
18669 18670 18673 18674 18679 18680 18685 18686
18691 18692 18695 18697 18699 18701 18703 18704
18705 18707 18709 18711 18713 18715 18717 18719
18721 18723 18725 18727 18729 18731 18733 18735
18737 18739 18740 18741 18742 18743 18889 18891
18893 18895 18897 18899 18901 18903 18905 18907
18909 18911 18913 18914 18915 18916 18917 18919
18921 18923 18925 18927 18929 18931 18933 18935
18937 18939 18941 18943 18945 18947 18948))
(list 17867 17868 17879 17880 17885 17886 17889 17890
17893 17894 17899 17900 17907 17908 17985 17986
18101 18102 18127 18128 18133 18134 18537 18538
18693 18694 18705 18706 18709 18710 18713 18714
18715 18716 18719 18720 18721 18722 18757 18758
18763 18764 18767 18768 18773 18774 18779 18780
18785 18786 18789 18791 18793 18795 18797 18798
18799 18801 18803 18805 18807 18835 18836 18837
18838 18839 18985 18987 18989 18991 18993 18995
18997 18999 19001 19003 19005 19007 19009 19010
19011 19012 19013 19015 19017 19019 19021 19023
19025 19027 19029 19031 19033 19035 19037 19039
19041 19043 19045 19047 19048))
(ert-deftest ucs-normalize-part2 ()
:tags '(:expensive-test)

View file

@ -1,7 +1,8 @@
# BidiCharacterTest-15.1.0.txt
# Date: 2023-01-05
# © 2023 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
# BidiCharacterTest-16.0.0.txt
# Date: 2024-02-02
# © 2024 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/