Add support for functions that deal with Unicode scripts
* admin/unidata/Makefile.in (${unidir}/uni-scripts.el): Build uni-scripts.el. * admin/unidata/Scripts.txt: * admin/unidata/ScriptExtensions.txt: * admin/unidata/PropertyValueAliases.txt: New files from Unicode. * admin/unidata/README: Update. * admin/unidata/unidata-gen.el (unidata-gen-charprop): Allow writing other data, too. (unidata-gen-scripts, unidata-gen--read-script-aliases) (unidata-gen--insert-file): New functions to parse the Script* files. * lisp/international/textsec.el: Implement some functions that work on scripts.
This commit is contained in:
parent
39d4e1ca21
commit
e2c8091113
9 changed files with 5536 additions and 8 deletions
|
@ -44,7 +44,8 @@ unifiles = $(addprefix ${unidir}/,$(sort $(shell sed -n 's/^[ \t][ \t]*${lparen}
|
|||
.PHONY: all
|
||||
|
||||
all: ${top_srcdir}/src/macuvs.h ${unifiles} ${unidir}/charscript.el \
|
||||
${unidir}/charprop.el ${unidir}/emoji-zwj.el ${unidir}/emoji-labels.el
|
||||
${unidir}/charprop.el ${unidir}/emoji-zwj.el ${unidir}/emoji-labels.el \
|
||||
${unidir}/uni-scripts.el
|
||||
|
||||
## Specify .elc as an order-only prereq so as to not needlessly rebuild
|
||||
## target just because the .elc is missing.
|
||||
|
@ -82,6 +83,13 @@ ${unidir}/emoji-labels.el: ${unidir}/../international/emoji.el \
|
|||
${srcdir}/emoji-test.txt
|
||||
$(AM_V_GEN)${emacs} -l emoji.el -f emoji--generate-file $@
|
||||
|
||||
## Generate uni-scripts.el by running unidata-gen-scripts from
## unidata-gen.el; the target file name is passed as the trailing
## command-line argument.  The rule is re-run when the generator or any
## of the three Unicode data files listed below changes.
${unidir}/uni-scripts.el: ${srcdir}/unidata-gen.el \
|
||||
${srcdir}/Scripts.txt \
|
||||
${srcdir}/ScriptExtensions.txt \
|
||||
${srcdir}/PropertyValueAliases.txt
|
||||
$(AM_V_GEN)${emacs} -L ${srcdir} \
|
||||
-l unidata-gen -f unidata-gen-scripts $@
|
||||
|
||||
.PHONY: charscript.el
|
||||
charscript.el: ${unidir}/charscript.el
|
||||
|
||||
|
|
1615
admin/unidata/PropertyValueAliases.txt
Normal file
1615
admin/unidata/PropertyValueAliases.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -48,3 +48,15 @@ https://www.unicode.org/Public/emoji/14.0/emoji-sequences.txt
|
|||
emoji-test.txt
|
||||
https://unicode.org/Public/emoji/14.0/emoji-test.txt
|
||||
2021-10-28
|
||||
|
||||
ScriptExtensions.txt
|
||||
https://www.unicode.org/Public/UCD/latest/ucd/ScriptExtensions.txt
|
||||
2022-01-17
|
||||
|
||||
Scripts.txt
|
||||
https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt
|
||||
2022-01-17
|
||||
|
||||
PropertyValueAliases.txt
|
||||
https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
|
||||
2022-01-17
|
||||
|
|
628
admin/unidata/ScriptExtensions.txt
Normal file
628
admin/unidata/ScriptExtensions.txt
Normal file
|
@ -0,0 +1,628 @@
|
|||
# ScriptExtensions-14.0.0.txt
|
||||
# Date: 2021-06-04, 02:19:38 GMT
|
||||
# © 2021 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||
#
|
||||
# Unicode Character Database
|
||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||
#
|
||||
# The Script_Extensions property indicates which characters are commonly used
|
||||
# with more than one script, but with a limited number of scripts.
|
||||
# For each code point, there is one or more property values. Each such value is a Script property value.
|
||||
# For more information, see:
|
||||
# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/
|
||||
# Especially the sections:
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_Script_Values
|
||||
# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
|
||||
#
|
||||
# Each Script_Extensions value in this file consists of a set
|
||||
# of one or more abbreviated Script property values. The ordering of the
|
||||
# values in that set is not material, but for stability in presentation
|
||||
# it is given here as alphabetical.
|
||||
#
|
||||
# The Script_Extensions values are presented in sorted order in the file.
|
||||
# They are sorted first by the number of Script property values in their sets,
|
||||
# and then alphabetically by first differing Script property value.
|
||||
#
|
||||
# Following each distinct Script_Extensions value is the list of code
|
||||
# points associated with that value, listed in code point order.
|
||||
#
|
||||
# All code points not explicitly listed for Script_Extensions
|
||||
# have as their value the corresponding Script property value
|
||||
#
|
||||
# @missing: 0000..10FFFF; <script>
|
||||
|
||||
# ================================================
|
||||
|
||||
# Property: Script_Extensions
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng
|
||||
|
||||
1CF7 ; Beng # Mc VEDIC SIGN ATIKRAMA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva
|
||||
|
||||
1CD1 ; Deva # Mn VEDIC TONE SHARA
|
||||
1CD4 ; Deva # Mn VEDIC SIGN YAJURVEDIC MIDLINE SVARITA
|
||||
1CDB ; Deva # Mn VEDIC TONE TRIPLE SVARITA
|
||||
1CDE..1CDF ; Deva # Mn [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW
|
||||
1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
|
||||
1CEB..1CEC ; Deva # Lo [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
|
||||
1CEE..1CF1 ; Deva # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
|
||||
|
||||
# Total code points: 18
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Dupl
|
||||
|
||||
1BCA0..1BCA3 ; Dupl # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Grek
|
||||
|
||||
0342 ; Grek # Mn COMBINING GREEK PERISPOMENI
|
||||
0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI
|
||||
1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani
|
||||
|
||||
3006 ; Hani # Lo IDEOGRAPHIC CLOSING MARK
|
||||
303E..303F ; Hani # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
|
||||
3190..3191 ; Hani # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
|
||||
3192..3195 ; Hani # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
|
||||
3196..319F ; Hani # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
|
||||
31C0..31E3 ; Hani # So [36] CJK STROKE T..CJK STROKE Q
|
||||
3220..3229 ; Hani # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
|
||||
322A..3247 ; Hani # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
|
||||
3280..3289 ; Hani # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
|
||||
328A..32B0 ; Hani # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
|
||||
32C0..32CB ; Hani # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER
|
||||
32FF ; Hani # So SQUARE ERA NAME REIWA
|
||||
3358..3370 ; Hani # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR
|
||||
337B..337F ; Hani # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION
|
||||
33E0..33FE ; Hani # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE
|
||||
1D360..1D371 ; Hani # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
|
||||
1F250..1F251 ; Hani # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
|
||||
|
||||
# Total code points: 238
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Latn
|
||||
|
||||
0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X
|
||||
|
||||
# Total code points: 13
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Nand
|
||||
|
||||
1CFA ; Nand # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Syrc
|
||||
|
||||
1DFA ; Syrc # Mn COMBINING DOT BELOW LEFT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Copt
|
||||
|
||||
102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK
|
||||
102E1..102FB ; Arab Copt # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
|
||||
|
||||
# Total code points: 28
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Rohg
|
||||
|
||||
06D4 ; Arab Rohg # Po ARABIC FULL STOP
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Nkoo
|
||||
|
||||
FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS
|
||||
FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc
|
||||
|
||||
064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW
|
||||
0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF
|
||||
|
||||
# Total code points: 12
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa
|
||||
|
||||
FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM
|
||||
FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva
|
||||
|
||||
1CD5..1CD6 ; Beng Deva # Mn [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA
|
||||
1CD8 ; Beng Deva # Mn VEDIC TONE CANDRA BELOW
|
||||
1CE1 ; Beng Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
|
||||
1CEA ; Beng Deva # Lo VEDIC SIGN ANUSVARA BAHIRGOMUKHA
|
||||
1CED ; Beng Deva # Mn VEDIC SIGN TIRYAK
|
||||
1CF5..1CF6 ; Beng Deva # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
|
||||
A8F1 ; Beng Deva # Mn COMBINING DEVANAGARI SIGN AVAGRAHA
|
||||
|
||||
# Total code points: 9
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hani
|
||||
|
||||
302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bugi Java
|
||||
|
||||
A9CF ; Bugi Java # Lm JAVANESE PANGRANGKEP
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cprt Linb
|
||||
|
||||
10102 ; Cprt Linb # Po AEGEAN CHECK MARK
|
||||
10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Glag
|
||||
|
||||
0484 ; Cyrl Glag # Mn COMBINING CYRILLIC PALATALIZATION
|
||||
0487 ; Cyrl Glag # Mn COMBINING CYRILLIC POKRYTIE
|
||||
2E43 ; Cyrl Glag # Po DASH WITH LEFT UPTURN
|
||||
A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Latn
|
||||
|
||||
0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Perm
|
||||
|
||||
0483 ; Cyrl Perm # Mn COMBINING CYRILLIC TITLO
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cyrl Syrc
|
||||
|
||||
1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran
|
||||
|
||||
1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA
|
||||
1CF3 ; Deva Gran # Lo VEDIC SIGN ROTATED ARDHAVISARGA
|
||||
1CF8..1CF9 ; Deva Gran # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Nand
|
||||
|
||||
1CE9 ; Deva Nand # Lo VEDIC SIGN ANUSVARA ANTARGOMUKHA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Shrd
|
||||
|
||||
1CD7 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA
|
||||
1CD9 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER
|
||||
1CDC..1CDD ; Deva Shrd # Mn [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW
|
||||
1CE0 ; Deva Shrd # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
|
||||
|
||||
# Total code points: 5
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Taml
|
||||
|
||||
A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Geor Latn
|
||||
|
||||
10FB ; Geor Latn # Po GEORGIAN PARAGRAPH SEPARATOR
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Gran Taml
|
||||
|
||||
0BE6..0BEF ; Gran Taml # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
|
||||
0BF0..0BF2 ; Gran Taml # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
|
||||
0BF3 ; Gran Taml # So TAMIL DAY SIGN
|
||||
11301 ; Gran Taml # Mn GRANTHA SIGN CANDRABINDU
|
||||
11303 ; Gran Taml # Mc GRANTHA SIGN VISARGA
|
||||
1133B..1133C ; Gran Taml # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
|
||||
11FD0..11FD1 ; Gran Taml # No [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1
|
||||
11FD3 ; Gran Taml # No TAMIL FRACTION THREE QUARTERS
|
||||
|
||||
# Total code points: 21
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Gujr Khoj
|
||||
|
||||
0AE6..0AEF ; Gujr Khoj # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Guru Mult
|
||||
|
||||
0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Latn
|
||||
|
||||
A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU
|
||||
|
||||
# Total code points: 8
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hira Kana
|
||||
|
||||
3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
|
||||
3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
|
||||
30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
|
||||
30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
|
||||
# Total code points: 14
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Knda Nand
|
||||
|
||||
0CE6..0CEF ; Knda Nand # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Latn Mong
|
||||
|
||||
202F ; Latn Mong # Zs NARROW NO-BREAK SPACE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Mani Ougr
|
||||
|
||||
10AF2 ; Mani Ougr # Po MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Mong Phag
|
||||
|
||||
1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
|
||||
1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Syrc Thaa
|
||||
|
||||
061C ; Arab Syrc Thaa # Cf ARABIC LETTER MARK
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Thaa Yezi
|
||||
|
||||
0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Cakm Sylo
|
||||
|
||||
09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cakm Mymr Tale
|
||||
|
||||
1040..1049 ; Cakm Mymr Tale # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cpmn Cprt Linb
|
||||
|
||||
10100..10101 ; Cpmn Cprt Linb # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Cprt Lina Linb
|
||||
|
||||
10107..10133 ; Cprt Lina Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
|
||||
|
||||
# Total code points: 45
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran Knda
|
||||
|
||||
1CF4 ; Deva Gran Knda # Mn VEDIC TONE CANDRA ABOVE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Gran Latn
|
||||
|
||||
20F0 ; Deva Gran Latn # Mn COMBINING ASTERISK ABOVE
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Hani Hira Kana
|
||||
|
||||
303C ; Hani Hira Kana # Lo MASU MARK
|
||||
303D ; Hani Hira Kana # Po PART ALTERNATION MARK
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Kali Latn Mymr
|
||||
|
||||
A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Knda
|
||||
|
||||
1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA
|
||||
1CD2 ; Beng Deva Gran Knda # Mn VEDIC TONE PRENKHA
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Buhd Hano Tagb Tglg
|
||||
|
||||
1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Kthi Mahj
|
||||
|
||||
0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana
|
||||
|
||||
3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK
|
||||
3013 ; Bopo Hang Hani Hira Kana # So GETA MARK
|
||||
301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH
|
||||
301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK
|
||||
301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
|
||||
3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH
|
||||
3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
|
||||
FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT
|
||||
|
||||
# Total code points: 10
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi
|
||||
|
||||
060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA
|
||||
061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON
|
||||
|
||||
# Total code points: 2
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Bopo Hang Hani Hira Kana Yiii
|
||||
|
||||
3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
|
||||
3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET
|
||||
3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET
|
||||
300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET
|
||||
300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET
|
||||
300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET
|
||||
300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET
|
||||
300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET
|
||||
300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET
|
||||
3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK LENTICULAR BRACKET
|
||||
3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET
|
||||
3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET
|
||||
3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET
|
||||
3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET
|
||||
3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET
|
||||
3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET
|
||||
3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET
|
||||
301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET
|
||||
301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET
|
||||
30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT
|
||||
FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET
|
||||
FF63 ; Bopo Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET
|
||||
FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
|
||||
|
||||
# Total code points: 26
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Knda Mlym Orya Taml Telu
|
||||
|
||||
1CDA ; Deva Knda Mlym Orya Taml Telu # Mn VEDIC TONE DOUBLE SVARITA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi
|
||||
|
||||
061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh
|
||||
|
||||
1CF2 ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc
|
||||
|
||||
0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh
|
||||
|
||||
A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
|
||||
A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK
|
||||
A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK
|
||||
|
||||
# Total code points: 4
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh
|
||||
|
||||
0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN ANUDATTA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh
|
||||
|
||||
0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh
|
||||
|
||||
A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh
|
||||
|
||||
A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS
|
||||
|
||||
# Total code points: 3
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
|
||||
|
||||
0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DANDA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# ================================================
|
||||
|
||||
# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh
|
||||
|
||||
0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA
|
||||
|
||||
# Total code points: 1
|
||||
|
||||
# EOF
|
2991
admin/unidata/Scripts.txt
Normal file
2991
admin/unidata/Scripts.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1449,20 +1449,24 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
|
|||
(format ";;; %s ends here\n" basename)))))
|
||||
(or noninteractive (message "Generating %s...done" file)))
|
||||
|
||||
(defun unidata-gen-charprop (&optional charprop-file)
|
||||
(defun unidata-gen-charprop (&optional charprop-file text)
|
||||
(or charprop-file (setq charprop-file (pop command-line-args-left)))
|
||||
(with-temp-file charprop-file
|
||||
(insert ";; Automatically generated by unidata-gen.el."
|
||||
" -*- lexical-binding: t -*-\n"
|
||||
";; See the admin/unidata/ directory in the Emacs sources.\n")
|
||||
(dolist (elt unidata-file-alist)
|
||||
(dolist (proplist (cdr elt))
|
||||
(insert (format "(define-char-code-property '%S %S\n %S)\n"
|
||||
(unidata-prop-prop proplist) (car elt)
|
||||
(unidata-prop-docstring proplist)))))
|
||||
(if text
|
||||
(insert text)
|
||||
(dolist (elt unidata-file-alist)
|
||||
(dolist (proplist (cdr elt))
|
||||
(insert (format "(define-char-code-property '%S %S\n %S)\n"
|
||||
(unidata-prop-prop proplist) (car elt)
|
||||
(unidata-prop-docstring proplist))))))
|
||||
(or noninteractive (message "Writing %s..." charprop-file))
|
||||
(insert "\n"
|
||||
"(provide 'charprop)\n"
|
||||
(format "(provide '%s)\n"
|
||||
(file-name-sans-extension
|
||||
(file-name-nondirectory charprop-file)))
|
||||
"\n"
|
||||
";; Local Variables:\n"
|
||||
";; coding: utf-8\n"
|
||||
|
@ -1473,6 +1477,105 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
|
|||
(format ";;; %s ends here\n"
|
||||
(file-name-nondirectory charprop-file)))))
|
||||
|
||||
;; Generate FILE, a Lisp file that records which Unicode scripts each
;; character belongs to.  The data comes from Scripts.txt and
;; ScriptExtensions.txt; the script abbreviations used in the latter
;; are expanded through the table returned by
;; `unidata-gen--read-script-aliases'.  The output is a single
;; `textsec--create-script-table' form, written out through
;; `unidata-gen-charprop'.
(defun unidata-gen-scripts (&optional file)
|
||||
;; When run from the Makefile, FILE is not passed as an argument;
;; take it from the remaining command-line arguments instead.
|
||||
(unless file
|
||||
(setq file (pop command-line-args-left)))
|
||||
(let ((aliases (unidata-gen--read-script-aliases))
|
||||
(table (make-char-table nil))
|
||||
(segmented (make-hash-table :test #'equal)))
|
||||
;; First parse the scripts.
|
||||
(with-temp-buffer
|
||||
(unidata-gen--insert-file "Scripts.txt")
|
||||
(while (not (eobp))
|
||||
;; 1700..1711 ; Tagalog # Lo [18] TAGALOG LETTER A..TAGALOG
|
||||
(when (looking-at "\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +#")
|
||||
(let ((start (string-to-number (match-string 1) 16))
|
||||
(end (and (match-string 2)
|
||||
(string-to-number (match-string 2) 16)))
|
||||
;; The script name is downcased, "_" is replaced by "-", and
;; the result is interned; it is stored as a one-element list.
(scripts (list (intern (string-replace
|
||||
"_" "-"
|
||||
(downcase (match-string 3)))))))
|
||||
(set-char-table-range
|
||||
table (if end (cons start end) start) scripts)))
|
||||
(forward-line 1)))
|
||||

||||
;; Then parse the file that lists "other scripts" that characters
|
||||
;; may appear in, and add those.
|
||||
(with-temp-buffer
|
||||
(unidata-gen--insert-file "ScriptExtensions.txt")
|
||||
(while (not (eobp))
|
||||
;; 102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK
|
||||
(when (looking-at "\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^#]+\\)")
|
||||
(let ((start (string-to-number (match-string 1) 16))
|
||||
(end (and (match-string 2)
|
||||
(string-to-number (match-string 2) 16)))
|
||||
(scripts
|
||||
(mapcar
|
||||
(lambda (alias)
|
||||
;; Expand the abbreviation via ALIASES, then normalize it the
;; same way as the names from Scripts.txt.
;; NOTE(review): if an abbreviation is missing from ALIASES,
;; `gethash' returns nil and `downcase' signals an error --
;; confirm that aborting generation is the intended behavior.
(intern (string-replace
|
||||
"_" "-" (downcase
|
||||
(gethash alias aliases)))))
|
||||
(split-string (string-trim (match-string 3))))))
|
||||
;; Append each extra script to every code point of the range
;; individually, after whatever is already stored for it.
(dolist (script scripts)
|
||||
(dotimes (i (- (1+ (or end start)) start))
|
||||
(set-char-table-range
|
||||
table (+ i start)
|
||||
(append (elt table (+ i start)) (list script)))))))
|
||||
(forward-line 1)))
|
||||

||||
;; Then go through the data and collect into buckets based on
|
||||
;; identical script lists.
|
||||
(map-char-table
|
||||
(lambda (key value)
|
||||
;; `map-char-table' may reuse the cons passed as KEY, so store
;; a copy of it.
|
||||
(push (if (consp key)
|
||||
(cons (car key) (cdr key))
|
||||
key)
|
||||
;; Keep the first element first, but sort the rest.
|
||||
(gethash (cons (car value)
|
||||
(sort (remq (car value) value) #'string<))
|
||||
segmented)))
|
||||
table)
|
||||

||||
;; Then sort the buckets by the name of their first script and
;; write them out as a `textsec--create-script-table' form.
|
||||
(let ((scripts nil))
|
||||
(maphash
|
||||
(lambda (segment chars)
|
||||
(push (cons segment chars) scripts))
|
||||
segmented)
|
||||
(setq scripts (sort scripts (lambda (s1 s2)
|
||||
(string< (caar s1) (caar s2)))))
|
||||
(with-temp-buffer
|
||||
(insert "(textsec--create-script-table '(\n")
|
||||
;; Each entry is printed as (SCRIPT-LIST CHARS-AND-RANGES).
(dolist (script scripts)
|
||||
(insert "(" (prin1-to-string (car script)) "\n")
|
||||
(insert " " (prin1-to-string (cdr script)))
|
||||
(insert ")\n"))
|
||||
(insert "))\n")
|
||||
;; Write the file.
|
||||
(unidata-gen-charprop file (buffer-string))))))
|
||||
|
||||
;; Return a hash table of Script property value aliases read from
;; PropertyValueAliases.txt.  Each "sc ;" line of the Script section
;; is split at ";" and maps its second field to its third field.
;; Signal an error if the "# Script " heading cannot be found.
(defun unidata-gen--read-script-aliases ()
|
||||
(let ((aliases (make-hash-table :test #'equal)))
|
||||
(with-temp-buffer
|
||||
(unidata-gen--insert-file "PropertyValueAliases.txt")
|
||||
(unless (re-search-forward "^# Script " nil t)
|
||||
(error "Can't find the Script section"))
|
||||
;; Move two lines down from the heading; the "sc ;" entries are
;; expected to start there.
(forward-line 2)
|
||||
;; Stop at the first line that is not an "sc ;" entry.
(while (looking-at "sc *;")
|
||||
;; Fields are trimmed of surrounding spaces and tabs.
(let ((elem (split-string (buffer-substring (point) (line-end-position))
|
||||
";" nil "[ \t]+")))
|
||||
(setf (gethash (nth 1 elem) aliases)
|
||||
(nth 2 elem)))
|
||||
(forward-line 1))
|
||||
aliases)))
|
||||
|
||||
;; Insert the contents of the data file NAME into the current buffer.
;; NAME is looked up in ../admin/unidata/ relative to `data-directory'.
(defun unidata-gen--insert-file (name)
|
||||
(insert-file-contents
|
||||
(expand-file-name (concat "../admin/unidata/" name)
|
||||
data-directory)))
|
||||
|
||||
|
||||
|
||||
;;; unidata-gen.el ends here
|
||||
|
|
4
etc/NEWS
4
etc/NEWS
|
@ -951,6 +951,10 @@ The input must be encoded text.
|
|||
|
||||
* Lisp Changes in Emacs 29.1
|
||||
|
||||
---
|
||||
** The Gnus range functions have been moved to a new library, range.el.
|
||||
All the old names have been made obsolete.
|
||||
|
||||
+++
|
||||
** New function 'function-alias-p'.
|
||||
This predicate says whether an object is a function alias, and if it
|
||||
|
|
95
lisp/international/textsec.el
Normal file
95
lisp/international/textsec.el
Normal file
|
@ -0,0 +1,95 @@
|
|||
;;; textsec.el --- Functions for handling homoglyphs and the like -*- lexical-binding: t; -*-
|
||||
|
||||
;; Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'cl-lib)
|
||||
|
||||
(defvar textsec--char-scripts nil)
|
||||
|
||||
(eval-and-compile
  (defun textsec--create-script-table (data)
    "Populate `textsec--char-scripts' from DATA.
DATA is a list of elements of the form (SCRIPTS RANGES).  RANGES is
a list whose members are passed as the range argument of
`set-char-table-range', and SCRIPTS is stored as the value for
each of them.  Any previous table is discarded."
    (setq textsec--char-scripts (make-char-table nil))
    (dolist (entry data)
      (let ((scripts (car entry))
            (ranges (cadr entry)))
        (dolist (range ranges)
          (set-char-table-range textsec--char-scripts range scripts)))))
  ;; NOTE(review): the generated uni-scripts file is expected to call
  ;; the function above when loaded, which is why the definition has
  ;; to precede this `require' -- confirm against the generator.
  (require 'uni-scripts))
|
||||
|
||||
(defun textsec-scripts (string)
  "Return the scripts of each character in STRING.
The result is a list with one element per character, in order;
each element is whatever `textsec--char-scripts' holds for that
character (a list of script symbols, or nil if there is no entry)."
  (mapcar (lambda (char)
            (aref textsec--char-scripts char))
          string))
|
||||
|
||||
(defun textsec-single-script-p (string)
  "Return non-nil if STRING is all in a single script.

Note that the concept of \"single script\" used by this function
isn't obvious -- some mixtures of scripts count as a \"single
script\".  See

  https://www.unicode.org/reports/tr39/#Mixed_Script_Detection

for details.

Characters whose script list contains `common' or `inherited' are
compatible with every script and are ignored.  For the remaining
characters, the script lists (extended by
`textsec--augment-script') must have at least one script in
common.  A character with no script information at all makes the
result nil."
  (let ((resolved nil)
        (seen nil))
    (catch 'mixed
      (dolist (scripts (textsec-scripts string))
        ;; Common/Inherited characters match any script, so they
        ;; place no constraint on the result.
        (unless (or (memq 'common scripts)
                    (memq 'inherited scripts))
          (let ((augmented
                 (append scripts
                         ;; `mapcan' is destructive and the augment
                         ;; function returns constant lists, so copy.
                         (mapcan (lambda (script)
                                   (copy-sequence
                                    (textsec--augment-script script)))
                                 scripts))))
            ;; Keep a running intersection over all characters.  A
            ;; pairwise overlap test is not enough: {a b}, {b c} and
            ;; {a c} overlap pairwise but share no single script.
            (setq resolved (if seen
                               (seq-intersection resolved augmented)
                             augmented)
                  seen t)
            (unless resolved
              (throw 'mixed nil)))))
      t)))
|
||||
|
||||
(defun textsec--augment-script (script)
  "Return the extra pseudo-scripts implied by SCRIPT, or nil.
This follows the augmented script sets of Unicode TR39, Mixed-Script
Detection:

  han                 -> han-bopomofo, japan, korea
  hiragana, katakana  -> japan
  hangul              -> korea
  bopomofo            -> han-bopomofo

`han-bopomofo', `japan' and `korea' are not real scripts; they stand
for the writing systems that combine several scripts.  The returned
list is a constant and must not be modified destructively; callers
should copy it first."
  (cond
   ((eq script 'han)
    '(han-bopomofo japan korea))
   ((memq script '(hiragana katakana))
    '(japan))
   ((eq script 'hangul)
    '(korea))
   ;; Bopomofo is only used together with Han, never with Hangul.
   ((eq script 'bopomofo)
    '(han-bopomofo))))
|
||||
|
||||
(defun textsec-covering-scripts (string)
  "Return the list of scripts needed to cover STRING.
This is the union of the script lists of all characters in STRING,
without duplicates and in order of first appearance, with `common'
and `inherited' removed."
  (let ((covering nil))
    (dolist (char-scripts (textsec-scripts string))
      (dolist (script char-scripts)
        (unless (memq script covering)
          (push script covering))))
    ;; `covering' is a fresh list, so destructive deletion is safe.
    (delq 'common (delq 'inherited (nreverse covering)))))
|
||||
|
||||
(provide 'textsec)
|
||||
|
||||
;;; textsec.el ends here
|
72
test/lisp/international/textsec-tests.el
Normal file
72
test/lisp/international/textsec-tests.el
Normal file
|
@ -0,0 +1,72 @@
|
|||
;;; textsec-tests.el --- Tests for textsec.el -*- lexical-binding: t; -*-
|
||||
|
||||
;; Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
|
||||
;; This file is part of GNU Emacs.
|
||||
|
||||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation, either version 3 of the License, or
|
||||
;; (at your option) any later version.
|
||||
|
||||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
;;; Commentary:
|
||||
|
||||
;;
|
||||
|
||||
;;; Code:
|
||||
|
||||
(require 'textsec)
|
||||
(require 'ert)
|
||||
(require 'ert-x)
|
||||
|
||||
(ert-deftest test-scripts ()
  "Check `textsec-scripts' and `textsec-single-script-p' on sample strings.
Each sample is (STRING SCRIPTS SINGLE): SCRIPTS is the expected
per-character script list and SINGLE says whether STRING should
count as single-script.  Several samples are look-alikes of
\"Circle\" written with characters from other scripts."
  (dolist (sample
           '(("Circle"
              ((latin) (latin) (latin) (latin) (latin) (latin))
              t)
             ("СігсӀе"
              ((cyrillic) (cyrillic) (cyrillic)
               (cyrillic) (cyrillic) (cyrillic))
              t)
             ("Сirсlе"
              ((cyrillic) (latin) (latin) (cyrillic) (latin) (cyrillic))
              nil)
             ("Circ1e"
              ((latin) (latin) (latin) (latin) (common) (latin))
              t)
             ("C𝗂𝗋𝖼𝗅𝖾"
              ((latin) (common) (common) (common) (common) (common))
              t)
             ("𝖢𝗂𝗋𝖼𝗅𝖾"
              ((common) (common) (common) (common) (common) (common))
              t)
             ("〆切"
              ((common han) (han))
              t)
             ("ねガ"
              ((hiragana) (katakana))
              t)))
    (let ((string (nth 0 sample))
          (scripts (nth 1 sample))
          (single (nth 2 sample)))
      (should (equal (textsec-scripts string) scripts))
      (if single
          (should (textsec-single-script-p string))
        (should-not (textsec-single-script-p string))))))
|
||||
|
||||
(ert-deftest test-minimal-scripts ()
  "Check `textsec-covering-scripts' on sample strings.
Each sample pairs a string with the script list expected for it."
  (dolist (sample '(("Circle" . (latin))
                    ("Сirсlе" . (cyrillic latin))
                    ("〆切" . (han))))
    (should (equal (textsec-covering-scripts (car sample))
                   (cdr sample)))))
|
||||
|
||||
;;; textsec-tests.el ends here
|
Loading…
Add table
Reference in a new issue