(locale-language-names): Modify the
format of elements and add more entries. (locale-preferred-coding-systems): Add more entries. (set-locale-environment): Adjusted for the change of locale-language-names.
This commit is contained in:
parent
8a46238114
commit
8dedddd58a
2 changed files with 137 additions and 84 deletions
|
@ -1,3 +1,11 @@
|
|||
2005-03-15 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* international/mule-cmds.el (locale-language-names): Modify the
|
||||
format of elements and add more entries.
|
||||
(locale-preferred-coding-systems): Add more entries.
|
||||
(set-locale-environment): Adjusted for the change of
|
||||
locale-language-names.
|
||||
|
||||
2005-03-14 Stefan Monnier <monnier@iro.umontreal.ca>
|
||||
|
||||
* pcvs.el (smerge-ediff): Remove bogus autoload.
|
||||
|
|
|
@ -2043,55 +2043,60 @@ of `buffer-file-coding-system' set by this function."
|
|||
;; and Chinese are exceptions, which are listed in the
|
||||
;; non-standard section at the bottom of locale-language-names.
|
||||
|
||||
; aa Afar
|
||||
; ab Abkhazian
|
||||
("aa_DJ" . "Latin-1") ; Afar
|
||||
("aa" . "UTF-8")
|
||||
;; ab Abkhazian
|
||||
("af" . "Latin-1") ; Afrikaans
|
||||
("am" . "Ethiopic") ; Amharic
|
||||
("am" "Ethiopic" utf-8) ; Amharic
|
||||
("an" . "Latin-9") ; Aragonese
|
||||
; ar Arabic glibc uses 8859-6
|
||||
; as Assamese
|
||||
; ay Aymara
|
||||
; az Azerbaijani
|
||||
("az" . "UTF-8") ; Azerbaijani
|
||||
; ba Bashkir
|
||||
("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
|
||||
("bg" . "Bulgarian") ; Bulgarian
|
||||
("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
|
||||
("bg" "Bulgarian" cp1251) ; Bulgarian
|
||||
; bh Bihari
|
||||
; bi Bislama
|
||||
; bn Bengali, Bangla
|
||||
("bn" . "UTF-8") ; Bengali, Bangla
|
||||
("bo" . "Tibetan")
|
||||
("br" . "Latin-1") ; Breton
|
||||
("bs" . "Latin-2") ; Bosnian
|
||||
("byn" . "UTF-8") ; Bilin; Blin
|
||||
("ca" . "Latin-1") ; Catalan
|
||||
; co Corsican
|
||||
("cs" . "Czech")
|
||||
("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]
|
||||
("cs" "Czech" iso-8859-2)
|
||||
("cy" "Welsh" iso-8859-14)
|
||||
("da" . "Latin-1") ; Danish
|
||||
("de" . "German")
|
||||
("de" "German" iso-8859-1)
|
||||
; dz Bhutani
|
||||
("el" . "Greek")
|
||||
("el" "Greek" iso-8859-7)
|
||||
;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
|
||||
;; That's actually what the GNU locales define, modulo things like
|
||||
;; en_IN -- fx.
|
||||
("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
|
||||
("en" . "Latin-1") ; English
|
||||
("eo" . "Latin-3") ; Esperanto
|
||||
("es" . "Spanish")
|
||||
("et" . "Latin-4") ; Estonian
|
||||
("es" "Spanish" iso-8859-1)
|
||||
("et" . "Latin-1") ; Estonian
|
||||
("eu" . "Latin-1") ; Basque
|
||||
; fa Persian glibc uses utf-8
|
||||
("fa" . "UTF-8") ; Persian
|
||||
("fi" . "Latin-1") ; Finnish
|
||||
; fj Fiji
|
||||
("fj" . "Latin-1") ; Fiji
|
||||
("fo" . "Latin-1") ; Faroese
|
||||
("fr" . "French") ; French
|
||||
("fr" "French" iso-8859-1) ; French
|
||||
("fy" . "Latin-1") ; Frisian
|
||||
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
|
||||
("gd" . "Latin-1") ; Scots Gaelic
|
||||
("gl" . "Latin-1") ; Galician
|
||||
("gd" . "Latin-9") ; Scots Gaelic
|
||||
("gez" "Ethiopic" utf-8) ; Geez
|
||||
("gl" . "Latin-1") ; Gallegan; Galician
|
||||
; gn Guarani
|
||||
; gu Gujarati
|
||||
("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
|
||||
("gu" . "UTF-8") ; Gujarati
|
||||
("gv" . "Latin-1") ; Manx Gaelic
|
||||
; ha Hausa
|
||||
("he" . "Hebrew")
|
||||
("hi" . "Devanagari") ; Hindi glibc uses utf-8
|
||||
("hr" . "Croatian") ; Croatian
|
||||
("he" "Hebrew" iso-8859-8)
|
||||
("hi" "Devanagari" utf-8) ; Hindi
|
||||
("hr" "Croatian" iso-8859-2) ; Croatian
|
||||
("hu" . "Latin-2") ; Hungarian
|
||||
; hy Armenian
|
||||
; ia Interlingua
|
||||
|
@ -2099,110 +2104,114 @@ of `buffer-file-coding-system' set by this function."
|
|||
; ie Interlingue
|
||||
; ik Inupiak
|
||||
("is" . "Latin-1") ; Icelandic
|
||||
("it" . "Italian") ; Italian
|
||||
("it" "Italian" iso-8859-1) ; Italian
|
||||
; iu Inuktitut
|
||||
("ja" . "Japanese")
|
||||
("iw" "Hebrew" iso-8859-8)
|
||||
("ja" "Japanese" euc-jp)
|
||||
; jw Javanese
|
||||
("ka" . "Georgian") ; Georgian
|
||||
("ka" "Georgian" georgian-ps) ; Georgian
|
||||
; kk Kazakh
|
||||
("kl" . "Latin-1") ; Greenlandic
|
||||
; km Cambodian
|
||||
; kn Kannada
|
||||
("ko" . "Korean")
|
||||
("kn" "Kannada" utf-8)
|
||||
("ko" "Korean" euc-kr)
|
||||
; ks Kashmiri
|
||||
; ku Kurdish
|
||||
("kw" . "Latin-1") ; Cornish
|
||||
; ky Kirghiz
|
||||
("la" . "Latin-1") ; Latin
|
||||
("lb" . "Latin-1") ; Luxemburgish
|
||||
("lg" . "Laint-6") ; Ganda
|
||||
; ln Lingala
|
||||
("lo" . "Lao") ; Laothian
|
||||
("lt" . "Lithuanian")
|
||||
("lo" "Lao" utf-8) ; Laothian
|
||||
("lt" "Lithuanian" iso-8859-13)
|
||||
("lv" . "Latvian") ; Latvian, Lettish
|
||||
; mg Malagasy
|
||||
("mi" . "Latin-7") ; Maori
|
||||
("mk" . "Cyrillic-ISO") ; Macedonian
|
||||
; ml Malayalam
|
||||
; mn Mongolian
|
||||
("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
|
||||
("ml" "Malayalam" utf-8)
|
||||
("mn" . "UTF-8") ; Mongolian
|
||||
; mo Moldavian
|
||||
("mr" . "Devanagari") ; Marathi glibc uses utf-8
|
||||
("mr" "Devanagari" utf-8) ; Marathi
|
||||
("ms" . "Latin-1") ; Malay
|
||||
("mt" . "Latin-3") ; Maltese
|
||||
; my Burmese
|
||||
; na Nauru
|
||||
("ne" . "Devanagari") ; Nepali
|
||||
("nl" . "Dutch")
|
||||
("nb" . "Latin-1") ; Norwegian
|
||||
("ne" "Devanagari" utf-8) ; Nepali
|
||||
("nl" "Dutch" iso-8859-1)
|
||||
("no" . "Latin-1") ; Norwegian
|
||||
("oc" . "Latin-1") ; Occitan
|
||||
; om (Afan) Oromo
|
||||
("om_ET" . "UTF-8") ; (Afan) Oromo
|
||||
("om" . "Latin-1") ; (Afan) Oromo
|
||||
; or Oriya
|
||||
; pa Punjabi
|
||||
("pa" . "UTF-8") ; Punjabi
|
||||
("pl" . "Latin-2") ; Polish
|
||||
; ps Pashto, Pushto
|
||||
("pt" . "Latin-1") ; Portuguese
|
||||
; qu Quechua
|
||||
("rm" . "Latin-1") ; Rhaeto-Romanic
|
||||
; rn Kirundi
|
||||
("ro" . "Romanian")
|
||||
("ru.*[_.]koi8" . "Russian")
|
||||
("ru" . "Cyrillic-ISO") ; Russian
|
||||
("ro" "Romanian" iso-8859-2)
|
||||
("ru_RU" "Russian" iso-8859-5)
|
||||
("ru_UA" "Russian" koi8-u)
|
||||
; rw Kinyarwanda
|
||||
("sa" . "Devanagari") ; Sanskrit
|
||||
; sd Sindhi
|
||||
; se Northern Sami
|
||||
("se" . "UTF-8") ; Northern Sami
|
||||
; sg Sangho
|
||||
("sh" . "Latin-2") ; Serbo-Croatian
|
||||
; si Sinhalese
|
||||
("sk" . "Slovak")
|
||||
("sl" . "Slovenian")
|
||||
("sid" . "UTF-8") ; Sidamo
|
||||
("sk" "Slovak" iso-8859-2)
|
||||
("sl" "Slovenian" iso-8859-2)
|
||||
; sm Samoan
|
||||
; sn Shona
|
||||
; so Somali
|
||||
("so_ET" "UTF-8") ; Somali
|
||||
("so" "Latin-1") ; Somali
|
||||
("sq" . "Latin-1") ; Albanian
|
||||
("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
|
||||
("sr" . "Latin-2") ; Serbian (Latin alphabet)
|
||||
("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
|
||||
; ss Siswati
|
||||
; st Sesotho
|
||||
("st" . "Latin-1") ; Sesotho
|
||||
; su Sundanese
|
||||
("sv" . "Swedish") ; Swedish
|
||||
("sv" "Swedish" iso-8859-1) ; Swedish
|
||||
("sw" . "Latin-1") ; Swahili
|
||||
; ta Tamil glibc uses utf-8
|
||||
; te Telugu glibc uses utf-8
|
||||
("tg" . "Tajik")
|
||||
("th" . "Thai")
|
||||
; ti Tigrinya
|
||||
("ta" "Tamil" utf-8)
|
||||
("te" . "UTF-8") ; Telugu
|
||||
("tg" "Tajik" koi8-t)
|
||||
("th" "Thai" tis-620)
|
||||
("ti" "Ethiopic" utf-8) ; Tigrinya
|
||||
("tig_ER" . "UTF-8") ; Tigre
|
||||
; tk Turkmen
|
||||
("tl" . "Latin-1") ; Tagalog
|
||||
; tn Setswana
|
||||
; to Tonga
|
||||
("tr" . "Turkish")
|
||||
("tr" "Turkish" iso-8859-9)
|
||||
; ts Tsonga
|
||||
; tt Tatar
|
||||
("tt" . "UTF-8") ; Tatar
|
||||
; tw Twi
|
||||
; ug Uighur
|
||||
("uk" . "Ukrainian") ; Ukrainian
|
||||
; ur Urdu glibc uses utf-8
|
||||
("uk" "Ukrainian" koi8-u)
|
||||
("ur" . "UTF-8") ; Urdu
|
||||
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
|
||||
("uz" . "Latin-1") ; Uzbek
|
||||
("vi" . "Vietnamese") ; glibc uses utf-8
|
||||
("vi" "Vietnamese" utf-8)
|
||||
; vo Volapuk
|
||||
("wa" . "Latin-1") ; Walloon
|
||||
; wo Wolof
|
||||
; xh Xhosa
|
||||
("xh" . "Latin-1") ; Xhosa
|
||||
("yi" . "Windows-1255") ; Yiddish
|
||||
; yo Yoruba
|
||||
; za Zhuang
|
||||
|
||||
; glibc:
|
||||
("zh_HK" . "Chinese-Big5")
|
||||
("zh_TW" . "Chinese-Big5")
|
||||
("zh_CN" . "Chinese-GB")
|
||||
("zh" . "Chinese-GB")
|
||||
; zh_CN.GB18030/GB18030 \
|
||||
; zh_CN.GBK/GBK \
|
||||
; zh_HK/BIG5-HKSCS \
|
||||
|
||||
("zh.*[._]big5" . "Chinese-BIG5")
|
||||
("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
|
||||
("zh_tw" . "Chinese-CNS") ; glibc uses big5
|
||||
("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
|
||||
("zh" . "Chinese-GB")
|
||||
; zu Zulu
|
||||
("zu" . "Latin-1") ; Zulu
|
||||
|
||||
;; ISO standard locales
|
||||
("c$" . "ASCII")
|
||||
|
@ -2222,10 +2231,16 @@ of `buffer-file-coding-system' set by this function."
|
|||
("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
|
||||
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
|
||||
))
|
||||
"List of pairs of locale regexps and language names.
|
||||
The first element whose locale regexp matches the start of a downcased locale
|
||||
specifies the language name corresponding to that locale.
|
||||
If the language name is nil, there is no corresponding language environment.")
|
||||
"Alist of locale regexps vs the corresponding languages and coding systems.
|
||||
Each element has this form:
|
||||
\(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
|
||||
The first element whose LOCALE-REGEXP matches the start of a
|
||||
downcased locale specifies the LANG-ENV \(language environment)
|
||||
and CODING-SYSTEM corresponding to that locale. If there is no
|
||||
appropriate language environment, the element may have this form:
|
||||
\(LOCALE-REGEXP . LANG-ENV)
|
||||
In this case, LANG-ENV is one of the generic language environments for a
|
||||
specific encoding such as \"Latin-1\" and \"UTF-8\".")
|
||||
|
||||
(defconst locale-charset-language-names
|
||||
(purecopy
|
||||
|
@ -2243,20 +2258,43 @@ If the language name is nil, there is no corresponding language environment.")
|
|||
"List of pairs of locale regexps and charset language names.
|
||||
The first element whose locale regexp matches the start of a downcased locale
|
||||
specifies the language name whose charset corresponds to that locale.
|
||||
This language name is used if its charsets disagree with the charsets of
|
||||
the language name that would otherwise be used for this locale.")
|
||||
This language name is used if the locale is not listed in
|
||||
`locale-language-names'")
|
||||
|
||||
(defconst locale-preferred-coding-systems
|
||||
(purecopy
|
||||
'(("ja.*[._]euc" . japanese-iso-8bit)
|
||||
'((".*8859[-_]?1\\>" . iso-8859-1)
|
||||
(".*8859[-_]?2\\>" . iso-8859-2)
|
||||
(".*8859[-_]?3\\>" . iso-8859-3)
|
||||
(".*8859[-_]?4\\>" . iso-8859-4)
|
||||
(".*8859[-_]?9\\>" . iso-8859-9)
|
||||
(".*8859[-_]?14\\>" . iso-8859-14)
|
||||
(".*8859[-_]?15\\>" . iso-8859-15)
|
||||
(".*utf\\(?:-?8\\)?" . utf-8)
|
||||
;; utf-8@euro exists, so put this after utf-8. (@euro really
|
||||
;; specifies the currency, rather than the charset.)
|
||||
(".*@euro" . iso-8859-15)
|
||||
("koi8-?r" . koi8-r)
|
||||
("koi8-?u" . koi8-u)
|
||||
("tcvn" . tcvn)
|
||||
("big5" . big5)
|
||||
("euc-?tw" . euc-tw)
|
||||
;; We don't support GBK, but as it is upward compatible with
|
||||
;; GB-2312, we set up the default coding system to gb2312.
|
||||
("gbk" . gb2312)
|
||||
;; We don't support BIG5-HKSCS, but as it is upward compatible with
|
||||
;; BIG5, we set up the default coding system to big5.
|
||||
("big5hkscs" . big5)
|
||||
("ja.*[._]euc" . japanese-iso-8bit)
|
||||
("ja.*[._]jis7" . iso-2022-jp)
|
||||
("ja.*[._]pck" . japanese-shift-jis)
|
||||
("ja.*[._]sjis" . japanese-shift-jis)
|
||||
("jpn" . japanese-shift-jis) ; MS-Windows uses this.
|
||||
(".*[._]utf" . utf-8)))
|
||||
))
|
||||
"List of pairs of locale regexps and preferred coding systems.
|
||||
The first element whose locale regexp matches the start of a downcased locale
|
||||
specifies the coding system to prefer when using that locale.")
|
||||
specifies the coding system to prefer when using that locale.
|
||||
This coding system is used if the locale specifies a specific charset.")
|
||||
|
||||
(defun locale-name-match (key alist)
|
||||
"Search for KEY in ALIST, which should be a list of regexp-value pairs.
|
||||
|
@ -2386,12 +2424,17 @@ See also `locale-charset-language-names', `locale-language-names',
|
|||
(locale-charset-to-coding-system
|
||||
(match-string 1 locale)))))))
|
||||
|
||||
;; Give preference to charset-language-name over language-name.
|
||||
(if (and charset-language-name
|
||||
(not
|
||||
(equal (get-language-info language-name 'charset)
|
||||
(get-language-info charset-language-name 'charset))))
|
||||
(setq language-name charset-language-name))
|
||||
(if (consp language-name)
|
||||
;; locale-language-names specify both lang-env and coding.
|
||||
;; But what is specified in locale-preferred-coding-systems
|
||||
;; has higher priority.
|
||||
(setq coding-system (or coding-system
|
||||
(nth 1 language-name))
|
||||
language-name (car language-name))
|
||||
;; Otherwise, if locale is not listed in locale-language-names,
|
||||
;; use what is listed in locale-charset-language-names.
|
||||
(if (not language-name)
|
||||
(setq language-name charset-language-name)))
|
||||
|
||||
(when language-name
|
||||
|
||||
|
@ -2417,7 +2460,9 @@ See also `locale-charset-language-names', `locale-language-names',
|
|||
(setq locale-coding-system
|
||||
(car (get-language-info language-name 'coding-priority))))
|
||||
|
||||
(when coding-system
|
||||
(when (and coding-system
|
||||
(not (coding-system-equal coding-system
|
||||
locale-coding-system)))
|
||||
(prefer-coding-system coding-system)
|
||||
(setq locale-coding-system coding-system))))
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue