(locale-language-names): Modify the

format of elements and add more entries.
(locale-preferred-coding-systems): Add more entries.
(set-locale-environment): Adjusted for the change of
locale-language-names.
This commit is contained in:
Kenichi Handa 2005-03-15 02:32:39 +00:00
parent 8a46238114
commit 8dedddd58a
2 changed files with 137 additions and 84 deletions

View file

@ -1,3 +1,11 @@
2005-03-15 Kenichi Handa <handa@m17n.org>
* international/mule-cmds.el (locale-language-names): Modify the
format of elements and add more entries.
(locale-preferred-coding-systems): Add more entries.
(set-locale-environment): Adjusted for the change of
locale-language-names.
2005-03-14 Stefan Monnier <monnier@iro.umontreal.ca>
* pcvs.el (smerge-ediff): Remove bogus autoload.

View file

@ -2043,55 +2043,60 @@ of `buffer-file-coding-system' set by this function."
;; and Chinese are exceptions, which are listed in the
;; non-standard section at the bottom of locale-language-names.
; aa Afar
; ab Abkhazian
("aa_DJ" . "Latin-1") ; Afar
("aa" . "UTF-8")
;; ab Abkhazian
("af" . "Latin-1") ; Afrikaans
("am" . "Ethiopic") ; Amharic
("am" "Ethiopic" utf-8) ; Amharic
("an" . "Latin-9") ; Aragonese
; ar Arabic glibc uses 8859-6
; as Assamese
; ay Aymara
; az Azerbaijani
("az" . "UTF-8") ; Azerbaijani
; ba Bashkir
("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
("bg" . "Bulgarian") ; Bulgarian
("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
; bi Bislama
; bn Bengali, Bangla
("bn" . "UTF-8") ; Bengali, Bangla
("bo" . "Tibetan")
("br" . "Latin-1") ; Breton
("bs" . "Latin-2") ; Bosnian
("byn" . "UTF-8") ; Bilin; Blin
("ca" . "Latin-1") ; Catalan
; co Corsican
("cs" . "Czech")
("cy" . "Welsh") ; Welsh [glibc uses Latin-8. Did this change?]
("cs" "Czech" iso-8859-2)
("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
("de" . "German")
("de" "German" iso-8859-1)
; dz Bhutani
("el" . "Greek")
("el" "Greek" iso-8859-7)
;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
;; That's actually what the GNU locales define, modulo things like
;; en_IN -- fx.
("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
("en" . "Latin-1") ; English
("eo" . "Latin-3") ; Esperanto
("es" . "Spanish")
("et" . "Latin-4") ; Estonian
("es" "Spanish" iso-8859-1)
("et" . "Latin-1") ; Estonian
("eu" . "Latin-1") ; Basque
; fa Persian glibc uses utf-8
("fa" . "UTF-8") ; Persian
("fi" . "Latin-1") ; Finnish
; fj Fiji
("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
("fr" . "French") ; French
("fr" "French" iso-8859-1) ; French
("fy" . "Latin-1") ; Frisian
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
("gd" . "Latin-1") ; Scots Gaelic
("gl" . "Latin-1") ; Galician
("gd" . "Latin-9") ; Scots Gaelic
("gez" "Ethiopic" utf-8) ; Geez
("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
; gu Gujarati
("gv" . "Latin-8") ; Manx Gaelic glibc uses 8859-1
("gu" . "UTF-8") ; Gujarati
("gv" . "Latin-1") ; Manx Gaelic
; ha Hausa
("he" . "Hebrew")
("hi" . "Devanagari") ; Hindi glibc uses utf-8
("hr" . "Croatian") ; Croatian
("he" "Hebrew" iso-8859-8)
("hi" "Devanagari" utf-8) ; Hindi
("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
; hy Armenian
; ia Interlingua
@ -2099,110 +2104,114 @@ of `buffer-file-coding-system' set by this function."
; ie Interlingue
; ik Inupiak
("is" . "Latin-1") ; Icelandic
("it" . "Italian") ; Italian
("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
("ja" . "Japanese")
("iw" "Hebrew" iso-8859-8)
("ja" "Japanese" euc-jp)
; jw Javanese
("ka" . "Georgian") ; Georgian
("ka" "Georgian" georgian-ps) ; Georgian
; kk Kazakh
("kl" . "Latin-1") ; Greenlandic
; km Cambodian
; kn Kannada
("ko" . "Korean")
("kn" "Kannada" utf-8)
("ko" "Korean" euc-kr)
; ks Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
; ky Kirghiz
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
("lg" . "Laint-6") ; Ganda
; ln Lingala
("lo" . "Lao") ; Laothian
("lt" . "Lithuanian")
("lo" "Lao" utf-8) ; Laothian
("lt" "Lithuanian" iso-8859-13)
("lv" . "Latvian") ; Latvian, Lettish
; mg Malagasy
("mi" . "Latin-7") ; Maori
("mk" . "Cyrillic-ISO") ; Macedonian
; ml Malayalam
; mn Mongolian
("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
("ml" "Malayalam" utf-8)
("mn" . "UTF-8") ; Mongolian
; mo Moldavian
("mr" . "Devanagari") ; Marathi glibc uses utf-8
("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
; my Burmese
; na Nauru
("ne" . "Devanagari") ; Nepali
("nl" . "Dutch")
("nb" . "Latin-1") ; Norwegian
("ne" "Devanagari" utf-8) ; Nepali
("nl" "Dutch" iso-8859-1)
("no" . "Latin-1") ; Norwegian
("oc" . "Latin-1") ; Occitan
; om (Afan) Oromo
("om_ET" . "UTF-8") ; (Afan) Oromo
("om" . "Latin-1") ; (Afan) Oromo
; or Oriya
; pa Punjabi
("pa" . "UTF-8") ; Punjabi
("pl" . "Latin-2") ; Polish
; ps Pashto, Pushto
("pt" . "Latin-1") ; Portuguese
; qu Quechua
("rm" . "Latin-1") ; Rhaeto-Romanic
; rn Kirundi
("ro" . "Romanian")
("ru.*[_.]koi8" . "Russian")
("ru" . "Cyrillic-ISO") ; Russian
("ro" "Romanian" iso-8859-2)
("ru_RU" "Russian" iso-8859-5)
("ru_UA" "Russian" koi8-u)
; rw Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
; se Northern Sami
("se" . "UTF-8") ; Northern Sami
; sg Sangho
("sh" . "Latin-2") ; Serbo-Croatian
; si Sinhalese
("sk" . "Slovak")
("sl" . "Slovenian")
("sid" . "UTF-8") ; Sidamo
("sk" "Slovak" iso-8859-2)
("sl" "Slovenian" iso-8859-2)
; sm Samoan
; sn Shona
; so Somali
("so_ET" "UTF-8") ; Somali
("so" "Latin-1") ; Somali
("sq" . "Latin-1") ; Albanian
("sr_YU@cyrillic" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet)
("sr" . "Latin-2") ; Serbian (Latin alphabet)
("sr_YU@cyrillic" . "Cyrillic-ISO") ; per glibc
; ss Siswati
; st Sesotho
("st" . "Latin-1") ; Sesotho
; su Sundanese
("sv" . "Swedish") ; Swedish
("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
; ta Tamil glibc uses utf-8
; te Telugu glibc uses utf-8
("tg" . "Tajik")
("th" . "Thai")
; ti Tigrinya
("ta" "Tamil" utf-8)
("te" . "UTF-8") ; Telugu
("tg" "Tajik" koi8-t)
("th" "Thai" tis-620)
("ti" "Ethiopic" utf-8) ; Tigrinya
("tig_ER" . "UTF-8") ; Tigre
; tk Turkmen
("tl" . "Latin-1") ; Tagalog
; tn Setswana
; to Tonga
("tr" . "Turkish")
("tr" "Turkish" iso-8859-9)
; ts Tsonga
; tt Tatar
("tt" . "UTF-8") ; Tatar
; tw Twi
; ug Uighur
("uk" . "Ukrainian") ; Ukrainian
; ur Urdu glibc uses utf-8
("uk" "Ukrainian" koi8-u)
("ur" . "UTF-8") ; Urdu
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
("uz" . "Latin-1") ; Uzbek
("vi" . "Vietnamese") ; glibc uses utf-8
("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
; wo Wolof
; xh Xhosa
("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
; yo Yoruba
; za Zhuang
; glibc:
("zh_HK" . "Chinese-Big5")
("zh_TW" . "Chinese-Big5")
("zh_CN" . "Chinese-GB")
("zh" . "Chinese-GB")
; zh_CN.GB18030/GB18030 \
; zh_CN.GBK/GBK \
; zh_HK/BIG5-HKSCS \
("zh.*[._]big5" . "Chinese-BIG5")
("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
("zh_tw" . "Chinese-CNS") ; glibc uses big5
("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
("zh" . "Chinese-GB")
; zu Zulu
("zu" . "Latin-1") ; Zulu
;; ISO standard locales
("c$" . "ASCII")
@ -2222,10 +2231,16 @@ of `buffer-file-coding-system' set by this function."
("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
))
"List of pairs of locale regexps and language names.
The first element whose locale regexp matches the start of a downcased locale
specifies the language name corresponding to that locale.
If the language name is nil, there is no corresponding language environment.")
"Alist of locale regexps vs the corresponding languages and coding systems.
Each element has this form:
\(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
The first element whose LOCALE-REGEXP matches the start of a
downcased locale specifies the LANG-ENV \(language environment)
and CODING-SYSTEM corresponding to that locale. If there is no
appropriate language environment, the element may have this form:
\(LOCALE-REGEXP . LANG-ENV)
In this case, LANG-ENV is one of the generic language environments for a
specific encoding such as \"Latin-1\" and \"UTF-8\".")
(defconst locale-charset-language-names
(purecopy
@ -2243,20 +2258,43 @@ If the language name is nil, there is no corresponding language environment.")
"List of pairs of locale regexps and charset language names.
The first element whose locale regexp matches the start of a downcased locale
specifies the language name whose charset corresponds to that locale.
This language name is used if its charsets disagree with the charsets of
the language name that would otherwise be used for this locale.")
This language name is used if the locale is not listed in
`locale-language-names'.")
(defconst locale-preferred-coding-systems
(purecopy
'(("ja.*[._]euc" . japanese-iso-8bit)
'((".*8859[-_]?1\\>" . iso-8859-1)
(".*8859[-_]?2\\>" . iso-8859-2)
(".*8859[-_]?3\\>" . iso-8859-3)
(".*8859[-_]?4\\>" . iso-8859-4)
(".*8859[-_]?9\\>" . iso-8859-9)
(".*8859[-_]?14\\>" . iso-8859-14)
(".*8859[-_]?15\\>" . iso-8859-15)
(".*utf\\(?:-?8\\)?" . utf-8)
;; utf-8@euro exists, so put this after utf-8. (@euro really
;; specifies the currency, rather than the charset.)
(".*@euro" . iso-8859-15)
("koi8-?r" . koi8-r)
("koi8-?u" . koi8-u)
("tcvn" . tcvn)
("big5" . big5)
("euc-?tw" . euc-tw)
;; We don't support GBK, but as it is upward compatible with
;; GB-2312, we set up the default coding system to gb2312.
("gbk" . gb2312)
;; We don't support BIG5-HKSCS, but as it is upward compatible with
;; BIG5, we set up the default coding system to big5.
("big5hkscs" . big5)
("ja.*[._]euc" . japanese-iso-8bit)
("ja.*[._]jis7" . iso-2022-jp)
("ja.*[._]pck" . japanese-shift-jis)
("ja.*[._]sjis" . japanese-shift-jis)
("jpn" . japanese-shift-jis) ; MS-Windows uses this.
(".*[._]utf" . utf-8)))
))
"List of pairs of locale regexps and preferred coding systems.
The first element whose locale regexp matches the start of a downcased locale
specifies the coding system to prefer when using that locale.")
specifies the coding system to prefer when using that locale.
This coding system is used if the locale specifies a specific charset.")
(defun locale-name-match (key alist)
"Search for KEY in ALIST, which should be a list of regexp-value pairs.
@ -2386,12 +2424,17 @@ See also `locale-charset-language-names', `locale-language-names',
(locale-charset-to-coding-system
(match-string 1 locale)))))))
;; Give preference to charset-language-name over language-name.
(if (and charset-language-name
(not
(equal (get-language-info language-name 'charset)
(get-language-info charset-language-name 'charset))))
(setq language-name charset-language-name))
(if (consp language-name)
;; locale-language-names specifies both lang-env and coding.
;; But what is specified in locale-preferred-coding-systems
;; has higher priority.
(setq coding-system (or coding-system
(nth 1 language-name))
language-name (car language-name))
;; Otherwise, if locale is not listed in locale-language-names,
;; use what is listed in locale-charset-language-names.
(if (not language-name)
(setq language-name charset-language-name)))
(when language-name
@ -2417,7 +2460,9 @@ See also `locale-charset-language-names', `locale-language-names',
(setq locale-coding-system
(car (get-language-info language-name 'coding-priority))))
(when coding-system
(when (and coding-system
(not (coding-system-equal coding-system
locale-coding-system)))
(prefer-coding-system coding-system)
(setq locale-coding-system coding-system))))