Improve locale and language environment setting at startup

* lisp/international/mule-cmds.el (locale-language-names): Add
more locales and their language environments.
(set-locale-environment): Use w32-multibyte-code-page, if
non-zero, as locale-coding-system.  (Bug#34684)

* src/w32fns.c (globals_of_w32fns) <w32-multibyte-code-page>:
New variable.

* etc/NEWS: Mention w32-multibyte-code-page.
This commit is contained in:
Eli Zaretskii 2019-03-16 13:59:03 +02:00
parent 164b78c714
commit 34dd4e0a83
3 changed files with 97 additions and 32 deletions

View file

@ -1736,6 +1736,14 @@ versions of MS-Windows. Set this variable to 50 if for some reason
you need the old behavior (and please report such situations to Emacs
developers).
---
** New variable 'w32-multibyte-code-page'.
This variable holds the value of the multibyte code page used by the
system. It is usually zero, which indicates that 'w32-ansi-code-page'
is being used, except in Far Eastern locales. When this variable is
non-zero, Emacs at startup sets 'locale-coding-system' to the
corresponding encoding, instead of using 'w32-ansi-code-page'.
+++
** On NS the behaviour of drag and drop can now be modified by use of
modifier keys in line with Apple's guidelines. This makes the drag and

View file

@ -2181,22 +2181,27 @@ See `set-language-info-alist' for use in programs."
(defconst locale-language-names
(purecopy
'(
;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
;; as specified in the Single Unix Spec, Version 2.
;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
;; with additions from ISO 639/RA Newsletter No.1/1989;
;; see Internet RFC 2165 (1997-06) and
;; http://www.evertype.com/standards/iso639/iso639-en.html
;; TERRITORY is a country code taken from ISO 3166
;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
;; CODESET and MODIFIER are implementation-dependent.
;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
;; as specified in the Single Unix Spec, Version 2.
;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
;; with additions from ISO 639/RA Newsletter No.1/1989;
;; see Internet RFC 2165 (1997-06) and
;; http://www.evertype.com/standards/iso639/iso639-en.html
;; TERRITORY is a country code taken from ISO 3166
;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
;; CODESET and MODIFIER are implementation-dependent.
;; Language names for which there are no locales (yet) are
;; commented out.
;; jasonr comments: MS Windows uses three letter codes for
;; languages instead of the two letter ISO codes that POSIX
;; uses. In most cases the first two letters are the same, so
;; most of the regexps in locale-language-names work. Japanese
;; and Chinese are exceptions, which are listed in the
;; non-standard section at the bottom of locale-language-names.
;; uses. In most cases the first two letters are the same, so
;; most of the regexps in locale-language-names work. Japanese,
;; Chinese, and some others are exceptions, which are listed in the
;; non-standard section at the bottom of locale-language-names, or
;; in the main section, if otherwise we would pick up the wrong
;; entry (because the first matching entry is used).
("aa_DJ" . "Latin-1") ; Afar
("aa" . "UTF-8")
@ -2204,11 +2209,12 @@ See `set-language-info-alist' for use in programs."
("af" . "Latin-1") ; Afrikaans
("am" "Ethiopic" utf-8) ; Amharic
("an" . "Latin-9") ; Aragonese
("arn" . "UTF-8") ; MS-Windows Mapudungun, Mapuche
("ar" . "Arabic")
; as Assamese
("as" . "UTF-8") ; Assamese
; ay Aymara
("az" . "UTF-8") ; Azerbaijani
; ba Bashkir
("ba" . "UTF-8") ; Bashkir, Cyrillic script
("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
@ -2219,12 +2225,12 @@ See `set-language-info-alist' for use in programs."
("bs" . "Latin-2") ; Bosnian
("byn" . "UTF-8") ; Bilin; Blin
("ca" "Catalan" iso-8859-1) ; Catalan
; co Corsican
("co" . "UTF-8") ; Corsican
("cs" "Czech" iso-8859-2)
("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
("de" "German" iso-8859-1)
; dv Divehi
("dv" . "UTF-8") ; Divehi
; dz Bhutani
("ee" . "Latin-4") ; Ewe
("el" "Greek" iso-8859-7)
@ -2238,6 +2244,8 @@ See `set-language-info-alist' for use in programs."
("et" . "Latin-9") ; Estonian
("eu" . "Latin-1") ; Basque
("fa" "Persian" utf-8) ; Persian
("fil" . "UTF-8") ; Filipino
("fpo" . "UTF-8") ; MS-Windows Filipino
("fi" . "Latin-9") ; Finnish
("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
@ -2246,6 +2254,7 @@ See `set-language-info-alist' for use in programs."
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
("gd" . "Latin-9") ; Scots Gaelic
("gez" "Ethiopic" utf-8) ; Geez
("gla" . "Latin-9") ; MS-Windows Scots Gaelic
("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
("gu" "Gujarati" utf-8) ; Gujarati
@ -2256,27 +2265,33 @@ See `set-language-info-alist' for use in programs."
("hni_IN" . "UTF-8") ; Chhattisgarhi
("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
; hy Armenian
("hy" . "UTF-8") ; Armenian
; ia Interlingua
("id" . "Latin-1") ; Indonesian
; ie Interlingue
; ik Inupiak
("ig" . "UTF-8") ; Igbo (Nigeria)
("ibo" . "UTF-8") ; MS-Windows Igbo
; ik Inupiak, Inupiaq
("is" . "Latin-1") ; Icelandic
("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
("iw" "Hebrew" iso-8859-8)
("ja" "Japanese" euc-jp)
; jw Javanese
("kal" . "Latin-1") ; MS-Windows Greenlandic
("ka" "Georgian" georgian-ps) ; Georgian
; kk Kazakh
("kk" . "UTF-8") ; Kazakh
("kl" . "Latin-1") ; Greenlandic
("km" "Khmer" utf-8) ; Cambodian, Khmer
("knk" "Devanagari" utf-8) ; MS-Windows Konkani
("kok" "Devanagari" utf-8) ; Konkani
("kn" "Kannada" utf-8)
("ko" "Korean" euc-kr)
("ks" . "UTF-8") ; Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
("ky" . "UTF-8") ; Kirghiz
("lao" "Lao" utf-8) ; MS-Windows Lao
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
("lg" . "Latin-6") ; Ganda, a.k.a. Luganda
@ -2287,18 +2302,22 @@ See `set-language-info-alist' for use in programs."
; mg Malagasy
("mi" . "Latin-7") ; Maori
("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
("mlt" . "Latin-3") ; MS-Windows Maltese
("ml" "Malayalam" utf-8)
("mn" . "UTF-8") ; Mongolian
; mo Moldavian
; mo Moldavian (retired)
("mri" . "Latin-7") ; MS-Windows Maori
("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
("mym" "Malayalam" utf-8) ; MS-Windows Malayalam
("my" "Burmese" utf-8) ; Burmese
; na Nauru
("nb" . "Latin-1") ; Norwegian
("ne" "Devanagari" utf-8) ; Nepali
("nl" "Dutch" iso-8859-1)
("nn" . "Latin-1") ; Norwegian Nynorsk
("non" . "Latin-1") ; MS-Windows Norwegian Nynorsk
("no" . "Latin-1") ; Norwegian
("nr_ZA" . "UTF-8") ; South Ndebele
("nso_ZA" . "UTF-8") ; Pedi
@ -2308,7 +2327,8 @@ See `set-language-info-alist' for use in programs."
("or" "Oriya" utf-8)
("pa" "Punjabi" utf-8) ; Punjabi
("pl" "Polish" iso-8859-2) ; Polish
; ps Pashto, Pushto
("ps" . "UTF-8") ; Pashto, Pushto
("pas" . "UTF-8") ; MS-Windows Pashto
("pt_BR" "Brazilian Portuguese" iso-8859-1) ; Brazilian Portuguese
("pt" . "Latin-1") ; Portuguese
; qu Quechua
@ -2318,7 +2338,7 @@ See `set-language-info-alist' for use in programs."
("ru_RU.koi8r" "Cyrillic-KOI8" koi8-r)
("ru_RU" "Russian" iso-8859-5)
("ru_UA" "Russian" koi8-u)
; rw Kinyarwanda
("rw" . "UTF-8") ; Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
("se" . "UTF-8") ; Northern Sami
@ -2339,6 +2359,7 @@ See `set-language-info-alist' for use in programs."
; su Sundanese
("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
("taj" "Tajik" koi8-t) ; MS-Windows Tajik w/Cyrillic script
("ta" "Tamil" utf-8)
("te" "Telugu" utf-8) ; Telugu
("tg" "Tajik" koi8-t)
@ -2348,15 +2369,17 @@ See `set-language-info-alist' for use in programs."
("th" "Thai" iso-8859-11)
("ti" "Ethiopic" utf-8) ; Tigrinya
("tig_ER" . "UTF-8") ; Tigre
; tk Turkmen
("tk" . "Latin-5") ; Turkmen
("tuk" . "Latin-5") ; MS-Windows Turkmen
("tl" . "Latin-1") ; Tagalog
("tn" . "Latin-9") ; Setswana, Tswana
; to Tonga
("tr" "Turkish" iso-8859-9)
("tsn" . "Latin-9") ; MS-Windows Tswana
("ts" . "Latin-1") ; Tsonga
("tt" . "UTF-8") ; Tatar
; tw Twi
; ug Uighur
("ug" . "UTF-8") ; Uighur
("uk" "Ukrainian" koi8-u)
("ur" . "UTF-8") ; Urdu
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
@ -2365,10 +2388,10 @@ See `set-language-info-alist' for use in programs."
("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
; wo Wolof
("wo" . "UTF-8") ; Wolof
("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
; yo Yoruba
("yo" . "UTF-8") ; Yoruba
; za Zhuang
("zh_HK" . "Chinese-Big5")
; zh_HK/BIG5-HKSCS \
@ -2378,6 +2401,9 @@ See `set-language-info-alist' for use in programs."
("zh_CN.GB18030" "Chinese-GB18030")
("zh_CN.UTF-8" . "Chinese-GBK")
("zh_CN" . "Chinese-GB")
("zhh" . "Chinese-Big5") ; MS-Windows Chinese (Hong Kong S.A.R.)
("zhi" . "Chinese-GBK") ; MS-Windows Chinese (Singapore)
("zhm" . "Chinese-Big5") ; MS-Windows Chinese (Macao S.A.R.)
("zh" . "Chinese-GB")
("zu" . "Latin-1") ; Zulu
@ -2395,12 +2421,23 @@ See `set-language-info-alist' for use in programs."
("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
("jp" . "Japanese") ; e.g. MS Windows
("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified (PRC)
("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional (Taiwan)
("gbz" . "UTF-8") ; MS Windows Dari Persian
("div" . "UTF-8") ; MS Windows Divehi (Maldives)
("wee" . "Latin-2") ; MS Windows Lower Sorbian
("wen" . "Latin-2") ; MS Windows Upper Sorbian
("ind" . "Latin-1") ; MS-Windows Indonesian
("sme" . "UTF-8") ; MS-Windows Northern Sami (Norway)
("smf" . "UTF-8") ; MS-Windows Northern Sami (Sweden)
("smg" . "UTF-8") ; MS-Windows Northern Sami (Finland)
("kdi" "Kannada" utf-8) ; MS-Windows Kannada
("mar" "Devanagari" utf-8) ; MS-Windows Marathi
("khm" "Khmer" utf-8) ; MS-Windows Khmer
("iri" . "Latin-1") ; MS-Windows Irish Gaelic
; mwk MS-Windows Mohawk (Canada)
("uig" . "UTF-8") ; MS-Windows Uighur
("kin" . "UTF-8") ; MS-Windows Kinyarwanda
))
"Alist of locale regexps vs the corresponding languages and coding systems.
Each element has this form:
@ -2702,10 +2739,20 @@ See also `locale-charset-language-names', `locale-language-names',
(output-coding
(if noninteractive
(intern (format "cp%d" (w32-get-console-output-codepage)))
code-page-coding)))
(when (coding-system-p code-page-coding)
code-page-coding))
(multibyte-code-page-coding
(or (and (boundp 'w32-multibyte-code-page)
(not (zerop w32-multibyte-code-page))
(intern (format "cp%d" w32-multibyte-code-page)))
code-page-coding))
(locale-coding
(if noninteractive
code-page-coding
multibyte-code-page-coding)))
(when (and (coding-system-p code-page-coding)
(coding-system-p locale-coding))
(or output-coding (setq output-coding code-page-coding))
(unless frame (setq locale-coding-system code-page-coding))
(unless frame (setq locale-coding-system locale-coding))
(set-keyboard-coding-system code-page-coding frame)
(set-terminal-coding-system output-coding frame)
(setq default-file-name-coding-system ansi-code-page-coding))))

View file

@ -48,6 +48,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#ifdef WINDOWSNT
#include <mbstring.h>
#include <mbctype.h> /* for _getmbcp */
#endif /* WINDOWSNT */
#if CYGWIN
@ -10908,6 +10909,15 @@ globals_of_w32fns (void)
doc: /* The ANSI code page used by the system. */);
w32_ansi_code_page = GetACP ();
#ifndef CYGWIN
DEFVAR_INT ("w32-multibyte-code-page",
w32_multibyte_code_page,
doc: /* The current multibyte code page used by the system.
A value of zero indicates that the single-byte code page is in use,
see `w32-ansi-code-page'. */);
w32_multibyte_code_page = _getmbcp ();
#endif
if (os_subtype == OS_NT)
w32_unicode_gui = 1;
else