(po-content-type-charset-alist): Convert the
car of each association to a string. (po-find-file-coding-system-guts): If the charset matches a name of a codepage, set up that codepage and return it as a coding system to decode the file.
This commit is contained in:
parent
7dd4fd4347
commit
efe7ebe3d5
2 changed files with 99 additions and 78 deletions
|
@ -3,8 +3,13 @@
|
|||
* international/codepage.el (codepage-setup): Don't define a
|
||||
codepage if it is already defined.
|
||||
|
||||
* textmodes/po.el (po-find-charset): Search for the Charset=
|
||||
header even if we've read less than 4KB.
|
||||
* textmodes/po.el (po-content-type-charset-alist): Convert the
|
||||
car of each association to a string.
|
||||
(po-find-file-coding-system-guts): If the charset matches a name
|
||||
of a codepage, set up that codepage and return it as a coding
|
||||
system to decode the file.
|
||||
(po-find-charset): Search for the Charset= header even if we've
|
||||
read less than 4KB.
|
||||
<top-level>: Remove the setup for all known codepages: it seems
|
||||
to cause crashes in the CCL driver.
|
||||
|
||||
|
|
|
@ -34,77 +34,77 @@
|
|||
|
||||
(defconst po-content-type-charset-alist
|
||||
'(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
|
||||
(ASCII . undecided)
|
||||
(ANSI_X3.4-1968 . undecided)
|
||||
(US-ASCII . undecided)
|
||||
(ISO-8859-1 . iso-8859-1)
|
||||
(ISO_8859-1 . iso-8859-1)
|
||||
(ISO-8859-2 . iso-8859-2)
|
||||
(ISO_8859-2 . iso-8859-2)
|
||||
(ISO-8859-3 . iso-8859-3)
|
||||
(ISO_8859-3 . iso-8859-3)
|
||||
(ISO-8859-4 . iso-8859-4)
|
||||
(ISO_8859-4 . iso-8859-4)
|
||||
(ISO-8859-5 . iso-8859-5)
|
||||
(ISO_8859-5 . iso-8859-5)
|
||||
;(ISO-8859-6 . ??)
|
||||
;(ISO_8859-6 . ??)
|
||||
(ISO-8859-7 . iso-8859-7)
|
||||
(ISO_8859-7 . iso-8859-7)
|
||||
(ISO-8859-8 . iso-8859-8)
|
||||
(ISO_8859-8 . iso-8859-8)
|
||||
(ISO-8859-9 . iso-8859-9)
|
||||
(ISO_8859-9 . iso-8859-9)
|
||||
;(ISO-8859-13 . ??)
|
||||
;(ISO_8859-13 . ??)
|
||||
(ISO-8859-15 . iso-8859-15) ; requires Emacs 21
|
||||
(ISO_8859-15 . iso-8859-15) ; requires Emacs 21
|
||||
(KOI8-R . koi8-r)
|
||||
;(KOI8-U . ??)
|
||||
(CP437 . cp437) ; requires Emacs 20
|
||||
(CP775 . cp775) ; requires Emacs 20
|
||||
(CP850 . cp850) ; requires Emacs 20
|
||||
(CP852 . cp852) ; requires Emacs 20
|
||||
(CP855 . cp855) ; requires Emacs 20
|
||||
;(CP856 . ??)
|
||||
(CP857 . cp857) ; requires Emacs 20
|
||||
(CP861 . cp861) ; requires Emacs 20
|
||||
(CP862 . cp862) ; requires Emacs 20
|
||||
(CP864 . cp864) ; requires Emacs 20
|
||||
(CP865 . cp865) ; requires Emacs 20
|
||||
(CP866 . cp866) ; requires Emacs 21
|
||||
(CP869 . cp869) ; requires Emacs 20
|
||||
;(CP874 . ??)
|
||||
;(CP922 . ??)
|
||||
;(CP932 . ??)
|
||||
;(CP943 . ??)
|
||||
;(CP949 . ??)
|
||||
;(CP950 . ??)
|
||||
;(CP1046 . ??)
|
||||
;(CP1124 . ??)
|
||||
;(CP1129 . ??)
|
||||
(CP1250 . cp1250) ; requires Emacs 20
|
||||
(CP1251 . cp1251) ; requires Emacs 20
|
||||
(CP1252 . iso-8859-1) ; approximation
|
||||
(CP1253 . cp1253) ; requires Emacs 20
|
||||
(CP1254 . iso-8859-9) ; approximation
|
||||
(CP1255 . iso-8859-8) ; approximation
|
||||
;(CP1256 . ??)
|
||||
(CP1257 . cp1257) ; requires Emacs 20
|
||||
(GB2312 . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
|
||||
("ASCII" . undecided)
|
||||
("ANSI_X3.4-1968" . undecided)
|
||||
("US-ASCII" . undecided)
|
||||
("ISO-8859-1" . iso-8859-1)
|
||||
("ISO_8859-1" . iso-8859-1)
|
||||
("ISO-8859-2" . iso-8859-2)
|
||||
("ISO_8859-2" . iso-8859-2)
|
||||
("ISO-8859-3" . iso-8859-3)
|
||||
("ISO_8859-3" . iso-8859-3)
|
||||
("ISO-8859-4" . iso-8859-4)
|
||||
("ISO_8859-4" . iso-8859-4)
|
||||
("ISO-8859-5" . iso-8859-5)
|
||||
("ISO_8859-5" . iso-8859-5)
|
||||
;("ISO-8859-6" . ??)
|
||||
;("ISO_8859-6" . ??)
|
||||
("ISO-8859-7" . iso-8859-7)
|
||||
("ISO_8859-7" . iso-8859-7)
|
||||
("ISO-8859-8" . iso-8859-8)
|
||||
("ISO_8859-8" . iso-8859-8)
|
||||
("ISO-8859-9" . iso-8859-9)
|
||||
("ISO_8859-9" . iso-8859-9)
|
||||
;("ISO-8859-13" . ??)
|
||||
;("ISO_8859-13" . ??)
|
||||
("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
|
||||
("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
|
||||
("KOI8-R" . koi8-r)
|
||||
;("KOI8-U" . ??)
|
||||
("CP437" . cp437) ; requires Emacs 20
|
||||
("CP775" . cp775) ; requires Emacs 20
|
||||
("CP850" . cp850) ; requires Emacs 20
|
||||
("CP852" . cp852) ; requires Emacs 20
|
||||
("CP855" . cp855) ; requires Emacs 20
|
||||
;("CP856" . ??)
|
||||
("CP857" . cp857) ; requires Emacs 20
|
||||
("CP861" . cp861) ; requires Emacs 20
|
||||
("CP862" . cp862) ; requires Emacs 20
|
||||
("CP864" . cp864) ; requires Emacs 20
|
||||
("CP865" . cp865) ; requires Emacs 20
|
||||
("CP866" . cp866) ; requires Emacs 21
|
||||
("CP869" . cp869) ; requires Emacs 20
|
||||
;("CP874" . ??)
|
||||
;("CP922" . ??)
|
||||
;("CP932" . ??)
|
||||
;("CP943" . ??)
|
||||
;("CP949" . ??)
|
||||
;("CP950" . ??)
|
||||
;("CP1046" . ??)
|
||||
;("CP1124" . ??)
|
||||
;("CP1129" . ??)
|
||||
("CP1250" . cp1250) ; requires Emacs 20
|
||||
("CP1251" . cp1251) ; requires Emacs 20
|
||||
("CP1252" . iso-8859-1) ; approximation
|
||||
("CP1253" . cp1253) ; requires Emacs 20
|
||||
("CP1254" . iso-8859-9) ; approximation
|
||||
("CP1255" . iso-8859-8) ; approximation
|
||||
;("CP1256" . ??)
|
||||
("CP1257" . cp1257) ; requires Emacs 20
|
||||
("GB2312" . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
|
||||
; also named 'euc-cn' in Emacs 20 or Emacs 21
|
||||
(EUC-JP . euc-jp)
|
||||
(EUC-KR . euc-kr)
|
||||
;(EUC-TW . ??)
|
||||
(BIG5 . big5)
|
||||
;(BIG5-HKSCS . ??)
|
||||
;(GBK . ??)
|
||||
;(GB18030 . ??)
|
||||
(SHIFT_JIS . shift_jis)
|
||||
;(JOHAB . ??)
|
||||
(TIS-620 . tis-620) ; requires Emacs 20 or Emacs 21
|
||||
(VISCII . viscii) ; requires Emacs 20 or Emacs 21
|
||||
(UTF-8 . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
|
||||
("EUC-JP" . euc-jp)
|
||||
("EUC-KR" . euc-kr)
|
||||
;("EUC-TW" . ??)
|
||||
("BIG5" . big5)
|
||||
;("BIG5-HKSCS" . ??)
|
||||
;("GBK" . ??)
|
||||
;("GB18030" . ??)
|
||||
("SHIFT_JIS" . shift_jis)
|
||||
;("JOHAB" . ??)
|
||||
("TIS-620" . tis-620) ; requires Emacs 20 or Emacs 21
|
||||
("VISCII" . viscii) ; requires Emacs 20 or Emacs 21
|
||||
("UTF-8" . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
|
||||
)
|
||||
"How to convert a GNU libc/libiconv canonical charset name as seen in
|
||||
Content-Type into a Mule coding system.")
|
||||
|
@ -148,12 +148,28 @@ Called through file-coding-system-alist, before the file is visited for real."
|
|||
(with-temp-buffer
|
||||
(let* ((coding-system-for-read 'no-conversion)
|
||||
(charset (or (po-find-charset filename) "ascii"))
|
||||
(charset-upper (intern (upcase charset)))
|
||||
(charset-lower (intern (downcase charset))))
|
||||
(list (or (cdr (assq charset-upper po-content-type-charset-alist))
|
||||
(if (memq charset-lower (coding-system-list))
|
||||
charset-lower
|
||||
'no-conversion)))))))
|
||||
(charset-upper (upcase charset))
|
||||
(charset-lower (downcase charset))
|
||||
(candidate
|
||||
(cdr (assoc charset-upper po-content-type-charset-alist)))
|
||||
(try (or candidate (intern-soft charset-lower))))
|
||||
(list (cond ((and try (coding-system-p try))
|
||||
try)
|
||||
((and try
|
||||
(string-match "\\`cp[1-9][0-9][0-9]?\\'"
|
||||
(symbol-name try))
|
||||
(assoc (substring (symbol-name try) 2)
|
||||
(cp-supported-codepages)))
|
||||
(codepage-setup (substring (symbol-name try) 2))
|
||||
try)
|
||||
((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
|
||||
charset-lower)
|
||||
(assoc (substring charset-lower 2)
|
||||
(cp-supported-codepages)))
|
||||
(codepage-setup (substring charset-lower 2))
|
||||
(intern charset-lower))
|
||||
(t
|
||||
'no-conversion)))))))
|
||||
|
||||
;;;###autoload
|
||||
(defun po-find-file-coding-system (arg-list)
|
||||
|
|
Loading…
Add table
Reference in a new issue