(po-content-type-charset-alist): Delete most entries.
(po-find-charset): Doc fix.  Remove interactive spec.
(po): Provide.
(po-find-file-coding-system-guts): Rewritten to use
`coding-system-alist' and the code-pages library instead of the
codepage functions (`cp-supported-codepages', `codepage-setup').
This commit is contained in:
Dave Love 2002-10-30 19:47:53 +00:00
parent cf07d2bdf6
commit 3180897ccc

View file

@ -33,89 +33,14 @@
;;; Code:
;; Charset names, as written in a PO file's Content-Type header, paired
;; with the coding-system symbol to use for each.
;; NOTE(review): this span looks like a rendered diff in which removed and
;; added lines are interleaved without +/- markers, so it does not read as
;; one valid form.  The hunk header above (-33,89 +33,14) and the commit
;; message ("Delete most entries") suggest that only the three entries
;; mapping to `undecided' and the second docstring survive -- confirm
;; against the checked-in po.el.
(defconst po-content-type-charset-alist
'(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
("ASCII" . undecided)
;; NOTE(review): presumably the replacement for the two lines above.
'(("ASCII" . undecided)
("ANSI_X3.4-1968" . undecided)
("US-ASCII" . undecided)
;; NOTE(review): the entries from here down to the first docstring are
;; presumably the ones the commit deleted -- confirm.
("ISO-8859-1" . iso-8859-1)
("ISO_8859-1" . iso-8859-1)
("ISO-8859-2" . iso-8859-2)
("ISO_8859-2" . iso-8859-2)
("ISO-8859-3" . iso-8859-3)
("ISO_8859-3" . iso-8859-3)
("ISO-8859-4" . iso-8859-4)
("ISO_8859-4" . iso-8859-4)
("ISO-8859-5" . iso-8859-5)
("ISO_8859-5" . iso-8859-5)
;("ISO-8859-6" . ??)
;("ISO_8859-6" . ??)
("ISO-8859-7" . iso-8859-7)
("ISO_8859-7" . iso-8859-7)
("ISO-8859-8" . iso-8859-8)
("ISO_8859-8" . iso-8859-8)
("ISO-8859-9" . iso-8859-9)
("ISO_8859-9" . iso-8859-9)
;("ISO-8859-13" . ??)
;("ISO_8859-13" . ??)
;("ISO-8859-14" . ??)
;("ISO_8859-14" . ??)
("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
("KOI8-R" . koi8-r)
;("KOI8-U" . ??)
;("KOI8-T" . ??)
("CP437" . cp437) ; requires Emacs 20
("CP775" . cp775) ; requires Emacs 20
("CP850" . cp850) ; requires Emacs 20
("CP852" . cp852) ; requires Emacs 20
("CP855" . cp855) ; requires Emacs 20
;("CP856" . ??)
("CP857" . cp857) ; requires Emacs 20
("CP861" . cp861) ; requires Emacs 20
("CP862" . cp862) ; requires Emacs 20
("CP864" . cp864) ; requires Emacs 20
("CP865" . cp865) ; requires Emacs 20
("CP866" . cp866) ; requires Emacs 21
("CP869" . cp869) ; requires Emacs 20
;("CP874" . ??)
;("CP922" . ??)
;("CP932" . ??)
;("CP943" . ??)
;("CP949" . ??)
;("CP950" . ??)
;("CP1046" . ??)
;("CP1124" . ??)
;("CP1129" . ??)
("CP1250" . cp1250) ; requires Emacs 20
("CP1251" . cp1251) ; requires Emacs 20
("CP1252" . iso-8859-1) ; approximation
("CP1253" . cp1253) ; requires Emacs 20
("CP1254" . iso-8859-9) ; approximation
("CP1255" . iso-8859-8) ; approximation
;("CP1256" . ??)
("CP1257" . cp1257) ; requires Emacs 20
("GB2312" . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
; also named 'euc-cn' in Emacs 20 or Emacs 21
("EUC-JP" . euc-jp)
("EUC-KR" . euc-kr)
;("EUC-TW" . ??)
("BIG5" . big5)
;("BIG5-HKSCS" . ??)
;("GBK" . ??)
;("GB18030" . ??)
("SHIFT_JIS" . shift_jis)
;("JOHAB" . ??)
("TIS-620" . tis-620) ; requires Emacs 20 or Emacs 21
("VISCII" . viscii) ; requires Emacs 20 or Emacs 21
;("GEORGIAN-PS" . ??)
("UTF-8" . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
)
"How to convert a GNU libc/libiconv canonical charset name as seen in
Content-Type into a Mule coding system.")
;; NOTE(review): presumably the post-change tail of the alist and its
;; replacement docstring -- confirm.
("US-ASCII" . undecided))
"Alist of coding system versus GNU libc/libiconv canonical charset name.
Contains canonical charset names that don't correspond to coding systems.")
(defun po-find-charset (filename)
"Return PO file charset value."
(interactive)
"Return PO charset value for FILENAME."
(let ((charset-regexp
"^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
(short-read nil))
@ -144,42 +69,49 @@ Content-Type into a Mule coding system.")
(match-string 1))))))
;; Choose the coding system for reading PO file FILENAME from the charset
;; that `po-find-charset' reports for it.
;; NOTE(review): this span looks like a rendered diff with two bodies
;; interleaved and no +/- markers.  Going by the commit message
;; ("Re-written to use coding-system-alist, and code-pages, not codepage
;; stuff"), the first docstring and body (using `cp-supported-codepages'
;; and `codepage-setup') are presumably the removed version and the second
;; (using `coding-system-alist' and `code-pages') the replacement --
;; confirm against the checked-in po.el.
(defun po-find-file-coding-system-guts (operation filename)
"\
Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real."
;; Body that falls back to `no-conversion' (presumably the pre-change one).
(and (eq operation 'insert-file-contents)
(file-exists-p filename)
(with-temp-buffer
(let* ((coding-system-for-read 'no-conversion)
(charset (or (po-find-charset filename) "ascii"))
(charset-upper (upcase charset))
(charset-lower (downcase charset))
(candidate
(cdr (assoc charset-upper po-content-type-charset-alist)))
(try (or candidate (intern-soft charset-lower))))
(list (cond ((and try (coding-system-p try))
try)
((and try
(string-match "\\`cp[1-9][0-9][0-9]?\\'"
(symbol-name try))
(assoc (substring (symbol-name try) 2)
(cp-supported-codepages)))
(codepage-setup (substring (symbol-name try) 2))
try)
((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
charset-lower)
(assoc (substring charset-lower 2)
(cp-supported-codepages)))
(codepage-setup (substring charset-lower 2))
(intern charset-lower))
(t
'no-conversion)))))))
;; Body that falls back to `raw-text' (presumably the post-change one).
"Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset."
;; The `and' yields nil unless OPERATION is `insert-file-contents' and
;; FILENAME exists; otherwise the value is a one-element list, i.e. a cons
;; whose cdr is nil.
(and
(eq operation 'insert-file-contents)
(file-exists-p filename)
(with-temp-buffer
;; A charset of "ascii" is assumed when `po-find-charset' returns nil.
(let* ((coding-system-for-read 'no-conversion)
(charset (or (po-find-charset filename) "ascii"))
assoc)
(list (cond
;; First consult the explicit alist, matching case-insensitively.
;; NOTE(review): `assoc-ignore-case' was later declared obsolete in
;; favor of `assoc-string' -- check the targeted Emacs version.
((setq assoc
(assoc-ignore-case charset
po-content-type-charset-alist))
(cdr assoc))
;; Otherwise look CHARSET up in `coding-system-alist', retrying with
;; every `_' replaced by `-'; the matched name is interned.
((or (setq assoc (assoc-ignore-case charset coding-system-alist))
(setq assoc
(assoc-ignore-case (subst-char-in-string ?_ ?-
charset)
coding-system-alist)))
(intern (car assoc)))
;; In principle we should also check the `mime-charset'
;; property of everything in the base coding system
;; list, but there should always be a coding system
;; corresponding to the MIME name.
((featurep 'code-pages)
;; Give up.
'raw-text)
(t
;; Try again with code-pages loaded. Maybe it's best
;; to require it initially?
;; The third argument to `require' is non-nil, so a missing
;; code-pages library does not signal an error; the repeated
;; lookup below then falls through to `raw-text'.
(require 'code-pages nil t)
(if (or
(setq assoc (assoc-ignore-case charset coding-system-alist))
(setq assoc (assoc-ignore-case (subst-char-in-string
?_ ?- charset)
coding-system-alist)))
(intern (car assoc))
'raw-text))))))))
;; Entry point meant to be called through `file-coding-system-alist' (per
;; the docstring).  It unpacks ARG-LIST and delegates to
;; `po-find-file-coding-system-guts', passing the first element as
;; OPERATION and the second as FILENAME.
;; NOTE(review): two docstrings appear below; presumably the first is the
;; pre-change text and the second its replacement -- confirm against the
;; checked-in po.el.
;;;###autoload
(defun po-find-file-coding-system (arg-list)
"\
Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real."
"Return a (DECODING . ENCODING) pair, according to PO file's charset.
Called through `file-coding-system-alist', before the file is visited for real."
(po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
;; This is for XEmacs.
;(defun po-find-file-coding-system (operation filename)
@ -187,3 +119,7 @@ Called through file-coding-system-alist, before the file is visited for real."
;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
;Called through file-coding-system-alist, before the file is visited for real."
; (po-find-file-coding-system-guts operation filename))
;; Declare the `po' feature so that `(require 'po)' is satisfied once this
;; file has been loaded.
(provide 'po)
;;; po.el ends here