(unicodedata-file): New.

(unicode-data): New (adapted from unicode branch).
(describe-char): Use it.  Print char's unicode differently.  Avoid
elements with null cadr when formatting list.  Clarify error
message when used in Help buffer.
(button): Require when compiling.
(describe-char-after): Alias for obsolete command.
This commit is contained in:
Dave Love 2003-05-21 22:00:01 +00:00
parent edd3ff1dfc
commit 831ccfa6a2

View file

@ -1,6 +1,6 @@
;;; descr-text.el --- describe text mode
;; Copyright (c) 1994, 1995, 1996, 2001, 2002 Free Software Foundation, Inc.
;; Copyright (c) 1994, 1995, 1996, 2001, 02, 03 Free Software Foundation, Inc.
;; Author: Boris Goldowsky <boris@gnu.org>
;; Keywords: faces
@ -28,6 +28,8 @@
;;; Code:
(eval-when-compile (require 'button))
(defun describe-text-done ()
"Delete the current window or bury the current buffer."
(interactive)
@ -217,6 +219,215 @@ otherwise."
(widget-insert "There are text properties here:\n")
(describe-property-list properties)))))
(defcustom unicodedata-file nil
"Location of Unicode data file.
This is the UnicodeData.txt file from the Unicode consortium, used for
diagnostics. If it is non-nil `describe-char-after' will print data
looked up from it. This facility is mostly of use to people doing
multilingual development.
This is a fairly large file, not typically present on GNU systems. At
the time of writing it is at
<URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
:group 'mule
:version "21.5"
:type '(choice (const :tag "None" nil)
file))
;; We could convert the unidata file into a Lispy form once-for-all
;; and distribute it for loading on demand. It might be made more
;; space-efficient by splitting strings word-wise and replacing them
;; with lists of symbols interned in a private obarray, e.g.
;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
;; Fixme: Check whether this needs updating for Unicode 4.
(defun unicode-data (char)
"Return a list of Unicode data for unicode CHAR.
Each element is a list of a property description and the property value.
The list is null if CHAR isn't found in `unicodedata-file'."
(when unicodedata-file
(unless (file-exists-p unicodedata-file)
(error "`unicodedata-file' %s not found" unicodedata-file))
(save-excursion
;; Find file in fundamental mode to avoid, e.g. flyspell turned
;; on for .txt. Don't use RAWFILE arg in case of DOS line endings.
(set-buffer (let ((auto-mode-alist))
(find-file-noselect unicodedata-file)))
(goto-char (point-min))
(let ((hex (format "%04X" char))
found first last)
(if (re-search-forward (concat "^" hex) nil t)
(setq found t)
;; It's not listed explicitly. Look for ranges, e.g. CJK
;; ideographs, and check whether it's in one of them.
(while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
(>= char (setq first
(string-to-number (match-string 1) 16)))
(progn
(forward-line 1)
(looking-at "^\\([^;]+\\);[^;]+Last>;")
(> char
(setq last
(string-to-number (match-string 1) 16))))))
(if (and (>= char first)
(<= char last))
(setq found t)))
(if found
(let ((fields (mapcar (lambda (elt)
(if (> (length elt) 0)
elt))
(cdr (split-string
(buffer-substring
(line-beginning-position)
(line-end-position))
";")))))
;; The length depends on whether the last field was empty.
(unless (or (= 13 (length fields))
(= 14 (length fields)))
(error "Invalid contents in %s" unicodedata-file))
;; The field names and values lists are slightly
;; modified from Mule-UCS unidata.el.
(list
(list "Name" (let ((name (nth 0 fields)))
;; Check for <..., First>, <..., Last>
(if (string-match "\\`\\(<[^,]+\\)," name)
(concat (match-string 1 name) ">")
name)))
(list "Category"
(cdr (assoc
(nth 1 fields)
'(("Lu" . "uppercase letter")
("Ll" . "lowercase letter")
("Lt" . "titlecase letter")
("Mn" . "non-spacing mark")
("Mc" . "spacing-combining mark")
("Me" . "enclosing mark")
("Nd" . "decimal digit")
("Nl" . "letter number")
("No" . "other number")
("Zs" . "space separator")
("Zl" . "line separator")
("Zp" . "paragraph separator")
("Cc" . "other control")
("Cf" . "other format")
("Cs" . "surrogate")
("Co" . "private use")
("Cn" . "not assigned")
("Lm" . "modifier letter")
("Lo" . "other letter")
("Pc" . "connector punctuation")
("Pd" . "dash punctuation")
("Ps" . "open punctuation")
("Pe" . "close punctuation")
("Pi" . "initial-quotation punctuation")
("Pf" . "final-quotation punctuation")
("Po" . "other punctuation")
("Sm" . "math symbol")
("Sc" . "currency symbol")
("Sk" . "modifier symbol")
("So" . "other symbol")))))
(list "Combining class"
(cdr (assoc
(string-to-number (nth 2 fields))
'((0 . "Spacing")
(1 . "Overlays and interior")
(7 . "Nuktas")
(8 . "Hiragana/Katakana voicing marks")
(9 . "Viramas")
(10 . "Start of fixed position classes")
(199 . "End of fixed position classes")
(200 . "Below left attached")
(202 . "Below attached")
(204 . "Below right attached")
(208 . "Left attached (reordrant around \
single base character)")
(210 . "Right attached")
(212 . "Above left attached")
(214 . "Above attached")
(216 . "Above right attached")
(218 . "Below left")
(220 . "Below")
(222 . "Below right")
(224 . "Left (reordrant around single base \
character)")
(226 . "Right")
(228 . "Above left")
(230 . "Above")
(232 . "Above right")
(233 . "Double below")
(234 . "Double above")
(240 . "Below (iota subscript)")))))
(list "Bidi category"
(cdr (assoc
(nth 3 fields)
'(("L" . "Left-to-Right")
("LRE" . "Left-to-Right Embedding")
("LRO" . "Left-to-Right Override")
("R" . "Right-to-Left")
("AL" . "Right-to-Left Arabic")
("RLE" . "Right-to-Left Embedding")
("RLO" . "Right-to-Left Override")
("PDF" . "Pop Directional Format")
("EN" . "European Number")
("ES" . "European Number Separator")
("ET" . "European Number Terminator")
("AN" . "Arabic Number")
("CS" . "Common Number Separator")
("NSM" . "Non-Spacing Mark")
("BN" . "Boundary Neutral")
("B" . "Paragraph Separator")
("S" . "Segment Separator")
("WS" . "Whitespace")
("ON" . "Other Neutrals")))))
(list
"Decomposition"
(if (nth 4 fields)
(let* ((parts (split-string (nth 4 fields)))
(info (car parts)))
(if (string-match "\\`<\\(.+\\)>\\'" info)
(setq info (match-string 1 info))
(setq info nil))
(if info (setq parts (cdr parts)))
;; Maybe printing ? for unrepresentable unicodes
;; here and below should be changed?
(setq parts (mapconcat
(lambda (arg)
(string (or (decode-char
'ucs
(string-to-number arg 16))
??)))
parts " "))
(concat info parts))))
(list "Decimal digit value"
(nth 5 fields))
(list "Digit value"
(nth 6 fields))
(list "Numeric value"
(nth 7 fields))
(list "Mirrored"
(if (equal "Y" (nth 8 fields))
"yes"))
(list "Old name" (nth 9 fields))
(list "ISO 10646 comment" (nth 10 fields))
(list "Uppercase" (and (nth 11 fields)
(string (or (decode-char
'ucs
(string-to-number
(nth 11 fields) 16))
??))))
(list "Lowercase" (and (nth 12 fields)
(string (or (decode-char
'ucs
(string-to-number
(nth 12 fields) 16))
??))))
(list "Titlecase" (and (nth 13 fields)
(string (or (decode-char
'ucs
(string-to-number
(nth 13 fields) 16))
??)))))))))))
;;;###autoload
(defun describe-char (pos)
"Describe the character after POS (interactively, the character after point).
@ -234,7 +445,7 @@ as well as widgets, buttons, overlays, and text properties."
(composed (if composition (buffer-substring (car composition)
(nth 1 composition))))
(multibyte-p enable-multibyte-characters)
item-list max-width)
item-list max-width unicode)
(if (eq charset 'unknown)
(setq item-list
`(("character"
@ -243,12 +454,21 @@ as well as widgets, buttons, overlays, and text properties."
(single-key-description char)
(char-to-string char))
char char char))))
(if (or (< (char-after) 256)
(memq 'mule-utf-8 (find-coding-systems-region pos (1+ pos)))
(get-char-property pos 'untranslated-utf-8))
(setq unicode (or (get-char-property pos 'untranslated-utf-8)
(encode-char char 'ucs))))
(setq item-list
`(("character"
,(format "%s (0%o, %d, 0x%x)" (if (< char 256)
,(format "%s (0%o, %d, 0x%x%s)" (if (< char 256)
(single-key-description char)
(char-to-string char))
char char char))
char char char
(if unicode
(format ", U+%04X" (encode-char char 'ucs))
"")))
("charset"
,(symbol-name charset)
,(format "(%s)" (charset-description charset)))
@ -287,14 +507,6 @@ as well as widgets, buttons, overlays, and text properties."
(format "(encoded by coding system %S)" coding))
(list "not encodable by coding system"
(symbol-name coding)))))
,@(if (or (memq 'mule-utf-8
(find-coding-systems-region pos (1+ pos)))
(get-char-property pos 'untranslated-utf-8))
(let ((uc (or (get-char-property pos 'untranslated-utf-8)
(encode-char char 'ucs))))
(if uc
(list (list "Unicode"
(format "%04X" uc))))))
,(if (display-graphic-p (selected-frame))
(list "font" (or (internal-char-font pos)
"-- none --"))
@ -303,26 +515,31 @@ as well as widgets, buttons, overlays, and text properties."
(encoded (encode-coding-char char coding)))
(if encoded
(encoded-string-description encoded coding)
"not encodable")))))))
"not encodable"))))
,@(let ((unicodedata (and unicode
(unicode-data unicode))))
(if unicodedata
(cons (list "Unicode data" " ") unicodedata))))))
(setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))
item-list)))
(when (eq (current-buffer) (get-buffer "*Help*"))
(error "Can't do self inspection"))
(error "Can't describe char in Help buffer"))
(with-output-to-temp-buffer "*Help*"
(with-current-buffer standard-output
(set-buffer-multibyte multibyte-p)
(let ((formatter (format "%%%ds:" max-width)))
(dolist (elt item-list)
(insert (format formatter (car elt)))
(dolist (clm (cdr elt))
(when (>= (+ (current-column)
(or (string-match "\n" clm)
(string-width clm)) 1)
(frame-width))
(insert "\n")
(indent-to (1+ max-width)))
(insert " " clm))
(insert "\n")))
(when (cadr elt)
(insert (format formatter (car elt)))
(dolist (clm (cdr elt))
(when (>= (+ (current-column)
(or (string-match "\n" clm)
(string-width clm)) 1)
(frame-width))
(insert "\n")
(indent-to (1+ max-width)))
(insert " " clm))
(insert "\n"))))
(when composition
(insert "\nComposed with the "
(cond
@ -354,6 +571,9 @@ as well as widgets, buttons, overlays, and text properties."
(describe-text-properties pos output))
(describe-text-mode))))))
(defalias 'describe-char-after 'describe-char)
(make-obsolete 'describe-char-after 'describe-char "21.5")
(provide 'descr-text)
;;; descr-text.el ends here