Improve the encoding by compound-text-with-extensions.
This commit is contained in:
parent
faa28da9b7
commit
7a84eee5b7
5 changed files with 147 additions and 76 deletions
|
@ -1,3 +1,14 @@
|
|||
2010-08-06 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* international/mule.el (define-charset): Store NAME as :base
|
||||
property.
|
||||
(ctext-non-standard-encodings-table): Pay attention to charset
|
||||
aliases.
|
||||
(ctext-pre-write-conversion): Sort ctext-standard-encodings by the
|
||||
current priority. Force using the designation of the specific
|
||||
charset by adding `charset' text property. Improve the whole
|
||||
algorithm.
|
||||
|
||||
2010-08-04 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* language/cyrillic.el: Don't add "microsoft-cp1251" to
|
||||
|
|
|
@ -282,6 +282,7 @@ attribute."
|
|||
(plist-put props :short-name (symbol-name name)))
|
||||
(or (plist-get props :long-name)
|
||||
(plist-put props :long-name (plist-get props :short-name)))
|
||||
(plist-put props :base name)
|
||||
;; We can probably get a worthwhile amount in purespace.
|
||||
(setq props
|
||||
(mapcar (lambda (elt)
|
||||
|
@ -1535,11 +1536,13 @@ of `ctext-non-standard-encodings-alist'.")
|
|||
(let* ((slot (assoc elt ctext-non-standard-encodings-alist))
|
||||
(charset (nth 3 slot)))
|
||||
(if (charsetp charset)
|
||||
(setcar tail (cons charset slot))
|
||||
(setcar tail
|
||||
(cons (plist-get (charset-plist charset) :base) slot))
|
||||
(setcar tail (cons (car charset) slot))
|
||||
(dolist (cs (cdr charset))
|
||||
(setcdr tail
|
||||
(cons (cons (car cs) slot) (cdr tail)))
|
||||
(cons (cons (plist-get (charset-plist (car cs)) :base) slot)
|
||||
(cdr tail)))
|
||||
(setq tail (cdr tail))))
|
||||
(setq tail (cdr tail))))
|
||||
table))
|
||||
|
@ -1559,74 +1562,56 @@ in-place."
|
|||
(setq from 1 to (point-max)))
|
||||
(save-restriction
|
||||
(narrow-to-region from to)
|
||||
(goto-char from)
|
||||
(let ((encoding-table (ctext-non-standard-encodings-table))
|
||||
(charset-list ctext-standard-encodings)
|
||||
(charset-list (sort-charsets
|
||||
(copy-sequence ctext-standard-encodings)))
|
||||
(end-pos (make-marker))
|
||||
last-coding-system-used
|
||||
last-pos last-encoding-info
|
||||
encoding-info end-pos ch charset)
|
||||
last-pos charset encoding-info)
|
||||
(dolist (elt encoding-table)
|
||||
(push (car elt) charset-list))
|
||||
(goto-char (setq last-pos from))
|
||||
(setq end-pos (point-marker))
|
||||
(while (re-search-forward "[^\000-\177]+" nil t)
|
||||
(while (re-search-forward "[^\0-\177]+" nil t)
|
||||
;; Found a sequence of non-ASCII characters.
|
||||
(setq last-pos (match-beginning 0)
|
||||
ch (char-after last-pos)
|
||||
charset (char-charset ch charset-list)
|
||||
last-encoding-info
|
||||
(if charset
|
||||
(or (cdr (assq charset encoding-table))
|
||||
charset)
|
||||
'utf-8))
|
||||
(set-marker end-pos (match-end 0))
|
||||
(goto-char (1+ last-pos))
|
||||
(while (marker-position end-pos)
|
||||
(if (< (point) end-pos)
|
||||
(progn
|
||||
(setq charset (char-charset (following-char) charset-list)
|
||||
encoding-info
|
||||
(if charset
|
||||
(or (cdr (assq charset encoding-table))
|
||||
charset)
|
||||
'utf-8))
|
||||
(forward-char 1))
|
||||
(setq encoding-info nil)
|
||||
(set-marker end-pos nil))
|
||||
(unless (eq last-encoding-info encoding-info)
|
||||
(cond ((consp last-encoding-info)
|
||||
;; Encode the previous range using an extended
|
||||
;; segment.
|
||||
(let ((encoding-name (car last-encoding-info))
|
||||
(coding-system (nth 1 last-encoding-info))
|
||||
(noctets (nth 2 last-encoding-info))
|
||||
len)
|
||||
(encode-coding-region last-pos (point) coding-system)
|
||||
(setq len (+ (length encoding-name) 1
|
||||
(- (point) last-pos)))
|
||||
;; According to the spec of CTEXT, it is not
|
||||
;; necessary to produce this extra designation
|
||||
;; sequence, but some buggy application
|
||||
;; (e.g. crxvt-gb) requires it.
|
||||
(insert "\e(B")
|
||||
(save-excursion
|
||||
(goto-char last-pos)
|
||||
(insert (format "\e%%/%d" noctets))
|
||||
(insert-byte (+ (/ len 128) 128) 1)
|
||||
(insert-byte (+ (% len 128) 128) 1)
|
||||
(insert encoding-name)
|
||||
(insert 2))))
|
||||
((eq last-encoding-info 'utf-8)
|
||||
;; Encode the previous range using UTF-8 encoding
|
||||
;; extention.
|
||||
(encode-coding-region last-pos (point) 'mule-utf-8)
|
||||
(save-excursion
|
||||
(goto-char last-pos)
|
||||
(insert "\e%G"))
|
||||
(insert "\e%@"))
|
||||
(t
|
||||
(put-text-property last-pos (point) 'charset charset)))
|
||||
(setq last-pos (point)
|
||||
last-encoding-info encoding-info))))
|
||||
(goto-char (match-beginning 0))
|
||||
(setq last-pos (point)
|
||||
charset (char-charset (following-char) charset-list))
|
||||
(forward-char 1)
|
||||
(while (and (< (point) end-pos)
|
||||
(eq charset (char-charset (following-char) charset-list)))
|
||||
(forward-char 1))
|
||||
(if charset
|
||||
(if (setq encoding-info (cdr (assq charset encoding-table)))
|
||||
;; Encode this range using an extended segment.
|
||||
(let ((encoding-name (car encoding-info))
|
||||
(coding-system (nth 1 encoding-info))
|
||||
(noctets (nth 2 encoding-info))
|
||||
len)
|
||||
(encode-coding-region last-pos (point) coding-system)
|
||||
(setq len (+ (length encoding-name) 1
|
||||
(- (point) last-pos)))
|
||||
;; According to the spec of CTEXT, it is not
|
||||
;; necessary to produce this extra designation
|
||||
;; sequence, but some buggy application
|
||||
;; (e.g. crxvt-gb) requires it.
|
||||
(insert "\e(B")
|
||||
(save-excursion
|
||||
(goto-char last-pos)
|
||||
(insert (format "\e%%/%d" noctets))
|
||||
(insert-byte (+ (/ len 128) 128) 1)
|
||||
(insert-byte (+ (% len 128) 128) 1)
|
||||
(insert encoding-name)
|
||||
(insert 2)))
|
||||
;; Encode this range as characters in CHARSET.
|
||||
(put-text-property last-pos (point) 'charset charset))
|
||||
;; Encode this range using UTF-8 encoding extention.
|
||||
(encode-coding-region last-pos (point) 'mule-utf-8)
|
||||
(save-excursion
|
||||
(goto-char last-pos)
|
||||
(insert "\e%G"))
|
||||
(insert "\e%@")))
|
||||
(goto-char (point-min)))))
|
||||
;; Must return nil, as build_annotations_2 expects that.
|
||||
nil)
|
||||
|
|
|
@ -1,3 +1,14 @@
|
|||
2010-08-06 Kenichi Handa <handa@m17n.org>
|
||||
|
||||
* charset.c: Include <stdlib.h>
|
||||
(struct charset_sort_data): New struct.
|
||||
(charset_compare): New function.
|
||||
(Fsort_charsets): New funciton.
|
||||
(syms_of_charset): Declare Fsort_charsets as a Lisp function.
|
||||
|
||||
* coding.c (decode_coding_iso_2022): Fix checking of dimension
|
||||
number in CTEXT extended segment.
|
||||
|
||||
2010-08-01 Juanma Barranquero <lekktu@gmail.com>
|
||||
|
||||
* w32fns.c (syms_of_w32fns) <x-max-tooltip-size>: Fix typo in docstring.
|
||||
|
|
|
@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
|||
#include <config.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/types.h>
|
||||
|
@ -2139,23 +2140,22 @@ that case, find the charset from what supported by that coding system. */)
|
|||
charset = CHAR_CHARSET (XINT (ch));
|
||||
else
|
||||
{
|
||||
Lisp_Object charset_list;
|
||||
|
||||
if (CONSP (restriction))
|
||||
{
|
||||
for (charset_list = Qnil; CONSP (restriction);
|
||||
restriction = XCDR (restriction))
|
||||
{
|
||||
int id;
|
||||
int c = XFASTINT (ch);
|
||||
|
||||
CHECK_CHARSET_GET_ID (XCAR (restriction), id);
|
||||
charset_list = Fcons (make_number (id), charset_list);
|
||||
for (; CONSP (restriction); restriction = XCDR (restriction))
|
||||
{
|
||||
struct charset *charset;
|
||||
|
||||
CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
|
||||
if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
|
||||
return XCAR (restriction);
|
||||
}
|
||||
charset_list = Fnreverse (charset_list);
|
||||
return Qnil;
|
||||
}
|
||||
else
|
||||
charset_list = coding_system_charset_list (restriction);
|
||||
charset = char_charset (XINT (ch), charset_list, NULL);
|
||||
restriction = coding_system_charset_list (restriction);
|
||||
charset = char_charset (XINT (ch), restriction, NULL);
|
||||
if (! charset)
|
||||
return Qnil;
|
||||
}
|
||||
|
@ -2312,6 +2312,69 @@ Return charset identification number of CHARSET. */)
|
|||
return make_number (id);
|
||||
}
|
||||
|
||||
struct charset_sort_data
|
||||
{
|
||||
Lisp_Object charset;
|
||||
int id;
|
||||
int priority;
|
||||
};
|
||||
|
||||
static int
|
||||
charset_compare (const void *d1, const void *d2)
|
||||
{
|
||||
const struct charset_sort_data *data1 = d1, *data2 = d2;
|
||||
return (data1->priority - data2->priority);
|
||||
}
|
||||
|
||||
DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
|
||||
doc: /* Sort charset list CHARSETS by a priority of each charset.
|
||||
Return the sorted list. CHARSETS is modified by side effects.
|
||||
See also `charset-priority-list' and `set-charset-priority'. */)
|
||||
(Lisp_Object charsets)
|
||||
{
|
||||
Lisp_Object len = Flength (charsets);
|
||||
int n = XFASTINT (len), i, j, done;
|
||||
Lisp_Object tail, elt, attrs;
|
||||
struct charset_sort_data *sort_data;
|
||||
int id, min_id, max_id;
|
||||
USE_SAFE_ALLOCA;
|
||||
|
||||
if (n == 0)
|
||||
return Qnil;
|
||||
SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
|
||||
for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
|
||||
{
|
||||
elt = XCAR (tail);
|
||||
CHECK_CHARSET_GET_ATTR (elt, attrs);
|
||||
sort_data[i].charset = elt;
|
||||
sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
|
||||
if (i == 0)
|
||||
min_id = max_id = id;
|
||||
else if (id < min_id)
|
||||
min_id = id;
|
||||
else if (id > max_id)
|
||||
max_id = id;
|
||||
}
|
||||
for (done = 0, tail = Vcharset_ordered_list, i = 0;
|
||||
done < n && CONSP (tail); tail = XCDR (tail), i++)
|
||||
{
|
||||
elt = XCAR (tail);
|
||||
id = XFASTINT (elt);
|
||||
if (id >= min_id && id <= max_id)
|
||||
for (j = 0; j < n; j++)
|
||||
if (sort_data[j].id == id)
|
||||
{
|
||||
sort_data[j].priority = i;
|
||||
done++;
|
||||
}
|
||||
}
|
||||
qsort (sort_data, n, sizeof *sort_data, charset_compare);
|
||||
for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
|
||||
XSETCAR (tail, sort_data[i].charset);
|
||||
SAFE_FREE ();
|
||||
return charsets;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
init_charset ()
|
||||
|
@ -2414,6 +2477,7 @@ syms_of_charset ()
|
|||
defsubr (&Scharset_priority_list);
|
||||
defsubr (&Sset_charset_priority);
|
||||
defsubr (&Scharset_id_internal);
|
||||
defsubr (&Ssort_charsets);
|
||||
|
||||
DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
|
||||
doc: /* *List of directories to search for charset map files. */);
|
||||
|
|
|
@ -3935,7 +3935,7 @@ decode_coding_iso_2022 (coding)
|
|||
int size;
|
||||
|
||||
ONE_MORE_BYTE (dim);
|
||||
if (dim < 0 || dim > 4)
|
||||
if (dim < '0' || dim > '4')
|
||||
goto invalid_code;
|
||||
ONE_MORE_BYTE (M);
|
||||
if (M < 128)
|
||||
|
|
Loading…
Add table
Reference in a new issue