Fix bug #16043 with crashes when displaying new bidi control characters.
src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate directional control characters. Update type and category determination according to the UBA from Unicode v6.3. (bidi_category_t): New category EXPLICIT_FORMATTING. src/dispextern.h (bidi_type_t): Update to include new bidirectional properties introduced with Unicode v6.3. admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class to include the new isolate-related classes introduced with Unicode v6.3. (unidata-encode-val): Accept an additional optional argument, a warning message to emit when UnicodeData.txt defines bidi-class values that are not in unidata-prop-alist. Add a comment explaining what maintainers should do if/when such a warning ever appears. (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil when generating uni-bidi.el.
This commit is contained in:
parent
456760a5b4
commit
ad82612405
5 changed files with 87 additions and 30 deletions
|
@ -1,3 +1,16 @@
|
|||
2013-12-04 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
|
||||
to include the new isolate-related classes introduced with Unicode
|
||||
v6.3.
|
||||
(unidata-encode-val): Accept an additional optional argument, a
|
||||
warning message to emit when UnicodeData.txt defines bidi-class
|
||||
values that are not in unidata-prop-alist. Add a comment
|
||||
explaining what maintainers should do if/when such a warning ever
|
||||
appears.
|
||||
(unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
|
||||
when generating uni-bidi.el.
|
||||
|
||||
2013-12-01 Glenn Morris <rgm@gnu.org>
|
||||
|
||||
* unidata/Makefile.in (${DSTDIR}/charprop.el):
|
||||
|
|
|
@ -194,8 +194,8 @@ Property value is an integer."
|
|||
4 unidata-gen-table-symbol "uni-bidi.el"
|
||||
"Unicode bidi class.
|
||||
Property value is one of the following symbols:
|
||||
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
|
||||
AN, CS, NSM, BN, B, S, WS, ON"
|
||||
L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI,
|
||||
EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON"
|
||||
unidata-describe-bidi-class
|
||||
;; The assignment of default values to blocks of code points
|
||||
;; follows the file DerivedBidiClass.txt from the Unicode
|
||||
|
@ -205,7 +205,8 @@ Property value is one of the following symbols:
|
|||
(#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
|
||||
;; The order of elements must be in sync with bidi_type_t in
|
||||
;; src/dispextern.h.
|
||||
(L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
|
||||
(L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI
|
||||
ES ET CS NSM S WS ON))
|
||||
(decomposition
|
||||
5 unidata-gen-table-decomposition "uni-decomposition.el"
|
||||
"Unicode decomposition mapping.
|
||||
|
@ -397,12 +398,17 @@ is the character itself.")))
|
|||
;; If VAL is one of VALn, just return n.
|
||||
;; Otherwise, VAL-LIST is modified to this:
|
||||
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
|
||||
;;
|
||||
;; WARN is an optional warning to display when the value list is
|
||||
;; extended, for property values that need to be in sync with other
|
||||
;; parts of Emacs; currently only used for bidi-class.
|
||||
|
||||
(defun unidata-encode-val (val-list val)
|
||||
(defun unidata-encode-val (val-list val &optional warn)
|
||||
(let ((slot (assoc val val-list))
|
||||
val-code)
|
||||
(if slot
|
||||
(cdr slot)
|
||||
(if warn (message warn val))
|
||||
(setq val-code (length val-list))
|
||||
(nconc val-list (list (cons val val-code)))
|
||||
val-code)))
|
||||
|
@ -413,6 +419,16 @@ is the character itself.")))
|
|||
(let ((table (make-char-table 'char-code-property-table))
|
||||
(prop-idx (unidata-prop-index prop))
|
||||
(vec (make-vector 128 0))
|
||||
;; When this warning is printed, there's a need to make the
|
||||
;; following changes:
|
||||
;; (1) update unidata-prop-alist with the new bidi-class values;
|
||||
;; (2) extend bidi_type_t enumeration in src/dispextern.h to
|
||||
;; include the new classes;
|
||||
;; (3) possibly update the assertion in bidi.c:bidi_check_type; and
|
||||
;; (4) possibly update the switch cases in
|
||||
;; bidi.c:bidi_get_type and bidi.c:bidi_get_category.
|
||||
(bidi-warning "\
|
||||
** Found new bidi-class '%s', please update bidi.c and dispextern.h")
|
||||
tail elt range val val-code idx slot
|
||||
prev-range-data)
|
||||
(setq val-list (cons nil (copy-sequence val-list)))
|
||||
|
@ -438,7 +454,9 @@ is the character itself.")))
|
|||
(setq elt (car tail) tail (cdr tail))
|
||||
(setq range (car elt)
|
||||
val (funcall val-func (nth prop-idx elt)))
|
||||
(setq val-code (if val (unidata-encode-val val-list val)))
|
||||
(setq val-code (if val (unidata-encode-val val-list val
|
||||
(and (eq prop 'bidi-class)
|
||||
bidi-warning))))
|
||||
(if (consp range)
|
||||
(when val-code
|
||||
(set-char-table-range table range val-code)
|
||||
|
@ -486,7 +504,9 @@ is the character itself.")))
|
|||
(setq new-val (funcall val-func (nth prop-idx elt)))
|
||||
(if (not (eq val new-val))
|
||||
(setq val new-val
|
||||
val-code (if val (unidata-encode-val val-list val))))
|
||||
val-code (if val (unidata-encode-val
|
||||
val-list val (and (eq prop 'bidi-class)
|
||||
bidi-warning)))))
|
||||
(if val-code
|
||||
(aset vec (- range start) val-code))
|
||||
(setq tail (cdr tail)))
|
||||
|
|
|
@ -1,3 +1,13 @@
|
|||
2013-12-04 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
|
||||
directional control characters. Update type and category
|
||||
determination according to the UBA from Unicode v6.3.
|
||||
(bidi_category_t): New category EXPLICIT_FORMATTING.
|
||||
|
||||
* dispextern.h (bidi_type_t): Update to include new bidirectional
|
||||
properties introduced with Unicode v6.3. (Bug#16043)
|
||||
|
||||
2013-12-04 Martin Rudalics <rudalics@gmx.at>
|
||||
|
||||
* xterm.c (XTflash): Fix coordinate of bottom area to flash
|
||||
|
|
58
src/bidi.c
58
src/bidi.c
|
@ -76,7 +76,8 @@ typedef enum {
|
|||
UNKNOWN_BC,
|
||||
NEUTRAL,
|
||||
WEAK,
|
||||
STRONG
|
||||
STRONG,
|
||||
EXPLICIT_FORMATTING
|
||||
} bidi_category_t;
|
||||
|
||||
/* UAX#9 says to search only for L, AL, or R types of characters, and
|
||||
|
@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override)
|
|||
if (default_type == UNKNOWN_BT)
|
||||
emacs_abort ();
|
||||
|
||||
if (override == NEUTRAL_DIR)
|
||||
return default_type;
|
||||
|
||||
switch (default_type)
|
||||
{
|
||||
/* Although UAX#9 does not say so, it doesn't make sense to
|
||||
override NEUTRAL_B and LRM/RLM characters. */
|
||||
case WEAK_BN:
|
||||
case NEUTRAL_B:
|
||||
case LRE:
|
||||
case LRO:
|
||||
|
@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override)
|
|||
case RLO:
|
||||
case PDF:
|
||||
return default_type;
|
||||
/* FIXME: The isolate controls are treated as BN until we add
|
||||
support for UBA v6.3. */
|
||||
case LRI:
|
||||
case RLI:
|
||||
case FSI:
|
||||
case PDI:
|
||||
return WEAK_BN;
|
||||
default:
|
||||
switch (ch)
|
||||
{
|
||||
case LRM_CHAR:
|
||||
case RLM_CHAR:
|
||||
return default_type;
|
||||
default:
|
||||
if (override == L2R) /* X6 */
|
||||
return STRONG_L;
|
||||
else if (override == R2L)
|
||||
return STRONG_R;
|
||||
else
|
||||
emacs_abort (); /* can't happen: handled above */
|
||||
}
|
||||
if (override == L2R)
|
||||
return STRONG_L;
|
||||
else if (override == R2L)
|
||||
return STRONG_R;
|
||||
else
|
||||
return default_type;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type)
|
|||
case STRONG_L:
|
||||
case STRONG_R:
|
||||
case STRONG_AL:
|
||||
case LRE:
|
||||
case LRO:
|
||||
case RLE:
|
||||
case RLO:
|
||||
return STRONG;
|
||||
case PDF: /* ??? really?? */
|
||||
case WEAK_EN:
|
||||
case WEAK_ES:
|
||||
case WEAK_ET:
|
||||
|
@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type)
|
|||
case WEAK_CS:
|
||||
case WEAK_NSM:
|
||||
case WEAK_BN:
|
||||
/* FIXME */
|
||||
case LRI:
|
||||
case RLI:
|
||||
case FSI:
|
||||
case PDI:
|
||||
return WEAK;
|
||||
case NEUTRAL_B:
|
||||
case NEUTRAL_S:
|
||||
case NEUTRAL_WS:
|
||||
case NEUTRAL_ON:
|
||||
return NEUTRAL;
|
||||
case LRE:
|
||||
case LRO:
|
||||
case RLE:
|
||||
case RLO:
|
||||
case PDF:
|
||||
#if 0
|
||||
/* FIXME: This awaits implementation of isolate support. */
|
||||
case LRI:
|
||||
case RLI:
|
||||
case FSI:
|
||||
case PDI:
|
||||
#endif
|
||||
return EXPLICIT_FORMATTING;
|
||||
default:
|
||||
emacs_abort ();
|
||||
}
|
||||
|
|
|
@ -1895,6 +1895,10 @@ typedef enum {
|
|||
RLE, /* right-to-left embedding */
|
||||
RLO, /* right-to-left override */
|
||||
PDF, /* pop directional format */
|
||||
LRI, /* left-to-right isolate */
|
||||
RLI, /* right-to-left isolate */
|
||||
FSI, /* first strong isolate */
|
||||
PDI, /* pop directional isolate */
|
||||
WEAK_ES, /* european number separator */
|
||||
WEAK_ET, /* european number terminator */
|
||||
WEAK_CS, /* common separator */
|
||||
|
|
Loading…
Add table
Reference in a new issue