Fix bug #16043 with crashes when displaying new bidi control characters.

src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
 directional control characters.  Update type and category
 determination according to the UBA from Unicode v6.3.
 (bidi_category_t): New category EXPLICIT_FORMATTING.
 src/dispextern.h (bidi_type_t): Update to include new bidirectional
 properties introduced with Unicode v6.3.

 admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
 to include the new isolate-related classes introduced with Unicode
 v6.3.
 (unidata-encode-val): Accept an additional optional argument, a
 warning message to emit when UnicodeData.txt defines bidi-class
 values that are not in unidata-prop-alist.  Add a comment
 explaining what maintainers should do if/when such a warning ever
 appears.
 (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
 when generating uni-bidi.el.
This commit is contained in:
Eli Zaretskii 2013-12-04 18:58:05 +02:00
parent 456760a5b4
commit ad82612405
5 changed files with 87 additions and 30 deletions

View file

@ -1,3 +1,16 @@
2013-12-04 Eli Zaretskii <eliz@gnu.org>
* unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class
to include the new isolate-related classes introduced with Unicode
v6.3.
(unidata-encode-val): Accept an additional optional argument, a
warning message to emit when UnicodeData.txt defines bidi-class
values that are not in unidata-prop-alist. Add a comment
explaining what maintainers should do if/when such a warning ever
appears.
(unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil
when generating uni-bidi.el.
2013-12-01 Glenn Morris <rgm@gnu.org>
* unidata/Makefile.in (${DSTDIR}/charprop.el):

View file

@ -194,8 +194,8 @@ Property value is an integer."
4 unidata-gen-table-symbol "uni-bidi.el"
"Unicode bidi class.
Property value is one of the following symbols:
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
AN, CS, NSM, BN, B, S, WS, ON"
L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI,
EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON"
unidata-describe-bidi-class
;; The assignment of default values to blocks of code points
;; follows the file DerivedBidiClass.txt from the Unicode
@ -205,7 +205,8 @@ Property value is one of the following symbols:
(#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
;; The order of elements must be in sync with bidi_type_t in
;; src/dispextern.h.
(L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
(L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI
ES ET CS NSM S WS ON))
(decomposition
5 unidata-gen-table-decomposition "uni-decomposition.el"
"Unicode decomposition mapping.
@ -397,12 +398,17 @@ is the character itself.")))
;; If VAL is one of VALn, just return n.
;; Otherwise, VAL-LIST is modified to this:
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
;;
;; WARN is an optional warning to display when the value list is
;; extended, for property values that need to be in sync with other
;; parts of Emacs; currently only used for bidi-class.
(defun unidata-encode-val (val-list val)
(defun unidata-encode-val (val-list val &optional warn)
(let ((slot (assoc val val-list))
val-code)
(if slot
(cdr slot)
(if warn (message warn val))
(setq val-code (length val-list))
(nconc val-list (list (cons val val-code)))
val-code)))
@ -413,6 +419,16 @@ is the character itself.")))
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
(vec (make-vector 128 0))
;; When this warning is printed, there's a need to make the
;; following changes:
;; (1) update unidata-prop-alist with the new bidi-class values;
;; (2) extend bidi_type_t enumeration in src/dispextern.h to
;; include the new classes;
;; (3) possibly update the assertion in bidi.c:bidi_check_type; and
;; (4) possibly update the switch cases in
;; bidi.c:bidi_get_type and bidi.c:bidi_get_category.
(bidi-warning "\
** Found new bidi-class '%s', please update bidi.c and dispextern.h")
tail elt range val val-code idx slot
prev-range-data)
(setq val-list (cons nil (copy-sequence val-list)))
@ -438,7 +454,9 @@ is the character itself.")))
(setq elt (car tail) tail (cdr tail))
(setq range (car elt)
val (funcall val-func (nth prop-idx elt)))
(setq val-code (if val (unidata-encode-val val-list val)))
(setq val-code (if val (unidata-encode-val val-list val
(and (eq prop 'bidi-class)
bidi-warning))))
(if (consp range)
(when val-code
(set-char-table-range table range val-code)
@ -486,7 +504,9 @@ is the character itself.")))
(setq new-val (funcall val-func (nth prop-idx elt)))
(if (not (eq val new-val))
(setq val new-val
val-code (if val (unidata-encode-val val-list val))))
val-code (if val (unidata-encode-val
val-list val (and (eq prop 'bidi-class)
bidi-warning)))))
(if val-code
(aset vec (- range start) val-code))
(setq tail (cdr tail)))

View file

@ -1,3 +1,13 @@
2013-12-04 Eli Zaretskii <eliz@gnu.org>
* bidi.c (bidi_get_type, bidi_get_category): Handle the isolate
directional control characters. Update type and category
determination according to the UBA from Unicode v6.3.
(bidi_category_t): New category EXPLICIT_FORMATTING.
* dispextern.h (bidi_type_t): Update to include new bidirectional
properties introduced with Unicode v6.3. (Bug#16043)
2013-12-04 Martin Rudalics <rudalics@gmx.at>
* xterm.c (XTflash): Fix coordinate of bottom area to flash

View file

@ -76,7 +76,8 @@ typedef enum {
UNKNOWN_BC,
NEUTRAL,
WEAK,
STRONG
STRONG,
EXPLICIT_FORMATTING
} bidi_category_t;
/* UAX#9 says to search only for L, AL, or R types of characters, and
@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override)
if (default_type == UNKNOWN_BT)
emacs_abort ();
if (override == NEUTRAL_DIR)
return default_type;
switch (default_type)
{
/* Although UAX#9 does not tell, it doesn't make sense to
override NEUTRAL_B and LRM/RLM characters. */
case WEAK_BN:
case NEUTRAL_B:
case LRE:
case LRO:
@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override)
case RLO:
case PDF:
return default_type;
/* FIXME: The isolate controls are treated as BN until we add
support for UBA v6.3. */
case LRI:
case RLI:
case FSI:
case PDI:
return WEAK_BN;
default:
switch (ch)
{
case LRM_CHAR:
case RLM_CHAR:
return default_type;
default:
if (override == L2R) /* X6 */
return STRONG_L;
else if (override == R2L)
return STRONG_R;
else
emacs_abort (); /* can't happen: handled above */
}
if (override == L2R)
return STRONG_L;
else if (override == R2L)
return STRONG_R;
else
return default_type;
}
}
@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type)
case STRONG_L:
case STRONG_R:
case STRONG_AL:
case LRE:
case LRO:
case RLE:
case RLO:
return STRONG;
case PDF: /* ??? really?? */
case WEAK_EN:
case WEAK_ES:
case WEAK_ET:
@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type)
case WEAK_CS:
case WEAK_NSM:
case WEAK_BN:
/* FIXME */
case LRI:
case RLI:
case FSI:
case PDI:
return WEAK;
case NEUTRAL_B:
case NEUTRAL_S:
case NEUTRAL_WS:
case NEUTRAL_ON:
return NEUTRAL;
case LRE:
case LRO:
case RLE:
case RLO:
case PDF:
#if 0
/* FIXME: This awaits implementation of isolate support. */
case LRI:
case RLI:
case FSI:
case PDI:
#endif
return EXPLICIT_FORMATTING;
default:
emacs_abort ();
}

View file

@ -1895,6 +1895,10 @@ typedef enum {
RLE, /* right-to-left embedding */
RLO, /* right-to-left override */
PDF, /* pop directional format */
LRI, /* left-to-right isolate */
RLI, /* right-to-left isolate */
FSI, /* first strong isolate */
PDI, /* pop directional isolate */
WEAK_ES, /* european number separator */
WEAK_ET, /* european number terminator */
WEAK_CS, /* common separator */