Allow to search for characters whose bidi directionality was overridden.
src/bidi.c (bidi_find_first_overridden): New function. src/xdisp.c (Fbidi_find_overridden_directionality): New function. (syms_of_xdisp): Defsubr it. src/dispextern.h (bidi_find_first_overridden): Add prototype. doc/lispref/display.texi (Bidirectional Display): Document 'bidi-find-overridden-directionality'. etc/NEWS: Mention 'bidi-find-overridden-directionality'.
This commit is contained in:
parent
a92789b1fc
commit
dd601050e7
8 changed files with 245 additions and 0 deletions
|
@ -1,3 +1,8 @@
|
|||
2014-12-02 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* display.texi (Bidirectional Display): Document
|
||||
'bidi-find-overridden-directionality'.
|
||||
|
||||
2014-11-29 Paul Eggert <eggert@cs.ucla.edu>
|
||||
|
||||
Lessen focus on ChangeLog files, as opposed to change log entries.
|
||||
|
|
|
@ -6800,3 +6800,57 @@ affect all Emacs frames and windows.
|
|||
appropriate mirrored character in the reordered text. Lisp programs
|
||||
can affect the mirrored display by changing this property. Again, any
|
||||
such changes affect all of Emacs display.
|
||||
|
||||
@cindex overriding bidirectional properties
|
||||
@cindex directional overrides
|
||||
@cindex LRO
|
||||
@cindex RLO
|
||||
The bidirectional properties of characters can be overridden by
|
||||
inserting into the text special directional control characters,
|
||||
LEFT-TO-RIGHT OVERRIDE (@acronym{LRO}) and RIGHT-TO-LEFT OVERRIDE
|
||||
(@acronym{RLO}). Any characters between an @acronym{RLO} and the
|
||||
following newline or POP DIRECTIONAL FORMATTING (@acronym{PDF})
|
||||
control character, whichever comes first, will be displayed as if they
|
||||
were strong right-to-left characters, i.e.@: they will be reversed on
|
||||
display. Similarly, any characters between @acronym{LRO} and
|
||||
@acronym{PDF} or newline will display as if they were strong
|
||||
left-to-right, and will @emph{not} be reversed even if they are strong
|
||||
right-to-left characters.
|
||||
|
||||
@cindex phishing using directional overrides
|
||||
@cindex malicious use of directional overrides
|
||||
These overrides are useful when you want to make some text
|
||||
unaffected by the reordering algorithm, and instead directly control
|
||||
the display order. But they can also be used for malicious purposes,
|
||||
known as @dfn{phishing}. Specifically, a URL on a Web page or a link
|
||||
in an email message can be manipulated to make its visual appearance
|
||||
unrecognizable, or similar to some popular benign location, while the
|
||||
real location, interpreted by a browser in the logical order, is very
|
||||
different.
|
||||
|
||||
Emacs provides a primitive that applications can use to detect
|
||||
instances of text whose bidirectional properties were overridden so as
|
||||
to make a left-to-right character display as if it were a
|
||||
right-to-left character, or vice versa.
|
||||
|
||||
@defun bidi-find-overridden-directionality from to &optional object
|
||||
This function looks at the text of the specified @var{object} between
|
||||
positions @var{from} (inclusive) and @var{to} (exclusive), and returns
|
||||
the first position where it finds a strong left-to-right character
|
||||
whose directional properties were forced to display the character as
|
||||
right-to-left, or a strong right-to-left character that was forced
|
||||
to display as left-to-right. If it finds no such characters in the
|
||||
specified region of text, it returns @code{nil}.
|
||||
|
||||
The optional argument @var{object} specifies which text to search, and
|
||||
defaults to the current buffer. If @var{object} is non-@code{nil}, it
|
||||
can be some other buffer, or it can be a string or a window. If it is
|
||||
a string, the function searches that string. If it is a window, the
|
||||
function searches the buffer displayed in that window. If a buffer
|
||||
whose text you want to examine is displayed in some window, we
|
||||
recommend specifying it by that window, rather than passing the buffer to
|
||||
the function. This is because telling the function about the window
|
||||
allows it to correctly account for window-specific overlays, which
|
||||
might change the result of the function if some text in the buffer is
|
||||
covered by overlays.
|
||||
@end defun
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
2014-12-02 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* NEWS: Mention 'bidi-find-overridden-directionality'.
|
||||
|
||||
2014-11-29 Paul Eggert <eggert@cs.ucla.edu>
|
||||
|
||||
Lessen focus on ChangeLog files, as opposed to change log entries.
|
||||
|
|
7
etc/NEWS
7
etc/NEWS
|
@ -98,6 +98,13 @@ environment. For the time being this is implemented for modern POSIX
|
|||
systems and for MS-Windows, for other systems they fall back to their
|
||||
counterparts `string-lessp' and `string-equal'.
|
||||
|
||||
+++
|
||||
** The new function `bidi-find-overridden-directionality' allows you to
|
||||
find characters whose directionality was, perhaps maliciously,
|
||||
overridden by directional override control characters. Lisp programs
|
||||
can use this to detect potential phishing of URLs and other links that
|
||||
exploits bidirectional display reordering.
|
||||
|
||||
*** The ls-lisp package uses `string-collate-lessp' to sort file names.
|
||||
If you want the old, locale-independent sorting, customize the new
|
||||
option `ls-lisp-use-string-collate' to a nil value.
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2014-12-02 Eli Zaretskii <eliz@gnu.org>
|
||||
|
||||
* bidi.c (bidi_find_first_overridden): New function.
|
||||
|
||||
* xdisp.c (Fbidi_find_overridden_directionality): New function.
|
||||
(syms_of_xdisp): Defsubr it.
|
||||
|
||||
* dispextern.h (bidi_find_first_overridden): Add prototype.
|
||||
|
||||
2014-12-02 Jan Djärv <jan.h.d@swipnet.se>
|
||||
|
||||
* nsimage.m (initFromSkipXBM:width:height:flip:length:): Set bmRep
|
||||
|
|
27
src/bidi.c
27
src/bidi.c
|
@ -3376,6 +3376,33 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
|
|||
UNGCPRO;
|
||||
}
|
||||
|
||||
/* Utility function for looking for strong directional characters
|
||||
whose bidi type was overridden by a directional override. */
|
||||
ptrdiff_t
|
||||
bidi_find_first_overridden (struct bidi_it *bidi_it)
|
||||
{
|
||||
ptrdiff_t found_pos = ZV;
|
||||
|
||||
do
|
||||
{
|
||||
/* Need to call bidi_resolve_weak, not bidi_resolve_explicit,
|
||||
because the directional overrides are applied by the
|
||||
former. */
|
||||
bidi_type_t type = bidi_resolve_weak (bidi_it);
|
||||
|
||||
if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
|
||||
|| (type == STRONG_L
|
||||
&& (bidi_it->orig_type == STRONG_R
|
||||
|| bidi_it->orig_type == STRONG_AL)))
|
||||
found_pos = bidi_it->charpos;
|
||||
} while (found_pos == ZV
|
||||
&& bidi_it->charpos < ZV
|
||||
&& bidi_it->ch != BIDI_EOB
|
||||
&& bidi_it->ch != '\n');
|
||||
|
||||
return found_pos;
|
||||
}
|
||||
|
||||
/* This is meant to be called from within the debugger, whenever you
|
||||
wish to examine the cache contents. */
|
||||
void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
|
||||
|
|
|
@ -3173,6 +3173,7 @@ extern void bidi_push_it (struct bidi_it *);
|
|||
extern void bidi_pop_it (struct bidi_it *);
|
||||
extern void *bidi_shelve_cache (void);
|
||||
extern void bidi_unshelve_cache (void *, bool);
|
||||
extern ptrdiff_t bidi_find_first_overridden (struct bidi_it *);
|
||||
|
||||
/* Defined in xdisp.c */
|
||||
|
||||
|
|
138
src/xdisp.c
138
src/xdisp.c
|
@ -21032,6 +21032,143 @@ See also `bidi-paragraph-direction'. */)
|
|||
}
|
||||
}
|
||||
|
||||
DEFUN ("bidi-find-overridden-directionality",
|
||||
Fbidi_find_overridden_directionality,
|
||||
Sbidi_find_overridden_directionality, 2, 3, 0,
|
||||
doc: /* Return position between FROM and TO where directionality was overridden.
|
||||
|
||||
This function returns the first character position in the specified
|
||||
region of OBJECT where there is a character whose `bidi-class' property
|
||||
is `L', but which was forced to display as `R' by a directional
|
||||
override, and likewise with characters whose `bidi-class' is `R'
|
||||
or `AL' that were forced to display as `L'.
|
||||
|
||||
If no such character is found, the function returns nil.
|
||||
|
||||
OBJECT is a Lisp string or buffer to search for overridden
|
||||
directionality, and defaults to the current buffer if nil or omitted.
|
||||
OBJECT can also be a window, in which case the function will search
|
||||
the buffer displayed in that window. Passing the window instead of
|
||||
a buffer is preferable when the buffer is displayed in some window,
|
||||
because this function will then be able to correctly account for
|
||||
window-specific overlays, which can affect the results.
|
||||
|
||||
Strong directional characters `L', `R', and `AL' can have their
|
||||
intrinsic directionality overridden by directional override
|
||||
control characters RLO \(u+202e) and LRO \(u+202d). See the
|
||||
function `get-char-code-property' for a way to inquire about
|
||||
the `bidi-class' property of a character. */)
|
||||
(Lisp_Object from, Lisp_Object to, Lisp_Object object)
|
||||
{
|
||||
struct buffer *buf = current_buffer;
|
||||
struct buffer *old = buf;
|
||||
struct window *w = NULL;
|
||||
bool frame_window_p = FRAME_WINDOW_P (SELECTED_FRAME ());
|
||||
struct bidi_it itb;
|
||||
ptrdiff_t from_pos, to_pos, from_bpos;
|
||||
void *itb_data;
|
||||
|
||||
if (!NILP (object))
|
||||
{
|
||||
if (BUFFERP (object))
|
||||
buf = XBUFFER (object);
|
||||
else if (WINDOWP (object))
|
||||
{
|
||||
w = decode_live_window (object);
|
||||
buf = XBUFFER (w->contents);
|
||||
frame_window_p = FRAME_WINDOW_P (XFRAME (w->frame));
|
||||
}
|
||||
else
|
||||
CHECK_STRING (object);
|
||||
}
|
||||
|
||||
if (STRINGP (object))
|
||||
{
|
||||
/* Characters in unibyte strings are always treated by bidi.c as
|
||||
strong LTR. */
|
||||
if (!STRING_MULTIBYTE (object)
|
||||
/* When we are loading loadup.el, the character property
|
||||
tables needed for bidi iteration are not yet
|
||||
available. */
|
||||
|| !NILP (Vpurify_flag))
|
||||
return Qnil;
|
||||
|
||||
validate_subarray (object, from, to, SCHARS (object), &from_pos, &to_pos);
|
||||
if (from_pos >= SCHARS (object))
|
||||
return Qnil;
|
||||
|
||||
/* Set up the bidi iterator. */
|
||||
itb_data = bidi_shelve_cache ();
|
||||
itb.paragraph_dir = NEUTRAL_DIR;
|
||||
itb.string.lstring = object;
|
||||
itb.string.s = NULL;
|
||||
itb.string.schars = SCHARS (object);
|
||||
itb.string.bufpos = 0;
|
||||
itb.string.from_disp_str = 0;
|
||||
itb.string.unibyte = 0;
|
||||
itb.w = w;
|
||||
bidi_init_it (0, 0, frame_window_p, &itb);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Nothing this fancy can happen in unibyte buffers, or in a
|
||||
buffer that disabled reordering, or if FROM is at EOB. */
|
||||
if (NILP (BVAR (buf, bidi_display_reordering))
|
||||
|| NILP (BVAR (buf, enable_multibyte_characters))
|
||||
/* When we are loading loadup.el, the character property
|
||||
tables needed for bidi iteration are not yet
|
||||
available. */
|
||||
|| !NILP (Vpurify_flag))
|
||||
return Qnil;
|
||||
|
||||
set_buffer_temp (buf);
|
||||
validate_region (&from, &to);
|
||||
from_pos = XINT (from);
|
||||
to_pos = XINT (to);
|
||||
if (from_pos >= ZV)
|
||||
return Qnil;
|
||||
|
||||
/* Set up the bidi iterator. */
|
||||
itb_data = bidi_shelve_cache ();
|
||||
from_bpos = CHAR_TO_BYTE (from_pos);
|
||||
if (from_pos == BEGV)
|
||||
{
|
||||
itb.charpos = BEGV;
|
||||
itb.bytepos = BEGV_BYTE;
|
||||
}
|
||||
else if (FETCH_CHAR (from_bpos - 1) == '\n')
|
||||
{
|
||||
itb.charpos = from_pos;
|
||||
itb.bytepos = from_bpos;
|
||||
}
|
||||
else
|
||||
itb.charpos = find_newline_no_quit (from_pos, CHAR_TO_BYTE (from_pos),
|
||||
-1, &itb.bytepos);
|
||||
itb.paragraph_dir = NEUTRAL_DIR;
|
||||
itb.string.s = NULL;
|
||||
itb.string.lstring = Qnil;
|
||||
itb.string.bufpos = 0;
|
||||
itb.string.from_disp_str = 0;
|
||||
itb.string.unibyte = 0;
|
||||
itb.w = w;
|
||||
bidi_init_it (itb.charpos, itb.bytepos, frame_window_p, &itb);
|
||||
}
|
||||
|
||||
ptrdiff_t found;
|
||||
do {
|
||||
/* For the purposes of this function, the actual base direction of
|
||||
the paragraph doesn't matter, so just set it to L2R. */
|
||||
bidi_paragraph_init (L2R, &itb, 0);
|
||||
while ((found = bidi_find_first_overridden (&itb)) < from_pos)
|
||||
;
|
||||
} while (found == ZV && itb.ch == '\n' && itb.charpos < to_pos);
|
||||
|
||||
bidi_unshelve_cache (itb_data, 0);
|
||||
set_buffer_temp (old);
|
||||
|
||||
return (from_pos <= found && found < to_pos) ? make_number (found) : Qnil;
|
||||
}
|
||||
|
||||
DEFUN ("move-point-visually", Fmove_point_visually,
|
||||
Smove_point_visually, 1, 1, 0,
|
||||
doc: /* Move point in the visual order in the specified DIRECTION.
|
||||
|
@ -30461,6 +30598,7 @@ syms_of_xdisp (void)
|
|||
defsubr (&Scurrent_bidi_paragraph_direction);
|
||||
defsubr (&Swindow_text_pixel_size);
|
||||
defsubr (&Smove_point_visually);
|
||||
defsubr (&Sbidi_find_overridden_directionality);
|
||||
|
||||
DEFSYM (Qmenu_bar_update_hook, "menu-bar-update-hook");
|
||||
DEFSYM (Qoverriding_terminal_local_map, "overriding-terminal-local-map");
|
||||
|
|
Loading…
Add table
Reference in a new issue