Make regex matching reentrant; update syntax during match
* src/lisp.h (compile_pattern): Remove prototype of now-internal function. * src/regex.c (POS_AS_IN_BUFFER): Consult gl_state instead of re_match_object: the latter can change in Lisp. (re_match_2_internal): Switch back to UPDATE_SYNTAX_* FROM UPDATE_SYNTAX_FAST*, allowing calls into Lisp. * src/regex.h (re_match_object): Uncomment declaration. * src/search.c (struct regexp_cache): Add `busy' field. (thaw_buffer_relocation): Delete; rely on unbind. (compile_pattern_1): Assert pattern isn't busy. (shrink_regexp_cache): Don't shrink busy patterns. (clear_regexp_cache): Don't nuke busy patterns. (unfreeze_pattern, freeze_pattern): New functions. (compile_pattern): Return a regexp_cache pointer instead of the re_pattern_buffer, allowing callers to use `freeze_pattern' if needed. Do not consider busy patterns as cache hit candidates; error if we run out of non-busy cache entries. (looking_at_1, fast_looking_at): Snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (string_match_1, fast_string_match_internal) (fast_c_string_match_ignore_case): Adjust for compile_pattern return type. (search_buffer_re): Regex code from old search_buffer moved here; snapshot Vinhibit_changing_match_data; mark pattern busy while we're matching it; unbind. (search_buffer_non_re): Non-regex code from old search_buffer moved here. (search_buffer): Split into search_buffer_re, search_buffer_non_re. (syms_of_search): Staticpro re_match_object, even though we really shouldn't have to. * src/syntax.h (UPDATE_SYNTAX_TABLE_FORWARD_FAST): (UPDATE_SYNTAX_TABLE_FAST): Remove. * src/thread.h (struct thread_state): Remove m_re_match_object, which is global again. (It never needs to be preserved across thread switch.)
This commit is contained in:
parent
1502b377d3
commit
938d252d1c
6 changed files with 453 additions and 424 deletions
|
@ -4029,10 +4029,6 @@ extern void restore_search_regs (void);
|
|||
extern void update_search_regs (ptrdiff_t oldstart,
|
||||
ptrdiff_t oldend, ptrdiff_t newend);
|
||||
extern void record_unwind_save_match_data (void);
|
||||
struct re_registers;
|
||||
extern struct re_pattern_buffer *compile_pattern (Lisp_Object,
|
||||
struct re_registers *,
|
||||
Lisp_Object, bool, bool);
|
||||
extern ptrdiff_t fast_string_match_internal (Lisp_Object, Lisp_Object,
|
||||
Lisp_Object);
|
||||
|
||||
|
|
30
src/regex.c
30
src/regex.c
|
@ -155,7 +155,8 @@
|
|||
# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
|
||||
/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
|
||||
result to get the right base index. */
|
||||
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
|
||||
# define POS_AS_IN_BUFFER(p) \
|
||||
((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
|
||||
|
||||
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
|
||||
# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
|
||||
|
@ -1233,6 +1234,15 @@ static const char *re_error_msgid[] =
|
|||
# undef MATCH_MAY_ALLOCATE
|
||||
#endif
|
||||
|
||||
/* While regex matching of a single compiled pattern isn't reentrant
|
||||
(because we compile regexes to bytecode programs, and the bytecode
|
||||
programs are self-modifying), the regex machinery must nevertheless
|
||||
be reentrant with respect to _different_ patterns, and we do that
|
||||
by avoiding global variables and using MATCH_MAY_ALLOCATE. */
|
||||
#if !defined MATCH_MAY_ALLOCATE && defined emacs
|
||||
# error "Emacs requires MATCH_MAY_ALLOCATE"
|
||||
#endif
|
||||
|
||||
|
||||
/* Failure stack declarations and macros; both re_compile_fastmap and
|
||||
re_match_2 use a failure stack. These have to be macros because of
|
||||
|
@ -5895,12 +5905,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
#ifdef emacs
|
||||
ssize_t offset = PTR_TO_OFFSET (d - 1);
|
||||
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE (charpos);
|
||||
#endif
|
||||
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
|
||||
s1 = SYNTAX (c1);
|
||||
#ifdef emacs
|
||||
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
|
||||
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
|
||||
#endif
|
||||
PREFETCH_NOLIMIT ();
|
||||
GET_CHAR_AFTER (c2, d, dummy);
|
||||
|
@ -5937,7 +5947,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
#ifdef emacs
|
||||
ssize_t offset = PTR_TO_OFFSET (d);
|
||||
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE (charpos);
|
||||
#endif
|
||||
PREFETCH ();
|
||||
GET_CHAR_AFTER (c2, d, dummy);
|
||||
|
@ -5982,7 +5992,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
#ifdef emacs
|
||||
ssize_t offset = PTR_TO_OFFSET (d) - 1;
|
||||
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE (charpos);
|
||||
#endif
|
||||
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
|
||||
s1 = SYNTAX (c1);
|
||||
|
@ -5997,7 +6007,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
PREFETCH_NOLIMIT ();
|
||||
GET_CHAR_AFTER (c2, d, dummy);
|
||||
#ifdef emacs
|
||||
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
|
||||
#endif
|
||||
s2 = SYNTAX (c2);
|
||||
|
||||
|
@ -6026,7 +6036,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
#ifdef emacs
|
||||
ssize_t offset = PTR_TO_OFFSET (d);
|
||||
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE (charpos);
|
||||
#endif
|
||||
PREFETCH ();
|
||||
c2 = RE_STRING_CHAR (d, target_multibyte);
|
||||
|
@ -6069,7 +6079,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
#ifdef emacs
|
||||
ssize_t offset = PTR_TO_OFFSET (d) - 1;
|
||||
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (charpos);
|
||||
UPDATE_SYNTAX_TABLE (charpos);
|
||||
#endif
|
||||
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
|
||||
s1 = SYNTAX (c1);
|
||||
|
@ -6084,7 +6094,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
PREFETCH_NOLIMIT ();
|
||||
c2 = RE_STRING_CHAR (d, target_multibyte);
|
||||
#ifdef emacs
|
||||
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
|
||||
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
|
||||
#endif
|
||||
s2 = SYNTAX (c2);
|
||||
|
||||
|
@ -6107,7 +6117,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
|
|||
{
|
||||
ssize_t offset = PTR_TO_OFFSET (d);
|
||||
ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
|
||||
UPDATE_SYNTAX_TABLE_FAST (pos1);
|
||||
UPDATE_SYNTAX_TABLE (pos1);
|
||||
}
|
||||
#endif
|
||||
{
|
||||
|
|
11
src/regex.h
11
src/regex.h
|
@ -181,8 +181,15 @@ typedef unsigned long reg_syntax_t;
|
|||
string; if it's nil, we are matching text in the current buffer; if
|
||||
it's t, we are matching text in a C string.
|
||||
|
||||
This is defined as a macro in thread.h, which see. */
|
||||
/* extern Lisp_Object re_match_object; */
|
||||
This value is effectively another parameter to re_search_2 and
|
||||
re_match_2. No calls into Lisp or thread switches are allowed
|
||||
before setting re_match_object and calling into the regex search
|
||||
and match functions. These functions capture the current value of
|
||||
re_match_object into gl_state on entry.
|
||||
|
||||
TODO: once we get rid of the !emacs case in this code, turn into an
|
||||
actual function parameter. */
|
||||
extern Lisp_Object re_match_object;
|
||||
#endif
|
||||
|
||||
/* Roughly the maximum number of failure points on the stack. */
|
||||
|
|
809
src/search.c
809
src/search.c
|
@ -48,6 +48,8 @@ struct regexp_cache
|
|||
char fastmap[0400];
|
||||
/* True means regexp was compiled to do full POSIX backtracking. */
|
||||
bool posix;
|
||||
/* True means we're inside a buffer match. */
|
||||
bool busy;
|
||||
};
|
||||
|
||||
/* The instances of that struct. */
|
||||
|
@ -93,6 +95,8 @@ static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
|
|||
ptrdiff_t, ptrdiff_t, EMACS_INT, int,
|
||||
Lisp_Object, Lisp_Object, bool);
|
||||
|
||||
Lisp_Object re_match_object;
|
||||
|
||||
static _Noreturn void
|
||||
matcher_overflow (void)
|
||||
{
|
||||
|
@ -110,14 +114,6 @@ freeze_buffer_relocation (void)
|
|||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
thaw_buffer_relocation (void)
|
||||
{
|
||||
#ifdef REL_ALLOC
|
||||
unbind_to (SPECPDL_INDEX () - 1, Qnil);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Compile a regexp and signal a Lisp error if anything goes wrong.
|
||||
PATTERN is the pattern to compile.
|
||||
CP is the place to put the result.
|
||||
|
@ -134,6 +130,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
|
|||
const char *whitespace_regexp;
|
||||
char *val;
|
||||
|
||||
eassert (!cp->busy);
|
||||
cp->regexp = Qnil;
|
||||
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
|
||||
cp->posix = posix;
|
||||
|
@ -170,10 +167,11 @@ shrink_regexp_cache (void)
|
|||
struct regexp_cache *cp;
|
||||
|
||||
for (cp = searchbuf_head; cp != 0; cp = cp->next)
|
||||
{
|
||||
cp->buf.allocated = cp->buf.used;
|
||||
cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
|
||||
}
|
||||
if (!cp->busy)
|
||||
{
|
||||
cp->buf.allocated = cp->buf.used;
|
||||
cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear the regexp cache w.r.t. a particular syntax table,
|
||||
|
@ -190,10 +188,25 @@ clear_regexp_cache (void)
|
|||
/* It's tempting to compare with the syntax-table we've actually changed,
|
||||
but it's not sufficient because char-table inheritance means that
|
||||
modifying one syntax-table can change others at the same time. */
|
||||
if (!EQ (searchbufs[i].syntax_table, Qt))
|
||||
if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
|
||||
searchbufs[i].regexp = Qnil;
|
||||
}
|
||||
|
||||
static void
|
||||
unfreeze_pattern (void *arg)
|
||||
{
|
||||
struct regexp_cache *searchbuf = arg;
|
||||
searchbuf->busy = false;
|
||||
}
|
||||
|
||||
static void
|
||||
freeze_pattern (struct regexp_cache *searchbuf)
|
||||
{
|
||||
eassert (!searchbuf->busy);
|
||||
record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
|
||||
searchbuf->busy = true;
|
||||
}
|
||||
|
||||
/* Compile a regexp if necessary, but first check to see if there's one in
|
||||
the cache.
|
||||
PATTERN is the pattern to compile.
|
||||
|
@ -205,7 +218,7 @@ clear_regexp_cache (void)
|
|||
POSIX is true if we want full backtracking (POSIX style) for this pattern.
|
||||
False means backtrack only enough to get a valid match. */
|
||||
|
||||
struct re_pattern_buffer *
|
||||
static struct regexp_cache *
|
||||
compile_pattern (Lisp_Object pattern, struct re_registers *regp,
|
||||
Lisp_Object translate, bool posix, bool multibyte)
|
||||
{
|
||||
|
@ -222,6 +235,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
|
|||
if (NILP (cp->regexp))
|
||||
goto compile_it;
|
||||
if (SCHARS (cp->regexp) == SCHARS (pattern)
|
||||
&& !cp->busy
|
||||
&& STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
|
||||
&& !NILP (Fstring_equal (cp->regexp, pattern))
|
||||
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
|
||||
|
@ -237,7 +251,10 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
|
|||
string value. */
|
||||
if (cp->next == 0)
|
||||
{
|
||||
if (cp->busy)
|
||||
error ("Too much matching reentrancy");
|
||||
compile_it:
|
||||
eassert (!cp->busy);
|
||||
compile_pattern_1 (cp, pattern, translate, posix);
|
||||
break;
|
||||
}
|
||||
|
@ -258,8 +275,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
|
|||
/* The compiled pattern can be used both for multibyte and unibyte
|
||||
target. But, we have to tell which the pattern is used for. */
|
||||
cp->buf.target_multibyte = multibyte;
|
||||
|
||||
return &cp->buf;
|
||||
return cp;
|
||||
}
|
||||
|
||||
|
||||
|
@ -270,7 +286,6 @@ looking_at_1 (Lisp_Object string, bool posix)
|
|||
unsigned char *p1, *p2;
|
||||
ptrdiff_t s1, s2;
|
||||
register ptrdiff_t i;
|
||||
struct re_pattern_buffer *bufp;
|
||||
|
||||
if (running_asynch_code)
|
||||
save_search_regs ();
|
||||
|
@ -280,13 +295,17 @@ looking_at_1 (Lisp_Object string, bool posix)
|
|||
BVAR (current_buffer, case_eqv_table));
|
||||
|
||||
CHECK_STRING (string);
|
||||
bufp = compile_pattern (string,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : NULL),
|
||||
(!NILP (BVAR (current_buffer, case_fold_search))
|
||||
? BVAR (current_buffer, case_canon_table) : Qnil),
|
||||
posix,
|
||||
!NILP (BVAR (current_buffer, enable_multibyte_characters)));
|
||||
|
||||
/* Snapshot in case Lisp changes the value. */
|
||||
bool preserve_match_data = NILP (Vinhibit_changing_match_data);
|
||||
|
||||
struct regexp_cache *cache_entry = compile_pattern (
|
||||
string,
|
||||
preserve_match_data ? &search_regs : NULL,
|
||||
(!NILP (BVAR (current_buffer, case_fold_search))
|
||||
? BVAR (current_buffer, case_canon_table) : Qnil),
|
||||
posix,
|
||||
!NILP (BVAR (current_buffer, enable_multibyte_characters)));
|
||||
|
||||
/* Do a pending quit right away, to avoid paradoxical behavior */
|
||||
maybe_quit ();
|
||||
|
@ -310,21 +329,20 @@ looking_at_1 (Lisp_Object string, bool posix)
|
|||
s2 = 0;
|
||||
}
|
||||
|
||||
re_match_object = Qnil;
|
||||
|
||||
ptrdiff_t count = SPECPDL_INDEX ();
|
||||
freeze_buffer_relocation ();
|
||||
i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
|
||||
freeze_pattern (cache_entry);
|
||||
re_match_object = Qnil;
|
||||
i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
|
||||
PT_BYTE - BEGV_BYTE,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : NULL),
|
||||
preserve_match_data ? &search_regs : NULL,
|
||||
ZV_BYTE - BEGV_BYTE);
|
||||
thaw_buffer_relocation ();
|
||||
|
||||
if (i == -2)
|
||||
matcher_overflow ();
|
||||
|
||||
val = (i >= 0 ? Qt : Qnil);
|
||||
if (NILP (Vinhibit_changing_match_data) && i >= 0)
|
||||
if (preserve_match_data && i >= 0)
|
||||
{
|
||||
for (i = 0; i < search_regs.num_regs; i++)
|
||||
if (search_regs.start[i] >= 0)
|
||||
|
@ -338,7 +356,7 @@ looking_at_1 (Lisp_Object string, bool posix)
|
|||
XSETBUFFER (last_thing_searched, current_buffer);
|
||||
}
|
||||
|
||||
return val;
|
||||
return unbind_to (count, val);
|
||||
}
|
||||
|
||||
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
|
||||
|
@ -396,15 +414,14 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
|
|||
set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
|
||||
BVAR (current_buffer, case_eqv_table));
|
||||
|
||||
bufp = compile_pattern (regexp,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : NULL),
|
||||
(!NILP (BVAR (current_buffer, case_fold_search))
|
||||
? BVAR (current_buffer, case_canon_table) : Qnil),
|
||||
posix,
|
||||
STRING_MULTIBYTE (string));
|
||||
bufp = &compile_pattern (regexp,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : NULL),
|
||||
(!NILP (BVAR (current_buffer, case_fold_search))
|
||||
? BVAR (current_buffer, case_canon_table) : Qnil),
|
||||
posix,
|
||||
STRING_MULTIBYTE (string))->buf;
|
||||
re_match_object = string;
|
||||
|
||||
val = re_search (bufp, SSDATA (string),
|
||||
SBYTES (string), pos_byte,
|
||||
SBYTES (string) - pos_byte,
|
||||
|
@ -471,10 +488,9 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
|
|||
ptrdiff_t val;
|
||||
struct re_pattern_buffer *bufp;
|
||||
|
||||
bufp = compile_pattern (regexp, 0, table,
|
||||
0, STRING_MULTIBYTE (string));
|
||||
bufp = &compile_pattern (regexp, 0, table,
|
||||
0, STRING_MULTIBYTE (string))->buf;
|
||||
re_match_object = string;
|
||||
|
||||
val = re_search (bufp, SSDATA (string),
|
||||
SBYTES (string), 0,
|
||||
SBYTES (string), 0);
|
||||
|
@ -494,10 +510,10 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
|
|||
struct re_pattern_buffer *bufp;
|
||||
|
||||
regexp = string_make_unibyte (regexp);
|
||||
bufp = &compile_pattern (regexp, 0,
|
||||
Vascii_canon_table, 0,
|
||||
0)->buf;
|
||||
re_match_object = Qt;
|
||||
bufp = compile_pattern (regexp, 0,
|
||||
Vascii_canon_table, 0,
|
||||
0);
|
||||
val = re_search (bufp, string, len, 0, len, 0);
|
||||
return val;
|
||||
}
|
||||
|
@ -513,7 +529,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
|
|||
ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
|
||||
{
|
||||
bool multibyte;
|
||||
struct re_pattern_buffer *buf;
|
||||
unsigned char *p1, *p2;
|
||||
ptrdiff_t s1, s2;
|
||||
ptrdiff_t len;
|
||||
|
@ -528,7 +543,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
|
|||
s1 = 0;
|
||||
p2 = SDATA (string);
|
||||
s2 = SBYTES (string);
|
||||
re_match_object = string;
|
||||
multibyte = STRING_MULTIBYTE (string);
|
||||
}
|
||||
else
|
||||
|
@ -554,16 +568,19 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
|
|||
s1 = ZV_BYTE - BEGV_BYTE;
|
||||
s2 = 0;
|
||||
}
|
||||
re_match_object = Qnil;
|
||||
multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
|
||||
}
|
||||
|
||||
buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
|
||||
struct regexp_cache *cache_entry =
|
||||
compile_pattern (regexp, 0, Qnil, 0, multibyte);
|
||||
ptrdiff_t count = SPECPDL_INDEX ();
|
||||
freeze_buffer_relocation ();
|
||||
len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
|
||||
freeze_pattern (cache_entry);
|
||||
re_match_object = STRINGP (string) ? string : Qnil;
|
||||
len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
|
||||
pos_byte, NULL, limit_byte);
|
||||
thaw_buffer_relocation ();
|
||||
|
||||
unbind_to (count, Qnil);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
@ -1150,356 +1167,373 @@ while (0)
|
|||
(i.e. Vinhibit_changing_match_data is non-nil). */
|
||||
static struct re_registers search_regs_1;
|
||||
|
||||
static EMACS_INT
|
||||
search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
|
||||
ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
|
||||
Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
|
||||
{
|
||||
unsigned char *p1, *p2;
|
||||
ptrdiff_t s1, s2;
|
||||
|
||||
/* Snapshot in case Lisp changes the value. */
|
||||
bool preserve_match_data = NILP (Vinhibit_changing_match_data);
|
||||
|
||||
struct regexp_cache *cache_entry =
|
||||
compile_pattern (string,
|
||||
preserve_match_data ? &search_regs : &search_regs_1,
|
||||
trt, posix,
|
||||
!NILP (BVAR (current_buffer, enable_multibyte_characters)));
|
||||
struct re_pattern_buffer *bufp = &cache_entry->buf;
|
||||
|
||||
maybe_quit (); /* Do a pending quit right away,
|
||||
to avoid paradoxical behavior */
|
||||
/* Get pointers and sizes of the two strings
|
||||
that make up the visible portion of the buffer. */
|
||||
|
||||
p1 = BEGV_ADDR;
|
||||
s1 = GPT_BYTE - BEGV_BYTE;
|
||||
p2 = GAP_END_ADDR;
|
||||
s2 = ZV_BYTE - GPT_BYTE;
|
||||
if (s1 < 0)
|
||||
{
|
||||
p2 = p1;
|
||||
s2 = ZV_BYTE - BEGV_BYTE;
|
||||
s1 = 0;
|
||||
}
|
||||
if (s2 < 0)
|
||||
{
|
||||
s1 = ZV_BYTE - BEGV_BYTE;
|
||||
s2 = 0;
|
||||
}
|
||||
|
||||
ptrdiff_t count = SPECPDL_INDEX ();
|
||||
freeze_buffer_relocation ();
|
||||
freeze_pattern (cache_entry);
|
||||
|
||||
while (n < 0)
|
||||
{
|
||||
ptrdiff_t val;
|
||||
|
||||
re_match_object = Qnil;
|
||||
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
|
||||
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
|
||||
preserve_match_data ? &search_regs : &search_regs_1,
|
||||
/* Don't allow match past current point */
|
||||
pos_byte - BEGV_BYTE);
|
||||
if (val == -2)
|
||||
{
|
||||
matcher_overflow ();
|
||||
}
|
||||
if (val >= 0)
|
||||
{
|
||||
if (preserve_match_data)
|
||||
{
|
||||
pos_byte = search_regs.start[0] + BEGV_BYTE;
|
||||
for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
|
||||
if (search_regs.start[i] >= 0)
|
||||
{
|
||||
search_regs.start[i]
|
||||
= BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
|
||||
search_regs.end[i]
|
||||
= BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
|
||||
}
|
||||
XSETBUFFER (last_thing_searched, current_buffer);
|
||||
/* Set pos to the new position. */
|
||||
pos = search_regs.start[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_byte = search_regs_1.start[0] + BEGV_BYTE;
|
||||
/* Set pos to the new position. */
|
||||
pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unbind_to (count, Qnil);
|
||||
return (n);
|
||||
}
|
||||
n++;
|
||||
maybe_quit ();
|
||||
}
|
||||
while (n > 0)
|
||||
{
|
||||
ptrdiff_t val;
|
||||
|
||||
re_match_object = Qnil;
|
||||
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
|
||||
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
|
||||
preserve_match_data ? &search_regs : &search_regs_1,
|
||||
lim_byte - BEGV_BYTE);
|
||||
if (val == -2)
|
||||
{
|
||||
matcher_overflow ();
|
||||
}
|
||||
if (val >= 0)
|
||||
{
|
||||
if (preserve_match_data)
|
||||
{
|
||||
pos_byte = search_regs.end[0] + BEGV_BYTE;
|
||||
for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
|
||||
if (search_regs.start[i] >= 0)
|
||||
{
|
||||
search_regs.start[i]
|
||||
= BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
|
||||
search_regs.end[i]
|
||||
= BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
|
||||
}
|
||||
XSETBUFFER (last_thing_searched, current_buffer);
|
||||
pos = search_regs.end[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_byte = search_regs_1.end[0] + BEGV_BYTE;
|
||||
pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unbind_to (count, Qnil);
|
||||
return (0 - n);
|
||||
}
|
||||
n--;
|
||||
maybe_quit ();
|
||||
}
|
||||
unbind_to (count, Qnil);
|
||||
return (pos);
|
||||
}
|
||||
|
||||
static EMACS_INT
|
||||
search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
|
||||
ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
|
||||
EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
|
||||
bool posix)
|
||||
{
|
||||
unsigned char *raw_pattern, *pat;
|
||||
ptrdiff_t raw_pattern_size;
|
||||
ptrdiff_t raw_pattern_size_byte;
|
||||
unsigned char *patbuf;
|
||||
bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
|
||||
unsigned char *base_pat;
|
||||
/* Set to positive if we find a non-ASCII char that need
|
||||
translation. Otherwise set to zero later. */
|
||||
int char_base = -1;
|
||||
bool boyer_moore_ok = 1;
|
||||
USE_SAFE_ALLOCA;
|
||||
|
||||
/* MULTIBYTE says whether the text to be searched is multibyte.
|
||||
We must convert PATTERN to match that, or we will not really
|
||||
find things right. */
|
||||
|
||||
if (multibyte == STRING_MULTIBYTE (string))
|
||||
{
|
||||
raw_pattern = SDATA (string);
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte = SBYTES (string);
|
||||
}
|
||||
else if (multibyte)
|
||||
{
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte
|
||||
= count_size_as_multibyte (SDATA (string),
|
||||
raw_pattern_size);
|
||||
raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
|
||||
copy_text (SDATA (string), raw_pattern,
|
||||
SCHARS (string), 0, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Converting multibyte to single-byte.
|
||||
|
||||
??? Perhaps this conversion should be done in a special way
|
||||
by subtracting nonascii-insert-offset from each non-ASCII char,
|
||||
so that only the multibyte chars which really correspond to
|
||||
the chosen single-byte character set can possibly match. */
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte = SCHARS (string);
|
||||
raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
|
||||
copy_text (SDATA (string), raw_pattern,
|
||||
SBYTES (string), 1, 0);
|
||||
}
|
||||
|
||||
/* Copy and optionally translate the pattern. */
|
||||
ptrdiff_t len = raw_pattern_size;
|
||||
ptrdiff_t len_byte = raw_pattern_size_byte;
|
||||
SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
|
||||
pat = patbuf;
|
||||
base_pat = raw_pattern;
|
||||
if (multibyte)
|
||||
{
|
||||
/* Fill patbuf by translated characters in STRING while
|
||||
checking if we can use boyer-moore search. If TRT is
|
||||
non-nil, we can use boyer-moore search only if TRT can be
|
||||
represented by the byte array of 256 elements. For that,
|
||||
all non-ASCII case-equivalents of all case-sensitive
|
||||
characters in STRING must belong to the same character
|
||||
group (two characters belong to the same group iff their
|
||||
multibyte forms are the same except for the last byte;
|
||||
i.e. every 64 characters form a group; U+0000..U+003F,
|
||||
U+0040..U+007F, U+0080..U+00BF, ...). */
|
||||
|
||||
while (--len >= 0)
|
||||
{
|
||||
unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
|
||||
int c, translated, inverse;
|
||||
int in_charlen, charlen;
|
||||
|
||||
/* If we got here and the RE flag is set, it's because we're
|
||||
dealing with a regexp known to be trivial, so the backslash
|
||||
just quotes the next character. */
|
||||
if (RE && *base_pat == '\\')
|
||||
{
|
||||
len--;
|
||||
raw_pattern_size--;
|
||||
len_byte--;
|
||||
base_pat++;
|
||||
}
|
||||
|
||||
c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
|
||||
|
||||
if (NILP (trt))
|
||||
{
|
||||
str = base_pat;
|
||||
charlen = in_charlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Translate the character. */
|
||||
TRANSLATE (translated, trt, c);
|
||||
charlen = CHAR_STRING (translated, str_base);
|
||||
str = str_base;
|
||||
|
||||
/* Check if C has any other case-equivalents. */
|
||||
TRANSLATE (inverse, inverse_trt, c);
|
||||
/* If so, check if we can use boyer-moore. */
|
||||
if (c != inverse && boyer_moore_ok)
|
||||
{
|
||||
/* Check if all equivalents belong to the same
|
||||
group of characters. Note that the check of C
|
||||
itself is done by the last iteration. */
|
||||
int this_char_base = -1;
|
||||
|
||||
while (boyer_moore_ok)
|
||||
{
|
||||
if (ASCII_CHAR_P (inverse))
|
||||
{
|
||||
if (this_char_base > 0)
|
||||
boyer_moore_ok = 0;
|
||||
else
|
||||
this_char_base = 0;
|
||||
}
|
||||
else if (CHAR_BYTE8_P (inverse))
|
||||
/* Boyer-moore search can't handle a
|
||||
translation of an eight-bit
|
||||
character. */
|
||||
boyer_moore_ok = 0;
|
||||
else if (this_char_base < 0)
|
||||
{
|
||||
this_char_base = inverse & ~0x3F;
|
||||
if (char_base < 0)
|
||||
char_base = this_char_base;
|
||||
else if (this_char_base != char_base)
|
||||
boyer_moore_ok = 0;
|
||||
}
|
||||
else if ((inverse & ~0x3F) != this_char_base)
|
||||
boyer_moore_ok = 0;
|
||||
if (c == inverse)
|
||||
break;
|
||||
TRANSLATE (inverse, inverse_trt, inverse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Store this character into the translated pattern. */
|
||||
memcpy (pat, str, charlen);
|
||||
pat += charlen;
|
||||
base_pat += in_charlen;
|
||||
len_byte -= in_charlen;
|
||||
}
|
||||
|
||||
/* If char_base is still negative we didn't find any translated
|
||||
non-ASCII characters. */
|
||||
if (char_base < 0)
|
||||
char_base = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Unibyte buffer. */
|
||||
char_base = 0;
|
||||
while (--len >= 0)
|
||||
{
|
||||
int c, translated, inverse;
|
||||
|
||||
/* If we got here and the RE flag is set, it's because we're
|
||||
dealing with a regexp known to be trivial, so the backslash
|
||||
just quotes the next character. */
|
||||
if (RE && *base_pat == '\\')
|
||||
{
|
||||
len--;
|
||||
raw_pattern_size--;
|
||||
base_pat++;
|
||||
}
|
||||
c = *base_pat++;
|
||||
TRANSLATE (translated, trt, c);
|
||||
*pat++ = translated;
|
||||
/* Check that none of C's equivalents violates the
|
||||
assumptions of boyer_moore. */
|
||||
TRANSLATE (inverse, inverse_trt, c);
|
||||
while (1)
|
||||
{
|
||||
if (inverse >= 0200)
|
||||
{
|
||||
boyer_moore_ok = 0;
|
||||
break;
|
||||
}
|
||||
if (c == inverse)
|
||||
break;
|
||||
TRANSLATE (inverse, inverse_trt, inverse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
len_byte = pat - patbuf;
|
||||
pat = base_pat = patbuf;
|
||||
|
||||
EMACS_INT result
|
||||
= (boyer_moore_ok
|
||||
? boyer_moore (n, pat, len_byte, trt, inverse_trt,
|
||||
pos_byte, lim_byte,
|
||||
char_base)
|
||||
: simple_search (n, pat, raw_pattern_size, len_byte, trt,
|
||||
pos, pos_byte, lim, lim_byte));
|
||||
SAFE_FREE ();
|
||||
return result;
|
||||
}
|
||||
|
||||
static EMACS_INT
|
||||
search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
|
||||
ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
|
||||
int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
|
||||
{
|
||||
ptrdiff_t len = SCHARS (string);
|
||||
ptrdiff_t len_byte = SBYTES (string);
|
||||
register ptrdiff_t i;
|
||||
|
||||
if (running_asynch_code)
|
||||
save_search_regs ();
|
||||
|
||||
/* Searching 0 times means don't move. */
|
||||
/* Null string is found at starting position. */
|
||||
if (len == 0 || n == 0)
|
||||
if (n == 0 || SCHARS (string) == 0)
|
||||
{
|
||||
set_search_regs (pos_byte, 0);
|
||||
return pos;
|
||||
}
|
||||
|
||||
if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
|
||||
{
|
||||
unsigned char *p1, *p2;
|
||||
ptrdiff_t s1, s2;
|
||||
struct re_pattern_buffer *bufp;
|
||||
pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
|
||||
n, trt, inverse_trt, posix);
|
||||
else
|
||||
pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
|
||||
n, RE, trt, inverse_trt, posix);
|
||||
|
||||
bufp = compile_pattern (string,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : &search_regs_1),
|
||||
trt, posix,
|
||||
!NILP (BVAR (current_buffer, enable_multibyte_characters)));
|
||||
|
||||
maybe_quit (); /* Do a pending quit right away,
|
||||
to avoid paradoxical behavior */
|
||||
/* Get pointers and sizes of the two strings
|
||||
that make up the visible portion of the buffer. */
|
||||
|
||||
p1 = BEGV_ADDR;
|
||||
s1 = GPT_BYTE - BEGV_BYTE;
|
||||
p2 = GAP_END_ADDR;
|
||||
s2 = ZV_BYTE - GPT_BYTE;
|
||||
if (s1 < 0)
|
||||
{
|
||||
p2 = p1;
|
||||
s2 = ZV_BYTE - BEGV_BYTE;
|
||||
s1 = 0;
|
||||
}
|
||||
if (s2 < 0)
|
||||
{
|
||||
s1 = ZV_BYTE - BEGV_BYTE;
|
||||
s2 = 0;
|
||||
}
|
||||
re_match_object = Qnil;
|
||||
|
||||
freeze_buffer_relocation ();
|
||||
|
||||
while (n < 0)
|
||||
{
|
||||
ptrdiff_t val;
|
||||
|
||||
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
|
||||
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : &search_regs_1),
|
||||
/* Don't allow match past current point */
|
||||
pos_byte - BEGV_BYTE);
|
||||
if (val == -2)
|
||||
{
|
||||
matcher_overflow ();
|
||||
}
|
||||
if (val >= 0)
|
||||
{
|
||||
if (NILP (Vinhibit_changing_match_data))
|
||||
{
|
||||
pos_byte = search_regs.start[0] + BEGV_BYTE;
|
||||
for (i = 0; i < search_regs.num_regs; i++)
|
||||
if (search_regs.start[i] >= 0)
|
||||
{
|
||||
search_regs.start[i]
|
||||
= BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
|
||||
search_regs.end[i]
|
||||
= BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
|
||||
}
|
||||
XSETBUFFER (last_thing_searched, current_buffer);
|
||||
/* Set pos to the new position. */
|
||||
pos = search_regs.start[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_byte = search_regs_1.start[0] + BEGV_BYTE;
|
||||
/* Set pos to the new position. */
|
||||
pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
thaw_buffer_relocation ();
|
||||
return (n);
|
||||
}
|
||||
n++;
|
||||
maybe_quit ();
|
||||
}
|
||||
while (n > 0)
|
||||
{
|
||||
ptrdiff_t val;
|
||||
|
||||
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
|
||||
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
|
||||
(NILP (Vinhibit_changing_match_data)
|
||||
? &search_regs : &search_regs_1),
|
||||
lim_byte - BEGV_BYTE);
|
||||
if (val == -2)
|
||||
{
|
||||
matcher_overflow ();
|
||||
}
|
||||
if (val >= 0)
|
||||
{
|
||||
if (NILP (Vinhibit_changing_match_data))
|
||||
{
|
||||
pos_byte = search_regs.end[0] + BEGV_BYTE;
|
||||
for (i = 0; i < search_regs.num_regs; i++)
|
||||
if (search_regs.start[i] >= 0)
|
||||
{
|
||||
search_regs.start[i]
|
||||
= BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
|
||||
search_regs.end[i]
|
||||
= BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
|
||||
}
|
||||
XSETBUFFER (last_thing_searched, current_buffer);
|
||||
pos = search_regs.end[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
pos_byte = search_regs_1.end[0] + BEGV_BYTE;
|
||||
pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
thaw_buffer_relocation ();
|
||||
return (0 - n);
|
||||
}
|
||||
n--;
|
||||
maybe_quit ();
|
||||
}
|
||||
thaw_buffer_relocation ();
|
||||
return (pos);
|
||||
}
|
||||
else /* non-RE case */
|
||||
{
|
||||
unsigned char *raw_pattern, *pat;
|
||||
ptrdiff_t raw_pattern_size;
|
||||
ptrdiff_t raw_pattern_size_byte;
|
||||
unsigned char *patbuf;
|
||||
bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
|
||||
unsigned char *base_pat;
|
||||
/* Set to positive if we find a non-ASCII char that need
|
||||
translation. Otherwise set to zero later. */
|
||||
int char_base = -1;
|
||||
bool boyer_moore_ok = 1;
|
||||
USE_SAFE_ALLOCA;
|
||||
|
||||
/* MULTIBYTE says whether the text to be searched is multibyte.
|
||||
We must convert PATTERN to match that, or we will not really
|
||||
find things right. */
|
||||
|
||||
if (multibyte == STRING_MULTIBYTE (string))
|
||||
{
|
||||
raw_pattern = SDATA (string);
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte = SBYTES (string);
|
||||
}
|
||||
else if (multibyte)
|
||||
{
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte
|
||||
= count_size_as_multibyte (SDATA (string),
|
||||
raw_pattern_size);
|
||||
raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
|
||||
copy_text (SDATA (string), raw_pattern,
|
||||
SCHARS (string), 0, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Converting multibyte to single-byte.
|
||||
|
||||
??? Perhaps this conversion should be done in a special way
|
||||
by subtracting nonascii-insert-offset from each non-ASCII char,
|
||||
so that only the multibyte chars which really correspond to
|
||||
the chosen single-byte character set can possibly match. */
|
||||
raw_pattern_size = SCHARS (string);
|
||||
raw_pattern_size_byte = SCHARS (string);
|
||||
raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
|
||||
copy_text (SDATA (string), raw_pattern,
|
||||
SBYTES (string), 1, 0);
|
||||
}
|
||||
|
||||
/* Copy and optionally translate the pattern. */
|
||||
len = raw_pattern_size;
|
||||
len_byte = raw_pattern_size_byte;
|
||||
SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
|
||||
pat = patbuf;
|
||||
base_pat = raw_pattern;
|
||||
if (multibyte)
|
||||
{
|
||||
/* Fill patbuf by translated characters in STRING while
|
||||
checking if we can use boyer-moore search. If TRT is
|
||||
non-nil, we can use boyer-moore search only if TRT can be
|
||||
represented by the byte array of 256 elements. For that,
|
||||
all non-ASCII case-equivalents of all case-sensitive
|
||||
characters in STRING must belong to the same character
|
||||
group (two characters belong to the same group iff their
|
||||
multibyte forms are the same except for the last byte;
|
||||
i.e. every 64 characters form a group; U+0000..U+003F,
|
||||
U+0040..U+007F, U+0080..U+00BF, ...). */
|
||||
|
||||
while (--len >= 0)
|
||||
{
|
||||
unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
|
||||
int c, translated, inverse;
|
||||
int in_charlen, charlen;
|
||||
|
||||
/* If we got here and the RE flag is set, it's because we're
|
||||
dealing with a regexp known to be trivial, so the backslash
|
||||
just quotes the next character. */
|
||||
if (RE && *base_pat == '\\')
|
||||
{
|
||||
len--;
|
||||
raw_pattern_size--;
|
||||
len_byte--;
|
||||
base_pat++;
|
||||
}
|
||||
|
||||
c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
|
||||
|
||||
if (NILP (trt))
|
||||
{
|
||||
str = base_pat;
|
||||
charlen = in_charlen;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Translate the character. */
|
||||
TRANSLATE (translated, trt, c);
|
||||
charlen = CHAR_STRING (translated, str_base);
|
||||
str = str_base;
|
||||
|
||||
/* Check if C has any other case-equivalents. */
|
||||
TRANSLATE (inverse, inverse_trt, c);
|
||||
/* If so, check if we can use boyer-moore. */
|
||||
if (c != inverse && boyer_moore_ok)
|
||||
{
|
||||
/* Check if all equivalents belong to the same
|
||||
group of characters. Note that the check of C
|
||||
itself is done by the last iteration. */
|
||||
int this_char_base = -1;
|
||||
|
||||
while (boyer_moore_ok)
|
||||
{
|
||||
if (ASCII_CHAR_P (inverse))
|
||||
{
|
||||
if (this_char_base > 0)
|
||||
boyer_moore_ok = 0;
|
||||
else
|
||||
this_char_base = 0;
|
||||
}
|
||||
else if (CHAR_BYTE8_P (inverse))
|
||||
/* Boyer-moore search can't handle a
|
||||
translation of an eight-bit
|
||||
character. */
|
||||
boyer_moore_ok = 0;
|
||||
else if (this_char_base < 0)
|
||||
{
|
||||
this_char_base = inverse & ~0x3F;
|
||||
if (char_base < 0)
|
||||
char_base = this_char_base;
|
||||
else if (this_char_base != char_base)
|
||||
boyer_moore_ok = 0;
|
||||
}
|
||||
else if ((inverse & ~0x3F) != this_char_base)
|
||||
boyer_moore_ok = 0;
|
||||
if (c == inverse)
|
||||
break;
|
||||
TRANSLATE (inverse, inverse_trt, inverse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Store this character into the translated pattern. */
|
||||
memcpy (pat, str, charlen);
|
||||
pat += charlen;
|
||||
base_pat += in_charlen;
|
||||
len_byte -= in_charlen;
|
||||
}
|
||||
|
||||
/* If char_base is still negative we didn't find any translated
|
||||
non-ASCII characters. */
|
||||
if (char_base < 0)
|
||||
char_base = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Unibyte buffer. */
|
||||
char_base = 0;
|
||||
while (--len >= 0)
|
||||
{
|
||||
int c, translated, inverse;
|
||||
|
||||
/* If we got here and the RE flag is set, it's because we're
|
||||
dealing with a regexp known to be trivial, so the backslash
|
||||
just quotes the next character. */
|
||||
if (RE && *base_pat == '\\')
|
||||
{
|
||||
len--;
|
||||
raw_pattern_size--;
|
||||
base_pat++;
|
||||
}
|
||||
c = *base_pat++;
|
||||
TRANSLATE (translated, trt, c);
|
||||
*pat++ = translated;
|
||||
/* Check that none of C's equivalents violates the
|
||||
assumptions of boyer_moore. */
|
||||
TRANSLATE (inverse, inverse_trt, c);
|
||||
while (1)
|
||||
{
|
||||
if (inverse >= 0200)
|
||||
{
|
||||
boyer_moore_ok = 0;
|
||||
break;
|
||||
}
|
||||
if (c == inverse)
|
||||
break;
|
||||
TRANSLATE (inverse, inverse_trt, inverse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
len_byte = pat - patbuf;
|
||||
pat = base_pat = patbuf;
|
||||
|
||||
EMACS_INT result
|
||||
= (boyer_moore_ok
|
||||
? boyer_moore (n, pat, len_byte, trt, inverse_trt,
|
||||
pos_byte, lim_byte,
|
||||
char_base)
|
||||
: simple_search (n, pat, raw_pattern_size, len_byte, trt,
|
||||
pos, pos_byte, lim, lim_byte));
|
||||
SAFE_FREE ();
|
||||
return result;
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Do a simple string search N times for the string PAT,
|
||||
|
@ -3353,6 +3387,7 @@ the buffer. If the buffer doesn't have a cache, the value is nil. */)
|
|||
return val;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
syms_of_search (void)
|
||||
{
|
||||
|
@ -3365,6 +3400,7 @@ syms_of_search (void)
|
|||
searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
|
||||
searchbufs[i].regexp = Qnil;
|
||||
searchbufs[i].f_whitespace_regexp = Qnil;
|
||||
searchbufs[i].busy = false;
|
||||
searchbufs[i].syntax_table = Qnil;
|
||||
staticpro (&searchbufs[i].regexp);
|
||||
staticpro (&searchbufs[i].f_whitespace_regexp);
|
||||
|
@ -3405,6 +3441,9 @@ syms_of_search (void)
|
|||
saved_last_thing_searched = Qnil;
|
||||
staticpro (&saved_last_thing_searched);
|
||||
|
||||
re_match_object = Qnil;
|
||||
staticpro (&re_match_object);
|
||||
|
||||
DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
|
||||
doc: /* Regexp to substitute for bunches of spaces in regexp search.
|
||||
Some commands use this for user-specified regexps.
|
||||
|
|
14
src/syntax.h
14
src/syntax.h
|
@ -186,13 +186,6 @@ UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
|
|||
false, gl_state.object);
|
||||
}
|
||||
|
||||
INLINE void
|
||||
UPDATE_SYNTAX_TABLE_FORWARD_FAST (ptrdiff_t charpos)
|
||||
{
|
||||
if (parse_sexp_lookup_properties && charpos >= gl_state.e_property)
|
||||
update_syntax_table (charpos + gl_state.offset, 1, false, gl_state.object);
|
||||
}
|
||||
|
||||
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
|
||||
currently good for a position after CHARPOS. */
|
||||
|
||||
|
@ -212,13 +205,6 @@ UPDATE_SYNTAX_TABLE (ptrdiff_t charpos)
|
|||
UPDATE_SYNTAX_TABLE_FORWARD (charpos);
|
||||
}
|
||||
|
||||
INLINE void
|
||||
UPDATE_SYNTAX_TABLE_FAST (ptrdiff_t charpos)
|
||||
{
|
||||
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
|
||||
UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
|
||||
}
|
||||
|
||||
/* Set up the buffer-global syntax table. */
|
||||
|
||||
INLINE void
|
||||
|
|
|
@ -137,15 +137,6 @@ struct thread_state
|
|||
struct re_registers m_saved_search_regs;
|
||||
#define saved_search_regs (current_thread->m_saved_search_regs)
|
||||
|
||||
/* This is the string or buffer in which we
|
||||
are matching. It is used for looking up syntax properties.
|
||||
|
||||
If the value is a Lisp string object, we are matching text in that
|
||||
string; if it's nil, we are matching text in the current buffer; if
|
||||
it's t, we are matching text in a C string. */
|
||||
Lisp_Object m_re_match_object;
|
||||
#define re_match_object (current_thread->m_re_match_object)
|
||||
|
||||
/* This member is different from waiting_for_input.
|
||||
It is used to communicate to a lisp process-filter/sentinel (via the
|
||||
function Fwaiting_for_user_input_p) whether Emacs was waiting
|
||||
|
|
Loading…
Add table
Reference in a new issue