Fix handling of allocation in regex matching

`re_match_2_internal' uses pointers to the lisp objects that it
searches.  Since it may call malloc when growing the "fail stack", these
pointers may be invalidated while searching, resulting in memory
corruption (Bug #24358).

To fix this, we check the pointer that the lisp object (as specified by
re_match_object) points to before and after growing the stack, and
update existing pointers accordingly.

* src/regex.c (STR_BASE_PTR): New macro.
(ENSURE_FAIL_STACK, re_search_2): Use it to convert pointers into
offsets before possible malloc call, and back into pointers again
afterwards.
(POS_AS_IN_BUFFER): Add explanatory comment about punning trick.
* src/search.c (search_buffer): Instead of storing search locations as
pointers, store them as offsets and recompute the corresponding address
for each call to `re_search_2'.
(string_match_1, fast_string_match_internal, fast_looking_at):
* src/dired.c (directory_files_internal): Set `re_match_object' to Qnil
after calling `re_search' or `re_match_2'.
* src/regex.h (re_match_object): Mention new usage in commentary.
This commit is contained in:
Noam Postavsky 2016-10-19 20:23:50 -04:00
parent 5a26c9b0e1
commit ad66b3fadb
4 changed files with 103 additions and 17 deletions

View file

@ -259,9 +259,11 @@ directory_files_internal (Lisp_Object directory, Lisp_Object full,
QUIT; QUIT;
bool wanted = (NILP (match) bool wanted = (NILP (match)
|| re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0); || (re_match_object = name,
re_search (bufp, SSDATA (name), len, 0, len, 0) >= 0));
immediate_quit = 0; immediate_quit = 0;
re_match_object = Qnil; /* Stop protecting name from GC. */
if (wanted) if (wanted)
{ {

View file

@ -152,6 +152,8 @@
/* Converts the pointer to the char to BEG-based offset from the start. */ /* Converts the pointer to the char to BEG-based offset from the start. */
# define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
/* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
result to get the right base index. */
# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object))) # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
@ -1436,11 +1438,62 @@ typedef struct
#define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
#define TOP_FAILURE_HANDLE() fail_stack.frame #define TOP_FAILURE_HANDLE() fail_stack.frame
/* Base address of the text designated by OBJ, used to detect that the
   text has moved across a call that may allocate.  In Emacs this is
   current_buffer->text->beg when OBJ is nil, SDATA (OBJ) when OBJ is a
   Lisp string, and NULL for any other object.  Outside Emacs it is
   always NULL.  Note that OBJ may be evaluated more than once.  */
#ifdef emacs
#define STR_BASE_PTR(obj) \
(NILP (obj) ? current_buffer->text->beg : \
STRINGP (obj) ? SDATA (obj) : \
NULL)
#else
#define STR_BASE_PTR(obj) NULL
#endif
#define ENSURE_FAIL_STACK(space) \ #define ENSURE_FAIL_STACK(space) \
while (REMAINING_AVAIL_SLOTS <= space) { \ while (REMAINING_AVAIL_SLOTS <= space) { \
re_char* orig_base = STR_BASE_PTR (re_match_object); \
ptrdiff_t string1_off, end1_off, end_match_1_off; \
ptrdiff_t string2_off, end2_off, end_match_2_off; \
ptrdiff_t d_off, dend_off, dfail_off; \
if (orig_base) \
{ \
if (string1) \
{ \
string1_off = string1 - orig_base; \
end1_off = end1 - orig_base; \
end_match_1_off = end_match_1 - orig_base; \
} \
if (string2) \
{ \
string2_off = string2 - orig_base; \
end2_off = end2 - orig_base; \
end_match_2_off = end_match_2 - orig_base; \
} \
d_off = d - orig_base; \
dend_off = dend - orig_base; \
dfail_off = dfail - orig_base; \
} \
if (!GROW_FAIL_STACK (fail_stack)) \ if (!GROW_FAIL_STACK (fail_stack)) \
return -2; \ return -2; \
/* GROW_FAIL_STACK may call malloc and relocate the string */ \
/* pointers. */ \
re_char* new_base = STR_BASE_PTR (re_match_object); \
if (new_base && new_base != orig_base) \
{ \
if (string1) \
{ \
string1 = new_base + string1_off; \
end1 = new_base + end1_off; \
end_match_1 = new_base + end_match_1_off; \
} \
if (string2) \
{ \
string2 = new_base + string2_off; \
end2 = new_base + end2_off; \
end_match_2 = new_base + end_match_2_off; \
} \
d = new_base + d_off; \
dend = new_base + dend_off; \
dfail = new_base + dfail_off; \
} \
DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\ DEBUG_PRINT ("\n Doubled stack; size now: %zd\n", (fail_stack).size);\
DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\ DEBUG_PRINT (" slots available: %zd\n", REMAINING_AVAIL_SLOTS);\
} }
@ -4443,6 +4496,16 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
&& !bufp->can_be_null) && !bufp->can_be_null)
return -1; return -1;
/* re_match_2_internal may allocate, causing a relocation of the
lisp text object that we're searching. */
ptrdiff_t offset1, offset2;
re_char *orig_base = STR_BASE_PTR (re_match_object);
if (orig_base)
{
if (string1) offset1 = string1 - orig_base;
if (string2) offset2 = string2 - orig_base;
}
val = re_match_2_internal (bufp, string1, size1, string2, size2, val = re_match_2_internal (bufp, string1, size1, string2, size2,
startpos, regs, stop); startpos, regs, stop);
@ -4452,6 +4515,13 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1,
if (val == -2) if (val == -2)
return -2; return -2;
re_char *new_base = STR_BASE_PTR (re_match_object);
if (new_base && new_base != orig_base)
{
if (string1) string1 = offset1 + new_base;
if (string2) string2 = offset2 + new_base;
}
advance: advance:
if (!range) if (!range)
break; break;
@ -4887,8 +4957,8 @@ WEAK_ALIAS (__re_match, re_match)
#endif /* not emacs */ #endif /* not emacs */
#ifdef emacs #ifdef emacs
/* In Emacs, this is the string or buffer in which we /* In Emacs, this is the string or buffer in which we are matching.
are matching. It is used for looking up syntax properties. */ See the declaration in regex.h for details. */
Lisp_Object re_match_object; Lisp_Object re_match_object;
#endif #endif

View file

@ -169,7 +169,9 @@ extern reg_syntax_t re_syntax_options;
#ifdef emacs #ifdef emacs
# include "lisp.h" # include "lisp.h"
/* In Emacs, this is the string or buffer in which we are matching. /* In Emacs, this is the string or buffer in which we are matching.
It is used for looking up syntax properties. It is used for looking up syntax properties, and also to recompute
pointers in case the object is relocated as a side effect of
calling malloc (if it calls r_alloc_sbrk in ralloc.c).
If the value is a Lisp string object, we are matching text in that If the value is a Lisp string object, we are matching text in that
string; if it's nil, we are matching text in the current buffer; if string; if it's nil, we are matching text in the current buffer; if

View file

@ -287,8 +287,10 @@ looking_at_1 (Lisp_Object string, bool posix)
immediate_quit = 1; immediate_quit = 1;
QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */ QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
/* Get pointers and sizes of the two strings /* Get pointers and sizes of the two strings that make up the
that make up the visible portion of the buffer. */ visible portion of the buffer. Note that we can use pointers
here, unlike in search_buffer, because we only call re_match_2
once, after which we never use the pointers again. */
p1 = BEGV_ADDR; p1 = BEGV_ADDR;
s1 = GPT_BYTE - BEGV_BYTE; s1 = GPT_BYTE - BEGV_BYTE;
@ -407,6 +409,7 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
(NILP (Vinhibit_changing_match_data) (NILP (Vinhibit_changing_match_data)
? &search_regs : NULL)); ? &search_regs : NULL));
immediate_quit = 0; immediate_quit = 0;
re_match_object = Qnil; /* Stop protecting string from GC. */
/* Set last_thing_searched only when match data is changed. */ /* Set last_thing_searched only when match data is changed. */
if (NILP (Vinhibit_changing_match_data)) if (NILP (Vinhibit_changing_match_data))
@ -477,6 +480,7 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
SBYTES (string), 0, SBYTES (string), 0,
SBYTES (string), 0); SBYTES (string), 0);
immediate_quit = 0; immediate_quit = 0;
re_match_object = Qnil; /* Stop protecting string from GC. */
return val; return val;
} }
@ -564,6 +568,7 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2, len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
pos_byte, NULL, limit_byte); pos_byte, NULL, limit_byte);
immediate_quit = 0; immediate_quit = 0;
re_match_object = Qnil; /* Stop protecting string from GC. */
return len; return len;
} }
@ -1178,8 +1183,8 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
{ {
unsigned char *p1, *p2; unsigned char *base;
ptrdiff_t s1, s2; ptrdiff_t off1, off2, s1, s2;
struct re_pattern_buffer *bufp; struct re_pattern_buffer *bufp;
bufp = compile_pattern (string, bufp = compile_pattern (string,
@ -1193,16 +1198,19 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
can take too long. */ can take too long. */
QUIT; /* Do a pending quit right away, QUIT; /* Do a pending quit right away,
to avoid paradoxical behavior */ to avoid paradoxical behavior */
/* Get pointers and sizes of the two strings /* Get offsets and sizes of the two strings that make up the
that make up the visible portion of the buffer. */ visible portion of the buffer. We compute offsets instead of
pointers because re_search_2 may call malloc and therefore
change the buffer text address. */
p1 = BEGV_ADDR; base = current_buffer->text->beg;
off1 = BEGV_ADDR - base;
s1 = GPT_BYTE - BEGV_BYTE; s1 = GPT_BYTE - BEGV_BYTE;
p2 = GAP_END_ADDR; off2 = GAP_END_ADDR - base;
s2 = ZV_BYTE - GPT_BYTE; s2 = ZV_BYTE - GPT_BYTE;
if (s1 < 0) if (s1 < 0)
{ {
p2 = p1; off2 = off1;
s2 = ZV_BYTE - BEGV_BYTE; s2 = ZV_BYTE - BEGV_BYTE;
s1 = 0; s1 = 0;
} }
@ -1217,7 +1225,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
{ {
ptrdiff_t val; ptrdiff_t val;
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, val = re_search_2 (bufp,
(char*) (base + off1), s1,
(char*) (base + off2), s2,
pos_byte - BEGV_BYTE, lim_byte - pos_byte, pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data) (NILP (Vinhibit_changing_match_data)
? &search_regs : &search_regs_1), ? &search_regs : &search_regs_1),
@ -1262,7 +1272,9 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
{ {
ptrdiff_t val; ptrdiff_t val;
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, val = re_search_2 (bufp,
(char*) (base + off1), s1,
(char*) (base + off2), s2,
pos_byte - BEGV_BYTE, lim_byte - pos_byte, pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data) (NILP (Vinhibit_changing_match_data)
? &search_regs : &search_regs_1), ? &search_regs : &search_regs_1),