regex.c: Fix recent regression with mutually_exclusive_p
The new analysis code ended up increasing the scope of an optimization a bit too far. Rein it in. * src/regex-emacs.c (struct mutexcl_data): Add `unconstrained` field. (mutually_exclusive_one): Use and set it. (mutually_exclusive_p): Initialize it. * test/src/regex-emacs-tests.el (regexp-tests-backtrack-optimization): Add test.
This commit is contained in:
parent
849de5aa1a
commit
37130fd500
2 changed files with 38 additions and 12 deletions
|
@ -3899,6 +3899,7 @@ mutually_exclusive_charset (struct re_pattern_buffer *bufp, re_char *p1,
|
|||
struct mutexcl_data {
|
||||
struct re_pattern_buffer *bufp;
|
||||
re_char *p1;
|
||||
bool unconstrained;
|
||||
};
|
||||
|
||||
static bool
|
||||
|
@ -3907,7 +3908,32 @@ mutually_exclusive_one (re_char *p2, void *arg)
|
|||
struct mutexcl_data *data = arg;
|
||||
switch (*p2)
|
||||
{
|
||||
case succeed:
|
||||
/* If `p1` matches, `succeed` can still match, so we should return
|
||||
`false`. *BUT* when N iterations of `p1` and N+1 iterations of `p1`
|
||||
match, the `succeed` that comes after N+1 always takes precedence
|
||||
over the one after N because we always prefer a longer match, so
|
||||
the succeed after N can actually be replaced by a "fail" without
|
||||
changing the end result.
|
||||
In this sense, "if `p1` matches, `succeed` can't match".
|
||||
So we can return `true`.
|
||||
*BUT* this only holds if we're sure that the N+1 will indeed succeed,
|
||||
so we need to make sure there is no other matching operator between
|
||||
the exit of the iteration and the `succeed`. */
|
||||
return data->unconstrained;
|
||||
|
||||
/* Remember that there may be an empty matching operator on the way.
|
||||
If we return true, this is the "end" of this control flow path,
|
||||
so it can't get in the way of a subsequent `succeed`. */
|
||||
#define RETURN_CONSTRAIN(v) \
|
||||
{ bool tmp = (v); \
|
||||
if (!tmp) \
|
||||
data->unconstrained = false; \
|
||||
return tmp; \
|
||||
}
|
||||
|
||||
case endline:
|
||||
RETURN_CONSTRAIN (mutually_exclusive_exactn (data->bufp, data->p1, p2));
|
||||
case exactn:
|
||||
return mutually_exclusive_exactn (data->bufp, data->p1, p2);
|
||||
case charset:
|
||||
|
@ -3945,18 +3971,17 @@ mutually_exclusive_one (re_char *p2, void *arg)
|
|||
return (*data->p1 == categoryspec && data->p1[1] == p2[1]);
|
||||
|
||||
case endbuf:
|
||||
case succeed:
|
||||
return true;
|
||||
case wordbeg:
|
||||
return (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
|
||||
RETURN_CONSTRAIN (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
|
||||
case wordend:
|
||||
return (*data->p1 == syntaxspec && data->p1[1] == Sword);
|
||||
RETURN_CONSTRAIN (*data->p1 == syntaxspec && data->p1[1] == Sword);
|
||||
case symbeg:
|
||||
return (*data->p1 == notsyntaxspec
|
||||
&& (data->p1[1] == Ssymbol || data->p1[1] == Sword));
|
||||
RETURN_CONSTRAIN (*data->p1 == notsyntaxspec
|
||||
&& (data->p1[1] == Ssymbol || data->p1[1] == Sword));
|
||||
case symend:
|
||||
return (*data->p1 == syntaxspec
|
||||
&& (data->p1[1] == Ssymbol || data->p1[1] == Sword));
|
||||
RETURN_CONSTRAIN (*data->p1 == syntaxspec
|
||||
&& (data->p1[1] == Ssymbol || data->p1[1] == Sword));
|
||||
|
||||
case duplicate:
|
||||
/* At this point, we know nothing about what this can match, sadly. */
|
||||
|
@ -3976,7 +4001,7 @@ static bool
|
|||
mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
|
||||
re_char *p2)
|
||||
{
|
||||
struct mutexcl_data data = { bufp, p1 };
|
||||
struct mutexcl_data data = { bufp, p1, true };
|
||||
return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
|
||||
}
|
||||
|
||||
|
|
|
@ -555,10 +555,10 @@ known/benign differences in behavior.")
|
|||
|
||||
(defconst regex-tests-PTESTS-whitelist
|
||||
[
|
||||
;; emacs doesn't see DEL (0x7f) as a [:cntrl:] character
|
||||
;; Emacs doesn't see DEL (0x7f) as a [:cntrl:] character
|
||||
138
|
||||
|
||||
;; emacs doesn't barf on weird ranges such as [b-a], but simply
|
||||
;; Emacs doesn't barf on weird ranges such as [b-a], but simply
|
||||
;; fails to match
|
||||
168
|
||||
]
|
||||
|
@ -872,14 +872,14 @@ This evaluates the TESTS test cases from glibc."
|
|||
(should (equal (string-match "\\`\\(?:ab\\)*\\'" "a") nil))
|
||||
(should (equal (string-match "\\`a\\{2\\}*\\'" "a") nil)))
|
||||
|
||||
(ert-deftest regexp-tests-backtrack-optimization () ;bug#61514
|
||||
(ert-deftest regexp-tests-backtrack-optimization ()
|
||||
;; Make sure we don't use up the regexp stack needlessly.
|
||||
(with-current-buffer (get-buffer-create "*bug*")
|
||||
(erase-buffer)
|
||||
(insert (make-string 1000000 ?x) "=")
|
||||
(goto-char (point-min))
|
||||
;; Make sure we do perform the optimization (if we don't, the
|
||||
;; below will burp with regexp-stack-overflow).
|
||||
;; below will burp with regexp-stack-overflow). ;bug#61514
|
||||
(should (looking-at "x*=*"))
|
||||
(should (looking-at "x*\\(=\\|:\\)"))
|
||||
(should (looking-at "x*\\(=\\|:\\)*"))
|
||||
|
@ -908,6 +908,7 @@ This evaluates the TESTS test cases from glibc."
|
|||
(should (eq 0 (string-match "\\(ca*\\|ab\\)+d" "cabd")))
|
||||
(should (string-match "\\(aa*\\|b\\)*c" "ababc"))
|
||||
(should (string-match " \\sw*\\bfoo" " foo"))
|
||||
(should (string-match ".*\\>" "hello "))
|
||||
))
|
||||
|
||||
(ert-deftest regexp-tests-zero-width-assertion-repetition ()
|
||||
|
|
Loading…
Add table
Reference in a new issue