Fix regexp character class syntax property ghost matching bug
The syntax-table-dependent regexp character classes [:space:], [:word:] and [:punct:] always use the buffer-local syntax table for performance reasons. Fix a bug that could cause ghost (mis)matches when these classes picked up lingering state left behind by constructs that do use syntax properties, such as `\sX`. * src/regex-emacs.c (BUFFER_SYNTAX): New macro. (ISPUNCT, ISSPACE, ISWORD): Use BUFFER_SYNTAX instead of SYNTAX. (regex_compile): Delete syntax table setup code that is no longer needed. * test/src/regex-emacs-tests.el (regex-emacs-syntax-properties): New regression test.
This commit is contained in:
parent
cfdce1a19f
commit
5d2d28458d
2 changed files with 28 additions and 12 deletions
|
@ -47,6 +47,9 @@
|
|||
/* Make syntax table lookup honor the data in gl_state.  */
|
||||
#define SYNTAX(c) syntax_property (c, 1)
|
||||
|
||||
/* Explicit syntax lookup using the buffer-local table. */
|
||||
#define BUFFER_SYNTAX(c) syntax_property (c, 0)
|
||||
|
||||
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
|
||||
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
|
||||
#define RE_STRING_CHAR(p, multibyte) \
|
||||
|
@ -132,18 +135,22 @@
|
|||
|
||||
#define ISLOWER(c) lowercasep (c)
|
||||
|
||||
#define ISUPPER(c) uppercasep (c)
|
||||
|
||||
/* The following predicates use the buffer-local syntax table and
|
||||
ignore syntax properties, for consistency with the up-front
|
||||
assumptions made at compile time. */
|
||||
|
||||
#define ISPUNCT(c) (IS_REAL_ASCII (c) \
|
||||
? ((c) > ' ' && (c) < 0177 \
|
||||
&& !(((c) >= 'a' && (c) <= 'z') \
|
||||
|| ((c) >= 'A' && (c) <= 'Z') \
|
||||
|| ((c) >= '0' && (c) <= '9'))) \
|
||||
: SYNTAX (c) != Sword)
|
||||
: BUFFER_SYNTAX (c) != Sword)
|
||||
|
||||
#define ISSPACE(c) (SYNTAX (c) == Swhitespace)
|
||||
#define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
|
||||
|
||||
#define ISUPPER(c) uppercasep (c)
|
||||
|
||||
#define ISWORD(c) (SYNTAX (c) == Sword)
|
||||
#define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
|
||||
|
||||
/* Use alloca instead of malloc. This is because using malloc in
|
||||
re_search* or re_match* could cause memory leaks when C-g is used
|
||||
|
@ -2048,13 +2055,6 @@ regex_compile (re_char *pattern, ptrdiff_t size,
|
|||
is_xdigit, since they can only match ASCII characters.
|
||||
We don't need to handle them for multibyte. */
|
||||
|
||||
/* Setup the gl_state object to its buffer-defined value.
|
||||
This hardcodes the buffer-global syntax-table for ASCII
|
||||
chars, while the other chars will obey syntax-table
|
||||
properties. It's not ideal, but it's the way it's been
|
||||
done until now. */
|
||||
SETUP_BUFFER_SYNTAX_TABLE ();
|
||||
|
||||
for (c = 0; c < 0x80; ++c)
|
||||
if (re_iswctype (c, cc))
|
||||
{
|
||||
|
|
|
@ -949,4 +949,20 @@ This evaluates the TESTS test cases from glibc."
|
|||
(should (equal (smatch "a\\=*b" "ab") 0))
|
||||
))
|
||||
|
||||
(ert-deftest regex-emacs-syntax-properties ()
|
||||
;; Verify absence of character class syntax property ghost matching bug.
|
||||
(let ((re "\\s-[[:space:]]")
|
||||
(s (concat "a"
|
||||
(propertize "b" 'syntax-table '(0)) ; whitespace
|
||||
"éz"))
|
||||
(parse-sexp-lookup-properties t))
|
||||
;; Test matching in a string...
|
||||
(should (equal (string-match re s) nil))
|
||||
;; ... and in a buffer.
|
||||
(should (equal (with-temp-buffer
|
||||
(insert s)
|
||||
(goto-char (point-min))
|
||||
(re-search-forward re nil t))
|
||||
nil))))
|
||||
|
||||
;;; regex-emacs-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue