diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index e759967aa8a..26985b5d267 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -642,10 +642,10 @@ is omitted, the minimum is 0; if @var{n} is omitted, there is no maximum. For both forms, @var{m} and @var{n}, if specified, may be no larger than @ifnottex -2**15 @minus{} 1 +2**16 @minus{} 1 @end ifnottex @tex -@math{2^{15}-1} +@math{2^{16}-1} @end tex . diff --git a/etc/NEWS b/etc/NEWS index ad315536034..27bde2d147c 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -80,6 +80,13 @@ indirectly, e.g., by checking that functions like It blocks line breaking after a one-letter word, also in the case when this word is preceded by a non-space, but non-alphanumeric character. ++++ +** The limit on repetitions in regexps has been raised to 2^16-1. +It was previously limited to 2^15-1. For example, the following +regular expression was previously invalid, but is now accepted: + + x\{32768\} + * Editing Changes in Emacs 27.1 diff --git a/lisp/isearch.el b/lisp/isearch.el index 23dd9afccdb..9297c0f95ba 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el @@ -2858,7 +2858,7 @@ Optional third argument, if t, means if fail just return nil (no error). 
(setq isearch-error (car (cdr lossage))) (cond ((string-match - "\\`Premature \\|\\`Unmatched \\|\\`Invalid " + "\\`Premature \\|\\`Unmatched " isearch-error) (setq isearch-error "incomplete input")) ((and (not isearch-regexp) diff --git a/src/regex.c b/src/regex.c index 2185fc97d3b..122cf712422 100644 --- a/src/regex.c +++ b/src/regex.c @@ -1194,7 +1194,8 @@ static const char *re_error_msgid[] = gettext_noop ("Premature end of regular expression"), /* REG_EEND */ gettext_noop ("Regular expression too big"), /* REG_ESIZE */ gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ - gettext_noop ("Range striding over charsets") /* REG_ERANGEX */ + gettext_noop ("Range striding over charsets"), /* REG_ERANGEX */ + gettext_noop ("Invalid content of \\{\\}, repetitions too big") /* REG_ESIZEBR */ }; /* Whether to allocate memory during matching. */ @@ -1915,7 +1916,7 @@ struct range_table_work_area if (num < 0) \ num = 0; \ if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num) \ - FREE_STACK_RETURN (REG_BADBR); \ + FREE_STACK_RETURN (REG_ESIZEBR); \ num = num * 10 + c - '0'; \ if (p == pend) \ FREE_STACK_RETURN (REG_EBRACE); \ diff --git a/src/regex.h b/src/regex.h index b4aad6daac9..6974951f575 100644 --- a/src/regex.h +++ b/src/regex.h @@ -270,8 +270,10 @@ extern ptrdiff_t emacs_re_safe_alloca; #ifdef RE_DUP_MAX # undef RE_DUP_MAX #endif -/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ -#define RE_DUP_MAX (0x7fff) +/* Repeat counts are stored in opcodes as 2 byte integers. This was + previously limited to 0x7fff because the parsing code uses signed + ints. But Emacs only runs on platforms where int is at least 32 bits wide anyway. */ +#define RE_DUP_MAX (0xffff) /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -337,7 +339,8 @@ typedef enum REG_EEND, /* Premature end. */ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */ - REG_ERANGEX /* Range striding over charsets. 
*/ + REG_ERANGEX, /* Range striding over charsets. */ + REG_ESIZEBR /* n or m too big in \{n,m\} */ } reg_errcode_t; /* This data structure represents a compiled pattern. Before calling diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el index 86aa7d26350..083ed5c4c8c 100644 --- a/test/src/regex-tests.el +++ b/test/src/regex-tests.el @@ -677,4 +677,10 @@ This evaluates the PTESTS test cases from glibc." This evaluates the TESTS test cases from glibc." (should-not (regex-tests-TESTS))) +(ert-deftest regex-repeat-limit () + "Test the #xFFFF repeat limit." + (should (string-match "\\`x\\{65535\\}" (make-string 65535 ?x))) + (should-not (string-match "\\`x\\{65535\\}" (make-string 65534 ?x))) + (should-error (string-match "\\`x\\{65536\\}" "X") :type 'invalid-regexp)) + ;;; regex-tests.el ends here