Remove the optional KEEP-ORDER argument to regexp-opt

This argument was added for the 'or' clause in rx, but it turned out
to be a bad idea (bug#37659), and there seems to be little other use
for it.

* lisp/emacs-lisp/regexp-opt.el (regexp-opt): Remove KEEP-ORDER.
* doc/lispref/searching.texi (Regexp Functions):
* etc/NEWS: Remove it from the documentation.
* test/lisp/emacs-lisp/regexp-opt-tests.el (regexp-opt-test--match-all)
(regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
(regexp-opt-keep-order, regexp-opt-longest-match): Simplify test.
This commit is contained in:
Mattias Engdegård 2020-02-13 20:06:48 +01:00
parent d1e8ce8bb6
commit 9f6a4bbcc9
4 changed files with 19 additions and 84 deletions

View file

@ -1745,7 +1745,7 @@ any special characters.
@end defun @end defun
@cindex optimize regexp @cindex optimize regexp
@defun regexp-opt strings &optional paren keep-order @defun regexp-opt strings &optional paren
This function returns an efficient regular expression that will match This function returns an efficient regular expression that will match
any of the strings in the list @var{strings}. This is useful when you any of the strings in the list @var{strings}. This is useful when you
need to make matching or searching as fast as possible---for example, need to make matching or searching as fast as possible---for example,
@ -1783,11 +1783,8 @@ if it is necessary to ensure that a postfix operator appended to
it will apply to the whole expression. it will apply to the whole expression.
@end table @end table
The optional argument @var{keep-order}, if non-@code{nil}, forces the The returned regexp is ordered in such a way that it will always match
match to be performed in the order given, as if the strings were made the longest string possible.
into a regexp by joining them with the @samp{\|} operator. If nil or
omitted, the returned regexp will always match the longest string
possible.
Up to reordering, the resulting regexp of @code{regexp-opt} is Up to reordering, the resulting regexp of @code{regexp-opt} is
equivalent to but usually more efficient than that of a simplified equivalent to but usually more efficient than that of a simplified

View file

@ -3526,13 +3526,6 @@ the process. That way 'make-process' can start remote processes.
This is currently supported on GNUish hosts and on modern versions of This is currently supported on GNUish hosts and on modern versions of
MS-Windows. MS-Windows.
+++
** The function 'regexp-opt' accepts an additional optional argument.
By default, the regexp returned by 'regexp-opt' may match the strings
in any order. If the new third argument is non-nil, the match is
guaranteed to be performed in the order given, as if the strings were
made into a regexp by joining them with '\|'.
+++ +++
** The function 'regexp-opt', when given an empty list of strings, now ** The function 'regexp-opt', when given an empty list of strings, now
returns a regexp that never matches anything, which is an identity for returns a regexp that never matches anything, which is an identity for

View file

@ -84,7 +84,7 @@
;;; Code: ;;; Code:
;;;###autoload ;;;###autoload
(defun regexp-opt (strings &optional paren keep-order) (defun regexp-opt (strings &optional paren)
"Return a regexp to match a string in the list STRINGS. "Return a regexp to match a string in the list STRINGS.
Each member of STRINGS is treated as a fixed string, not as a regexp. Each member of STRINGS is treated as a fixed string, not as a regexp.
Optional PAREN specifies how the returned regexp is surrounded by Optional PAREN specifies how the returned regexp is surrounded by
@ -114,11 +114,8 @@ nil
necessary to ensure that a postfix operator appended to it will necessary to ensure that a postfix operator appended to it will
apply to the whole expression. apply to the whole expression.
The optional argument KEEP-ORDER, if non-nil, forces the match to The returned regexp is ordered in such a way that it will always
be performed in the order given, as if the strings were made into match the longest string possible.
a regexp by joining them with the `\\|' operator. If nil or
omitted, the returned regexp is will always match the longest
string possible.
Up to reordering, the resulting regexp is equivalent to but Up to reordering, the resulting regexp is equivalent to but
usually more efficient than that of a simplified version: usually more efficient than that of a simplified version:
@ -140,34 +137,12 @@ usually more efficient than that of a simplified version:
(completion-ignore-case nil) (completion-ignore-case nil)
(completion-regexp-list nil) (completion-regexp-list nil)
(open (cond ((stringp paren) paren) (paren "\\("))) (open (cond ((stringp paren) paren) (paren "\\(")))
(re (re (if strings
(cond
;; No strings: return an unmatchable regexp.
((null strings)
(concat (or open "\\(?:") regexp-unmatchable "\\)"))
;; The algorithm will generate a pattern that matches
;; longer strings in the list before shorter. If the
;; list order matters, then no string must come after a
;; proper prefix of that string. To check this, verify
;; that a straight or-pattern matches each string
;; entirely.
((and keep-order
(let* ((case-fold-search nil)
(alts (mapconcat #'regexp-quote strings "\\|")))
(and (let ((s strings))
(while (and s
(string-match alts (car s))
(= (match-end 0) (length (car s))))
(setq s (cdr s)))
;; If we exited early, we found evidence that
;; regexp-opt-group cannot be used.
s)
(concat (or open "\\(?:") alts "\\)")))))
(t
(regexp-opt-group (regexp-opt-group
(delete-dups (sort (copy-sequence strings) 'string-lessp)) (delete-dups (sort (copy-sequence strings) 'string-lessp))
(or open t) (not open)))))) (or open t) (not open))
;; No strings: return an unmatchable regexp.
(concat (or open "\\(?:") regexp-unmatchable "\\)"))))
(cond ((eq paren 'words) (cond ((eq paren 'words)
(concat "\\<" re "\\>")) (concat "\\<" re "\\>"))
((eq paren 'symbols) ((eq paren 'symbols)

View file

@ -47,43 +47,13 @@
(mapcar (lambda (i) (regexp-opt-test--permutation i list)) (mapcar (lambda (i) (regexp-opt-test--permutation i list))
(number-sequence 0 (1- (regexp-opt-test--factorial (length list)))))) (number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
(defun regexp-opt-test--match-all (words re) (ert-deftest regexp-opt-longest-match ()
(mapcar (lambda (w) (and (string-match re w) "Check that the regexp always matches as much as possible."
(match-string 0 w))) (let ((s "abcd"))
words))
(defun regexp-opt-test--check-perm (perm)
(let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
(opt-re (regexp-opt perm nil t))
(ref (regexp-opt-test--match-all perm ref-re))
(opt (regexp-opt-test--match-all perm opt-re)))
(equal opt ref)))
(defun regexp-opt-test--explain-perm (perm)
(let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
(opt-re (regexp-opt perm nil t))
(ref (regexp-opt-test--match-all perm ref-re))
(opt (regexp-opt-test--match-all perm opt-re)))
(concat "\n"
(format "Naïve regexp: %s\n" ref-re)
(format "Optimized regexp: %s\n" opt-re)
(format "Got: %s\n" opt)
(format "Expected: %s\n" ref))))
(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm)
(ert-deftest regexp-opt-keep-order ()
"Check that KEEP-ORDER works."
(dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
(should (regexp-opt-test--check-perm perm)))
(dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc"))) (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
(should (regexp-opt-test--check-perm perm)))) (should (equal (and (string-match (regexp-opt perm) s)
(match-string 0 s))
"abc")))))
(ert-deftest regexp-opt-charset () (ert-deftest regexp-opt-charset ()
(should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]")) (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))