Remove subsumed repetitions in regexps

Make regexps smaller and faster by removing terms that are superfluous
by virtue of standing next to another term that matches more.  See
https://lists.gnu.org/archive/html/emacs-devel/2020-01/msg00949.html
for details.

* lisp/bs.el (bs--make-header-match-string):
* lisp/gnus/deuglify.el (gnus-outlook-repair-attribution-block):
* lisp/gnus/message.el (message-subject-trailing-was-ask-regexp)
(message-subject-trailing-was-regexp):
* lisp/informat.el (Info-validate):
* lisp/net/browse-url.el (browse-url-button-regexp):
* lisp/net/rcirc.el (rcirc-url-regexp):
* lisp/org/ob-core.el (org-babel-remove-result):
* lisp/org/ob-fortran.el (org-babel-fortran-ensure-main-wrap):
* lisp/org/org-capture.el (org-capture-set-target-location):
* lisp/org/org-table.el (org-table-expand-lhs-ranges):
* lisp/org/org.el (org-maybe-keyword-time-regexp, org-ts-regexp)
(org-ts-regexp-inactive, org-ts-regexp-both):
* lisp/play/gametree.el (gametree-hack-file-layout):
* lisp/progmodes/cc-mode.el (c-Java-defun-prompt-regexp):
* lisp/progmodes/idlw-shell.el (idlwave-shell-halting-error):
* lisp/progmodes/ruby-mode.el (ruby-mode-set-encoding):
* lisp/progmodes/verilog-mode.el (verilog-error-font-lock-keywords)
(verilog-verilint-off, verilog-case-indent-level)
(verilog-within-translate-off, verilog-start-translate-off)
(verilog-back-to-start-translate-off, verilog-end-translate-off)
(verilog-expand-dirnames):
* lisp/term.el (term-control-seq-regexp):
* lisp/textmodes/reftex-vars.el (featurep):
* lisp/url/url-gw.el (url-open-telnet):
* lisp/vc/ediff-ptch.el (ediff-context-diff-label-regexp):
* lisp/vc/pcvs-parse.el (cvs-parse-status):
* test/src/regex-emacs-tests.el (regex-tests-PCRE):
Remove subsumed repetitions.
* lisp/progmodes/sh-script.el (sh-syntax-propertize-function):
Simplify repetition of a repetition.
This commit is contained in:
Mattias Engdegård 2020-02-20 15:45:44 +01:00
parent 8dc4034ed6
commit 770f76f050
23 changed files with 42 additions and 37 deletions

View file

@ -173,7 +173,12 @@ return a string representing the column's value."
(defun bs--make-header-match-string ()
"Return a regexp matching the first line of a Buffer Selection Menu buffer."
(concat "^\\(" (mapconcat #'car bs-attributes-list " *") " *$\\)"))
(concat "^\\("
(apply #'concat (mapcan (lambda (e)
(and (not (equal (car e) ""))
(list " *" (car e))))
bs-attributes-list))
" *$\\)"))
;; Font-Lock-Settings
(defvar bs-mode-font-lock-keywords

View file

@ -403,9 +403,9 @@ NODISPLAY is non-nil, don't redisplay the article buffer."
(gnus-with-article-buffer
(article-goto-body)
(when (re-search-forward
(concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n"
(concat "^[" cite-marks " \t]*--*[^-]+ [^-]+--*\\s *\n"
"[^\n:]+:[ \t]*\\([^\n]+\\)\n"
"\\([^\n:]+:[ \t]*[^\n]+\n\\)+")
"\\([^\n:]+:[^\n]+\n\\)+")
nil t)
(gnus-kill-all-overlays)
(replace-match "\\1 wrote:\n")

View file

@ -322,7 +322,7 @@ used."
:group 'message-various)
(defcustom message-subject-trailing-was-ask-regexp
"[ \t]*\\([[(]+[Ww][Aa][Ss]:?[ \t]*.*[])]+\\)"
"[ \t]*\\([[(]+[Ww][Aa][Ss].*[])]+\\)"
"Regexp matching \"(was: <old subject>)\" in the subject line.
The function `message-strip-subject-trailing-was' uses this regexp if
@ -337,7 +337,7 @@ It is okay to create some false positives here, as the user is asked."
:type 'regexp)
(defcustom message-subject-trailing-was-regexp
"[ \t]*\\((*[Ww][Aa][Ss]:[ \t]*.*)\\)"
"[ \t]*\\((*[Ww][Aa][Ss]:.*)\\)"
"Regexp matching \"(was: <old subject>)\" in the subject line.
If `message-subject-trailing-was-query' is set to t, the subject is

View file

@ -337,7 +337,7 @@ Check that every node pointer points to an existing node."
(point))))
(Info-extract-menu-node-name))))
(goto-char (point-min))
(while (re-search-forward "\\*note[ \n]*[^:\t]*:" nil t)
(while (re-search-forward "\\*note\\>[^:\t]*:" nil t)
(goto-char (+ (match-beginning 0) 5))
(skip-chars-forward " \n")
(Info-validate-node-name

View file

@ -216,7 +216,7 @@ be used instead."
"\\(?:"
;; Match paired parentheses, e.g. in Wikipedia URLs:
;; http://thread.gmane.org/47B4E3B2.3050402@gmail.com
"[" chars punct "]+" "(" "[" chars punct "]+" "[" chars "]*)"
"[" chars punct "]+" "(" "[" chars punct "]+" ")"
"\\(?:" "[" chars punct "]+" "[" chars "]" "\\)?"
"\\|"
"[" chars punct "]+" "[" chars "]"

View file

@ -2421,7 +2421,7 @@ keywords when no KEYWORD is given."
(concat
"\\(?:"
;; Match paired parentheses, e.g. in Wikipedia URLs:
"[" chars punct "]+" "(" "[" chars punct "]+" "[" chars "]*)" "[" chars "]"
"[" chars punct "]+" "(" "[" chars punct "]+" ")" "[" chars "]"
"\\|"
"[" chars punct "]+" "[" chars "]"
"\\)"))

View file

@ -2437,7 +2437,7 @@ INFO may provide the values of these header arguments (in the
(when location
(save-excursion
(goto-char location)
(when (looking-at (concat org-babel-result-regexp ".*$"))
(when (looking-at org-babel-result-regexp)
(delete-region
(if keep-keyword (line-beginning-position 2)
(save-excursion

View file

@ -106,7 +106,7 @@ its header arguments."
(defun org-babel-fortran-ensure-main-wrap (body params)
"Wrap body in a \"program ... end program\" block if none exists."
(if (string-match "^[ \t]*program[ \t]*.*" (capitalize body))
(if (string-match "^[ \t]*program\\>" (capitalize body))
(let ((vars (org-babel--get-vars params)))
(when vars (error "Cannot use :vars if `program' statement is present"))
body)

View file

@ -1021,7 +1021,7 @@ Store them in the capture property list."
(apply #'encode-time 0 0
org-extend-today-until
(cl-cdddr (decode-time prompt-time))))
((string-match "\\([^ ]+\\)--?[^ ]+[ ]+\\(.*\\)"
((string-match "\\([^ ]+\\)-[^ ]+[ ]+\\(.*\\)"
org-read-date-final-answer)
;; Replace any time range by its start.
(apply #'encode-time

View file

@ -3099,7 +3099,7 @@ function assumes the table is already analyzed (i.e., using
(let ((lhs (car e))
(rhs (cdr e)))
(cond
((string-match-p "\\`@-?[-+0-9]+\\$-?[0-9]+\\'" lhs)
((string-match-p "\\`@[-+0-9]+\\$-?[0-9]+\\'" lhs)
;; This just refers to one fixed field.
(push e res))
((string-match-p "\\`[a-zA-Z][_a-zA-Z0-9]*\\'" lhs)

View file

@ -460,7 +460,7 @@ Matched keyword is in group 1.")
org-clock-string)
t)
"\\)?"
" *\\([[<][0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^]\r\n>]*?[]>]"
" *\\([[<][0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^]\r\n>]*[]>]"
"\\|"
"<%%([^\r\n>]*>\\)")
"Matches a timestamp, possibly preceded by a keyword.")
@ -564,14 +564,14 @@ Effort estimates given in this property need to have the format H:MM.")
;;;; Timestamp
(defconst org-ts-regexp "<\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^\r\n>]*?\\)>"
(defconst org-ts-regexp "<\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^\r\n>]*\\)>"
"Regular expression for fast time stamp matching.")
(defconst org-ts-regexp-inactive
"\\[\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^\r\n>]*?\\)\\]"
"\\[\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^\r\n>]*\\)\\]"
"Regular expression for fast inactive time stamp matching.")
(defconst org-ts-regexp-both "[[<]\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^]\r\n>]*?\\)[]>]"
(defconst org-ts-regexp-both "[[<]\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^]\r\n>]*\\)[]>]"
"Regular expression for fast time stamp matching.")
(defconst org-ts-regexp0

View file

@ -324,7 +324,7 @@ This value is simply the outline heading level of the current line."
(defun gametree-hack-file-layout ()
(save-excursion
(goto-char (point-min))
(if (looking-at "[^\n]*-*-[^\n]*gametree-local-layout: \\([^;\n]*\\);")
(if (looking-at "[^\n]*-[^\n]*gametree-local-layout: \\([^;\n]*\\);")
(progn
(goto-char (match-beginning 1))
(delete-region (point) (match-end 1))

View file

@ -2671,7 +2671,7 @@ Key bindings:
;; since it's practically impossible to write a regexp that reliably
;; matches such a construct. Other tools are necessary.
(defconst c-Java-defun-prompt-regexp
"^[ \t]*\\(\\(\\(public\\|protected\\|private\\|const\\|abstract\\|synchronized\\|final\\|static\\|threadsafe\\|transient\\|native\\|volatile\\)\\s-+\\)*\\(\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*[][_$.a-zA-Z0-9]+\\|[[a-zA-Z]\\)\\s-*\\)\\s-+\\)\\)?\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*\\s-+\\)\\s-*\\)?\\([_a-zA-Z][^][ \t:;.,{}()\^?=]*\\|\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)\\)\\s-*\\(([^);{}]*)\\)?\\([] \t]*\\)\\(\\s-*\\<throws\\>\\s-*\\(\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)[, \t\n\r\f\v]*\\)+\\)?\\s-*")
"^[ \t]*\\(\\(\\(public\\|protected\\|private\\|const\\|abstract\\|synchronized\\|final\\|static\\|threadsafe\\|transient\\|native\\|volatile\\)\\s-+\\)*\\(\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]+\\|[[a-zA-Z]\\)\\s-*\\)\\s-+\\)\\)?\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*\\s-+\\)\\s-*\\)?\\([_a-zA-Z][^][ \t:;.,{}()\^?=]*\\|\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)\\)\\s-*\\(([^);{}]*)\\)?\\([] \t]*\\)\\(\\s-*\\<throws\\>\\s-*\\(\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)[, \t\n\r\f\v]*\\)+\\)?\\s-*")
(easy-menu-define c-java-menu java-mode-map "Java Mode Commands"
(cons "Java" (c-lang-const c-mode-menu java)))

View file

@ -1598,7 +1598,7 @@ number.")
"A regular expression to match any IDL error.")
(defvar idlwave-shell-halting-error
"^% .*\n\\([^%].*\n\\)*% Execution halted at:\\(\\s-*\\S-+\\s-*[0-9]+\\s-*.*\\)\n"
"^% .*\n\\([^%].*\n\\)*% Execution halted at:\\(\\s-*\\S-+\\s-*[0-9]+.*\\)\n"
"A regular expression to match errors which halt execution.")
(defvar idlwave-shell-cant-continue-error

View file

@ -801,7 +801,7 @@ The style of the comment is controlled by `ruby-encoding-magic-comment-style'."
(let ((coding-system (ruby--detect-encoding)))
(when coding-system
(if (looking-at "^#!") (beginning-of-line 2))
(cond ((looking-at "\\s *#\\s *.*\\(en\\)?coding\\s *:\\s *\\([-a-z0-9_]*\\)")
(cond ((looking-at "\\s *#.*\\(en\\)?coding\\s *:\\s *\\([-a-z0-9_]*\\)")
;; update existing encoding comment if necessary
(unless (string= (match-string 2) coding-system)
(goto-char (match-beginning 2))

View file

@ -1096,7 +1096,7 @@ subshells can nest."
(")" (0 (sh-font-lock-paren (match-beginning 0))))
;; Highlight (possibly nested) subshells inside "" quoted
;; regions correctly.
("\"\\(?:\\(?:[^\\\"]\\|\\\\.\\)*?\\)??\\(\\$(\\|`\\)"
("\"\\(?:[^\\\"]\\|\\\\.\\)*?\\(\\$(\\|`\\)"
(1 (ignore
(if (nth 8 (save-excursion (syntax-ppss (match-beginning 0))))
(goto-char (1+ (match-beginning 0)))

View file

@ -958,8 +958,8 @@ See `compilation-error-regexp-alist-alist' for the formatting. For XEmacs.")
("syntax error:.*\n\\([^ \t]+\\) *\\([0-9]+\\):" 1 bold t)
("syntax error:.*\n\\([^ \t]+\\) *\\([0-9]+\\):" 2 bold t)
;; verilog-verilator
(".*%?\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 3 bold t)
(".*%?\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 4 bold t)
(".*\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 3 bold t)
(".*\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 4 bold t)
;; verilog-leda
("^In file \\([^ \t]+\\)[ \t]+line[ \t]+\\([0-9]+\\):\n[^\n]*\n[^\n]*\n\\(Warning\\|Error\\|Failure\\)[^\n]*" 1 bold t)
("^In file \\([^ \t]+\\)[ \t]+line[ \t]+\\([0-9]+\\):\n[^\n]*\n[^\n]*\n\\(Warning\\|Error\\|Failure\\)[^\n]*" 2 bold t)
@ -5345,7 +5345,7 @@ becomes:
(interactive)
(save-excursion
(beginning-of-line)
(when (looking-at "\\(.*\\)([WE]\\([0-9A-Z]+\\)).*,\\s +line\\s +[0-9]+:\\s +\\([^:\n]+\\):?.*$")
(when (looking-at "\\(.*\\)([WE]\\([0-9A-Z]+\\)).*,\\s +line\\s +[0-9]+:\\s +\\([^:\n]+\\).*$")
(replace-match (format
;; %3s makes numbers 1-999 line up nicely
"\\1//Verilint %3s off // WARNING: \\3"
@ -6788,7 +6788,7 @@ Do not count named blocks or case-statements."
((looking-at verilog-named-block-re)
(current-column))
((and (not (looking-at verilog-extended-case-re))
(looking-at "^[^:;]+[ \t]*:"))
(looking-at "^[^:;]+:"))
(verilog-re-search-forward ":" nil t)
(skip-chars-forward " \t")
(current-column))
@ -7782,7 +7782,7 @@ If search fails, other files are checked based on
"Return point if within translate-off region, else nil."
(and (save-excursion
(re-search-backward
(concat "//\\s-*.*\\s-*" verilog-directive-regexp "\\(on\\|off\\)\\>")
(concat "//.*" verilog-directive-regexp "\\(on\\|off\\)\\>")
nil t))
(equal "off" (match-string 2))
(point)))
@ -7790,14 +7790,14 @@ If search fails, other files are checked based on
(defun verilog-start-translate-off (limit)
"Return point before translate-off directive if before LIMIT, else nil."
(when (re-search-forward
(concat "//\\s-*.*\\s-*" verilog-directive-regexp "off\\>")
(concat "//.*" verilog-directive-regexp "off\\>")
limit t)
(match-beginning 0)))
(defun verilog-back-to-start-translate-off (limit)
"Return point before translate-off directive if before LIMIT, else nil."
(when (re-search-backward
(concat "//\\s-*.*\\s-*" verilog-directive-regexp "off\\>")
(concat "//.*" verilog-directive-regexp "off\\>")
limit t)
(match-beginning 0)))
@ -7805,7 +7805,7 @@ If search fails, other files are checked based on
"Return point after translate-on directive if before LIMIT, else nil."
(re-search-forward (concat
"//\\s-*.*\\s-*" verilog-directive-regexp "on\\>") limit t))
"//.*" verilog-directive-regexp "on\\>") limit t))
(defun verilog-match-translate-off (limit)
"Match a translate-off block, setting `match-data' and returning t, else nil.
@ -9982,7 +9982,7 @@ Or, just the existing dirnames themselves if there are no wildcards."
(while dirnames
(setq dirname (car dirnames)
dirnames (cdr dirnames))
(cond ((string-match (concat "^\\(\\|[/\\]*[^*?]*[/\\]\\)" ; root
(cond ((string-match (concat "^\\(\\|[^*?]*[/\\]\\)" ; root
"\\([^/\\]*[*?][^/\\]*\\)" ; filename with *?
"\\(.*\\)") ; rest
dirname)

View file

@ -2796,12 +2796,12 @@ See `term-prompt-regexp'."
"\\(?:[\r\n\000\007\t\b\016\017]\\|"
;; some Emacs specific control sequences, implemented by
;; `term-command-hook',
"\032[^\n]+\r?\n\\|"
"\032[^\n]+\n\\|"
;; a C1 escape coded character (see [ECMA-48] section 5.3 "Elements
;; of the C1 set"),
"\e\\(?:[DM78c]\\|"
;; another Emacs specific control sequence,
"AnSiT[^\n]+\r?\n\\|"
"AnSiT[^\n]+\n\\|"
;; or an escape sequence (section 5.4 "Control Sequences"),
"\\[\\([\x30-\x3F]*\\)[\x20-\x2F]*[\x40-\x7E]\\)\\)")
"Regexp matching control sequences handled by term.el.")

View file

@ -925,7 +925,7 @@ DOWNCASE t: Downcase words before using them."
"\\<label[[:space:]]*=[[:space:]]*"
;; Match the label value; braces around the value are
;; optional.
"{?\\(?1:[^] ,}\r\n\t%]+\\)}?"
"{?\\(?1:[^] ,}\r\n\t%]+\\)"
;; We are done. Just search until the next closing bracket
"[^]]*\\]"))
"List of regexps matching \\label definitions.

View file

@ -191,7 +191,7 @@ linked Emacs under SunOS 4.x."
proc (concat (mapconcat 'identity
(append url-gateway-telnet-parameters
(list host service)) " ") "\n"))
(url-wait-for-string "^\r*Escape character.*\r*\n+" proc)
(url-wait-for-string "^\r*Escape character.*\n+" proc)
(delete-region (point-min) (match-end 0))
(process-send-string proc "\^]\n")
(url-wait-for-string "^telnet" proc)

View file

@ -119,7 +119,7 @@ patch. So, don't change these variables, unless the default doesn't work."
(defcustom ediff-context-diff-label-regexp
(let ((stuff "\\([^ \t\n]+\\)"))
(concat "\\(" ; context diff 2-liner
"^\\*\\*\\* +" stuff "[^*]+[\t ]*\n--- +" stuff
"^\\*\\*\\* +" stuff "[^*]+\n--- +" stuff
"\\|" ; unified format diff 2-liner
"^--- +" stuff ".*\n\\+\\+\\+ +" stuff
"\\)"))

View file

@ -472,7 +472,7 @@ The remaining KEYS are passed directly to `cvs-create-fileinfo'."
;; Let's not get all worked up if the format changes a bit
(cvs-match " *Working revision:.*$"))
(cvs-or
(cvs-match " *RCS Version:[ \t]*\\([0-9.]+\\)[ \t]*.*$" (head-rev 1))
(cvs-match " *RCS Version:[ \t]*\\([0-9.]+\\).*$" (head-rev 1))
(cvs-match " *Repository revision:[ \t]*\\([0-9.]+\\)[ \t]*\\(.*\\)$"
(head-rev 1))
(cvs-match " *Repository revision:.*"))

View file

@ -505,7 +505,7 @@ differences in behavior.")
(cond
;; pattern
((save-excursion (re-search-forward "^/\\(.*\\)/\\(.*i?\\)$" nil t))
((save-excursion (re-search-forward "^/\\(.*\\)/\\(.*\\)$" nil t))
(setq icase (string= "i" (match-string 2))
pattern (regex-tests-unextend (match-string 1))))