Handle raw bytes, and LF in ranges, in rx `any' argument strings
* lisp/emacs-lisp/rx.el (rx-check-any-string): Rewrite to handle raw
bytes in unibyte strings and accept LF as range endpoints (Bug#33205).
* test/lisp/emacs-lisp/rx-tests.el: Add tests for the above.
This commit is contained in:
parent
fb10834a60
commit
b71d4ce056
2 changed files with 51 additions and 22 deletions
|
@@ -449,28 +449,35 @@ Only both edges of each range is checked."
|
|||
|
||||
|
||||
(defun rx-check-any-string (str)
;; NOTE(review): this is a rendered diff.  The first body below (through the
;; line `l))') looks like the implementation this commit removes; the second
;; docstring onward looks like its replacement -- confirm against the repository.
|
||||
"Check string argument STR for Rx `any'."
|
||||
(let ((i 0)
|
||||
c1 c2 l)
|
||||
(if (= 0 (length str))
|
||||
(error "String arg for Rx `any' must not be empty"))
|
||||
;; NOTE(review): in Emacs regexps `.' does not match a newline, so this
;; pattern cannot find a range with LF as an endpoint -- presumably the
;; limitation the commit message refers to.
(while (string-match ".-." str i)
|
||||
;; string before range: convert it to characters
|
||||
(if (< i (match-beginning 0))
|
||||
(setq l (nconc
|
||||
l
|
||||
(append (substring str i (match-beginning 0)) nil))))
|
||||
;; range
|
||||
(setq i (match-end 0)
|
||||
c1 (aref str (match-beginning 0))
|
||||
c2 (aref str (1- i)))
|
||||
(cond
|
||||
((< c1 c2) (setq l (nconc l (list (cons c1 c2)))))
|
||||
((= c1 c2) (setq l (nconc l (list c1))))))
|
||||
;; rest?
|
||||
(if (< i (length str))
|
||||
(setq l (nconc l (append (substring str i) nil))))
|
||||
l))
|
||||
"Turn the `any' argument string STR into a list of characters.
|
||||
The original order is not preserved. Ranges, \"A-Z\", become pairs, (?A . ?Z)."
|
||||
(let ((decode-char
|
||||
;; Make sure raw bytes are decoded as such, to avoid confusion with
|
||||
;; U+0080..U+00FF.
|
||||
(if (multibyte-string-p str)
|
||||
#'identity
|
||||
;; For a unibyte STR, elements #x80..#xff are shifted by #x3fff00,
;; i.e. onto #x3fff80..#x3fffff; all other values pass through.
(lambda (c) (if (<= #x80 c #xff)
|
||||
(+ c #x3fff00)
|
||||
c))))
|
||||
(len (length str))
|
||||
(i 0)
|
||||
(ret nil))
|
||||
(if (= 0 len)
|
||||
(error "String arg for Rx `any' must not be empty"))
|
||||
(while (< i len)
|
||||
;; A range needs three elements: STR[i], a hyphen at i+1 and an endpoint
;; at i+2, so i+2 must still be inside STR.
(cond ((and (< i (- len 2))
|
||||
(= (aref str (+ i 1)) ?-))
|
||||
;; Range.
|
||||
(let ((start (funcall decode-char (aref str i)))
|
||||
(end (funcall decode-char (aref str (+ i 2)))))
|
||||
;; Equal endpoints collapse to a single character.  When START is
;; greater than END neither clause matches, so the range is dropped
;; without signalling an error.
(cond ((< start end) (push (cons start end) ret))
|
||||
((= start end) (push start ret)))
|
||||
(setq i (+ i 3))))
|
||||
(t
|
||||
;; Single character.
|
||||
(push (funcall decode-char (aref str i)) ret)
|
||||
(setq i (+ i 1)))))
|
||||
;; Entries were accumulated with `push', so RET is in reverse order of
;; appearance in STR.
ret))
|
||||
|
||||
|
||||
(defun rx-check-any (arg)
|
||||
|
|
|
@@ -33,6 +33,28 @@
|
|||
(number-sequence ?< ?\])
|
||||
(number-sequence ?- ?:))))))
|
||||
|
||||
(ert-deftest rx-char-any-range-nl ()
|
||||
"Test character alternatives with LF as a range endpoint."
|
||||
;; LF as the lower endpoint of the range.
(should (equal (rx (any "\n-\r"))
|
||||
"[\n-\r]"))
|
||||
;; LF as the upper endpoint of the range.
(should (equal (rx (any "\a-\n"))
|
||||
"[\a-\n]")))
|
||||
|
||||
(ert-deftest rx-char-any-raw-byte ()
|
||||
"Test raw bytes in character alternatives."
|
||||
;; Separate raw characters: the first element of the subject that is in the
;; set is the raw byte \326 at index 1.
|
||||
(should (equal (string-match-p (rx (any "\326A\333B"))
|
||||
"X\326\333")
|
||||
1))
|
||||
;; Range of raw characters, unibyte: the expected index 2 means neither
;; U+00FF (index 0) nor A (index 1) matches; the raw byte \310 does.
|
||||
(should (equal (string-match-p (rx (any "\200-\377"))
|
||||
"ÿA\310B")
|
||||
2))
|
||||
;; Range of raw characters, multibyte (the argument also contains a
;; non-ASCII character): the raw byte \355 at index 2 lies inside \326-\377.
|
||||
(should (equal (string-match-p (rx (any "Å\211\326-\377\177"))
|
||||
"XY\355\177\327")
|
||||
2)))
|
||||
|
||||
(ert-deftest rx-pcase ()
|
||||
(should (equal (pcase "a 1 2 3 1 1 b"
|
||||
((rx (let u (+ digit)) space
|
||||
|
|
Loading…
Add table
Reference in a new issue