Speed up Unicode normalisation tests by a factor of 5

After this change, ucs-normalize-tests are still very slow but
somewhat less disastrously so (from 100 to 20 min on this machine).

* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--normalization-equal-p)
(ucs-normalize-tests--normalization-chareq-p)
(ucs-normalize-tests--rule1-holds-p)
(ucs-normalize-tests--rule2-holds-p)
(ucs-normalize-tests--part1-rule2):
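Run only over the Unicode code space, bounded by `(max-char t)`,
instead of the full Emacs character range.
Hoist `with-current-buffer` out of the comparison macros and into
their callers, so that the buffer is switched once per caller rather
than once per comparison.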
This commit is contained in:
Mattias Engdegård 2022-12-03 19:19:28 +01:00
parent afa4fcb95b
commit c5ba47c889

View file

@ -59,7 +59,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(NFD . ucs-normalize-NFD-region) (NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region) (NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region)))) (NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf `(progn
(erase-buffer) (erase-buffer)
(insert ,str) (insert ,str)
(,(cdr (assq norm norm-alist)) (point-min) (point-max)) (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@ -74,7 +74,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(NFD . ucs-normalize-NFD-region) (NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region) (NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region)))) (NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf `(progn
(erase-buffer) (erase-buffer)
(insert ,char) (insert ,char)
(,(cdr (assq norm norm-alist)) (point-min) (point-max)) (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@ -90,36 +90,37 @@ The following invariants must be true for all conformant implementations..."
;; See `ucs-normalize-tests--rule2-holds-p'. ;; See `ucs-normalize-tests--rule2-holds-p'.
(aset ucs-normalize-tests--chars-part1 (aset ucs-normalize-tests--chars-part1
(aref source 0) 1)) (aref source 0) 1))
(and (with-current-buffer ucs-normalize-tests--norm-buf
;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3) (and
(ucs-normalize-tests--normalization-equal-p NFC source nfc) ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
(ucs-normalize-tests--normalization-equal-p NFC nfc nfc) (ucs-normalize-tests--normalization-equal-p NFC source nfc)
(ucs-normalize-tests--normalization-equal-p NFC nfd nfc) (ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
;; c4 == toNFC(c4) == toNFC(c5) (ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
(ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc) ;; c4 == toNFC(c4) == toNFC(c5)
(ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc) (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
(ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3) ;; c3 == toNFD(c1) == toNFD(c2) == toNFD(c3)
(ucs-normalize-tests--normalization-equal-p NFD source nfd) (ucs-normalize-tests--normalization-equal-p NFD source nfd)
(ucs-normalize-tests--normalization-equal-p NFD nfc nfd) (ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
(ucs-normalize-tests--normalization-equal-p NFD nfd nfd) (ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
;; c5 == toNFD(c4) == toNFD(c5) ;; c5 == toNFD(c4) == toNFD(c5)
(ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd) (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
(ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd) (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5) ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
(ucs-normalize-tests--normalization-equal-p NFKC source nfkc) (ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc) (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc) (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc) (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
(ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc) (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5) ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
(ucs-normalize-tests--normalization-equal-p NFKD source nfkd) (ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))) (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))))
(defsubst ucs-normalize-tests--rule2-holds-p (X) (defsubst ucs-normalize-tests--rule2-holds-p (X)
"Check 2nd conformance rule. "Check 2nd conformance rule.
@ -127,7 +128,9 @@ For every code point X assigned in this version of Unicode that
is not specifically listed in Part 1, the following invariants is not specifically listed in Part 1, the following invariants
must be true for all conformant implementations: must be true for all conformant implementations:
X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)" X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
Must be called with `ucs-normalize-tests--norm-buf' as current buffer."
(and (ucs-normalize-tests--normalization-chareq-p NFC X X) (and (ucs-normalize-tests--normalization-chareq-p NFC X X)
(ucs-normalize-tests--normalization-chareq-p NFD X X) (ucs-normalize-tests--normalization-chareq-p NFD X X)
(ucs-normalize-tests--normalization-chareq-p NFKC X X) (ucs-normalize-tests--normalization-chareq-p NFKC X X)
@ -230,20 +233,23 @@ must be true for all conformant implementations:
(defun ucs-normalize-tests--part1-rule2 (chars-part1) (defun ucs-normalize-tests--part1-rule2 (chars-part1)
(let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2" (let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
0 (max-char))) 0 (max-char t)))
(failed-chars nil)) (failed-chars nil)
(map-char-table (unicode-max (max-char t)))
(lambda (char-range listed-in-part) (with-current-buffer ucs-normalize-tests--norm-buf
(unless (eq listed-in-part 1) (map-char-table
(if (characterp char-range) (lambda (char-range listed-in-part)
(progn (unless (ucs-normalize-tests--rule2-holds-p char-range) (unless (eq listed-in-part 1)
(push char-range failed-chars)) (if (characterp char-range)
(progress-reporter-update reporter char-range)) (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
(cl-loop for char from (car char-range) to (cdr char-range) (push char-range failed-chars))
unless (ucs-normalize-tests--rule2-holds-p char) (progress-reporter-update reporter char-range))
do (push char failed-chars) (cl-loop for char from (car char-range) to (min (cdr char-range)
do (progress-reporter-update reporter char))))) unicode-max)
chars-part1) unless (ucs-normalize-tests--rule2-holds-p char)
do (push char failed-chars)
do (progress-reporter-update reporter char)))))
chars-part1))
(progress-reporter-done reporter) (progress-reporter-done reporter)
failed-chars)) failed-chars))