Speed up Unicode normalisation tests by a factor of 5

After this change, ucs-normalize-tests are still very slow but
somewhat less disastrously so (from 100 to 20 min on this machine).

* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--normalization-equal-p)
(ucs-normalize-tests--normalization-chareq-p)
(ucs-normalize-tests--rule1-holds-p)
(ucs-normalize-tests--rule2-holds-p)
(ucs-normalize-tests--part1-rule2):
Run only over the Unicode code space.
Hoist `with-current-buffer` to reduce overhead.
This commit is contained in:
Mattias Engdegård 2022-12-03 19:19:28 +01:00
parent afa4fcb95b
commit c5ba47c889

View file

@ -59,7 +59,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(NFD . ucs-normalize-NFD-region) (NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region) (NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region)))) (NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf `(progn
(erase-buffer) (erase-buffer)
(insert ,str) (insert ,str)
(,(cdr (assq norm norm-alist)) (point-min) (point-max)) (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@ -74,7 +74,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
(NFD . ucs-normalize-NFD-region) (NFD . ucs-normalize-NFD-region)
(NFKC . ucs-normalize-NFKC-region) (NFKC . ucs-normalize-NFKC-region)
(NFKD . ucs-normalize-NFKD-region)))) (NFKD . ucs-normalize-NFKD-region))))
`(with-current-buffer ucs-normalize-tests--norm-buf `(progn
(erase-buffer) (erase-buffer)
(insert ,char) (insert ,char)
(,(cdr (assq norm norm-alist)) (point-min) (point-max)) (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@ -90,6 +90,7 @@ The following invariants must be true for all conformant implementations..."
;; See `ucs-normalize-tests--rule2-holds-p'. ;; See `ucs-normalize-tests--rule2-holds-p'.
(aset ucs-normalize-tests--chars-part1 (aset ucs-normalize-tests--chars-part1
(aref source 0) 1)) (aref source 0) 1))
(with-current-buffer ucs-normalize-tests--norm-buf
(and (and
;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3) ;; c2 == toNFC(c1) == toNFC(c2) == toNFC(c3)
(ucs-normalize-tests--normalization-equal-p NFC source nfc) (ucs-normalize-tests--normalization-equal-p NFC source nfc)
@ -119,7 +120,7 @@ The following invariants must be true for all conformant implementations..."
(ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd) (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
(ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))) (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))))
(defsubst ucs-normalize-tests--rule2-holds-p (X) (defsubst ucs-normalize-tests--rule2-holds-p (X)
"Check 2nd conformance rule. "Check 2nd conformance rule.
@ -127,7 +128,9 @@ For every code point X assigned in this version of Unicode that
is not specifically listed in Part 1, the following invariants is not specifically listed in Part 1, the following invariants
must be true for all conformant implementations: must be true for all conformant implementations:
X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)" X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
Must be called with `ucs-normalize-tests--norm-buf' as current buffer."
(and (ucs-normalize-tests--normalization-chareq-p NFC X X) (and (ucs-normalize-tests--normalization-chareq-p NFC X X)
(ucs-normalize-tests--normalization-chareq-p NFD X X) (ucs-normalize-tests--normalization-chareq-p NFD X X)
(ucs-normalize-tests--normalization-chareq-p NFKC X X) (ucs-normalize-tests--normalization-chareq-p NFKC X X)
@ -230,8 +233,10 @@ must be true for all conformant implementations:
(defun ucs-normalize-tests--part1-rule2 (chars-part1) (defun ucs-normalize-tests--part1-rule2 (chars-part1)
(let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2" (let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
0 (max-char))) 0 (max-char t)))
(failed-chars nil)) (failed-chars nil)
(unicode-max (max-char t)))
(with-current-buffer ucs-normalize-tests--norm-buf
(map-char-table (map-char-table
(lambda (char-range listed-in-part) (lambda (char-range listed-in-part)
(unless (eq listed-in-part 1) (unless (eq listed-in-part 1)
@ -239,11 +244,12 @@ must be true for all conformant implementations:
(progn (unless (ucs-normalize-tests--rule2-holds-p char-range) (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
(push char-range failed-chars)) (push char-range failed-chars))
(progress-reporter-update reporter char-range)) (progress-reporter-update reporter char-range))
(cl-loop for char from (car char-range) to (cdr char-range) (cl-loop for char from (car char-range) to (min (cdr char-range)
unicode-max)
unless (ucs-normalize-tests--rule2-holds-p char) unless (ucs-normalize-tests--rule2-holds-p char)
do (push char failed-chars) do (push char failed-chars)
do (progress-reporter-update reporter char))))) do (progress-reporter-update reporter char)))))
chars-part1) chars-part1))
(progress-reporter-done reporter) (progress-reporter-done reporter)
failed-chars)) failed-chars))