Fix idna-mapping-table following Unicode 16 changes

The latest version of UTS #46 in Unicode 16 has changed the way it
indicates which codepoints are invalid in domain names, causing
'idna-mapping-table' to contain incorrect information, which then breaks
'textsec-domain-suspicious-p' and our test suite.  (Bug#73312)

* admin/unidata/unidata-gen.el (unidata-gen-idna-mapping): Check the
IDNA2008 status field ("NV8" or "XV8") in "IdnaMappingTable.txt" in
addition to checking the status field, as the latter can now be 'valid'
for disallowed codepoints.
This commit is contained in:
Robert Pluim 2024-09-17 15:19:01 +02:00
parent 8eb66cca78
commit 7d365a2d72

View file

@ -1598,15 +1598,21 @@ same directory."))
(let ((map (make-char-table nil))) (let ((map (make-char-table nil)))
(with-temp-buffer (with-temp-buffer
(unidata-gen--insert-file "IdnaMappingTable.txt") (unidata-gen--insert-file "IdnaMappingTable.txt")
(while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?" (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?\\(?:; \\(NV8\\|XV8\\)\\)?"
nil t) nil t)
(let ((start (match-string 1)) (let ((start (match-string 1))
(end (match-string 2)) (end (match-string 2))
(status (match-string 3)) (status (match-string 3))
(mapped (match-string 4))) (mapped (match-string 4))
(idna-status (match-string 5)))
;; Make reading the file slightly faster by using `t' ;; Make reading the file slightly faster by using `t'
;; instead of `disallowed' all over the place. ;; instead of `disallowed' all over the place.
(when (string-match-p "\\`disallowed" status) (when (or (string-match-p "\\`disallowed" status)
;; UTS #46 messed us about with "status = valid" for
;; invalid characters, so we need to check for "NV8" or
;; "XV8".
(string= idna-status "NV8")
(string= idna-status "XV8"))
(setq status "t")) (setq status "t"))
(unless (or (equal status "valid") (unless (or (equal status "valid")
(equal status "deviation")) (equal status "deviation"))