From 7d365a2d72d8e656262205827cc5fdf423c3a41f Mon Sep 17 00:00:00 2001 From: Robert Pluim Date: Tue, 17 Sep 2024 15:19:01 +0200 Subject: [PATCH] Fix idna-mapping-table following Unicode 16 changes The latest version of UTS #46 in Unicode 16 has changed the way it indicates which codepoints are invalid in domain names, causing 'idna-mapping-table' to contain incorrect information, which then breaks 'textsec-domain-suspicious-p' and our test suite. (Bug#73312) * admin/unidata/unidata-gen.el (unidata-gen-idna-mapping): Check the IDNA validity field in "IdnaMappingTable.txt" in addition to checking the status field, as the latter can now be 'valid' for disallowed codepoints. --- admin/unidata/unidata-gen.el | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 7be03fe63af..71ea7bddb84 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -1598,15 +1598,21 @@ same directory.")) (let ((map (make-char-table nil))) (with-temp-buffer (unidata-gen--insert-file "IdnaMappingTable.txt") - (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?" + (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?\\(?:; \\(NV8\\|XV8\\)\\)?" nil t) (let ((start (match-string 1)) (end (match-string 2)) (status (match-string 3)) - (mapped (match-string 4))) + (mapped (match-string 4)) + (idna-status (match-string 5))) ;; Make reading the file slightly faster by using `t' ;; instead of `disallowed' all over the place. - (when (string-match-p "\\`disallowed" status) + (when (or (string-match-p "\\`disallowed" status) + ;; UTS #46 messed us about with "status = valid" for + ;; invalid characters, so we need to check for "NV8" or + ;; "XV8". + (string= idna-status "NV8") + (string= idna-status "XV8")) (setq status "t")) (unless (or (equal status "valid") (equal status "deviation"))