Add textsec-domain-suspicious-p
* .gitignore: Ignore idna-mapping.el. * admin/notes/unicode: Note idna-mapping file. * admin/unidata/IdnaMappingTable.txt: New file. * admin/unidata/Makefile.in (all): Generate idna-mapping.el. * admin/unidata/unidata-gen.el (unidata-gen-idna-mapping): Generate. * lisp/international/textsec.el (textsec-domain-suspicious-p): New function.
This commit is contained in:
parent
702ce8dc3e
commit
9f25c41ad4
10 changed files with 15348 additions and 9 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -221,6 +221,7 @@ lisp/international/emoji-zwj.el
|
|||
lisp/international/emoji-labels.el
|
||||
lisp/international/eucjp-ms.el
|
||||
lisp/international/uni-*.el
|
||||
lisp/international/idna-mapping.el
|
||||
lisp/language/pinyin.el
|
||||
|
||||
# Documentation.
|
||||
|
|
|
@ -26,13 +26,15 @@ Emacs uses the following files from the Unicode Character Database
|
|||
. BidiCharacterTest.txt
|
||||
|
||||
Emacs also uses the file emoji-test.txt which should be imported from
|
||||
the Unicode's Public/emoji/ directory.
|
||||
the Unicode's Public/emoji/ directory, and IdnaMappingTable.txt from
|
||||
the Public/idna/ directory.
|
||||
|
||||
First, the first 14 files and emoji-test.txt need to be copied into
|
||||
admin/unidata/, and the file https://www.unicode.org/copyright.html
|
||||
should be copied over copyright.html in admin/unidata (some of them
|
||||
might need trailing whitespace removed before they can be committed to
|
||||
the Emacs repository).
|
||||
First, the first 14 files, emoji-test.txt and IdnaMappingTable.txt
|
||||
need to be copied into admin/unidata/, and the file
|
||||
https://www.unicode.org/copyright.html should be copied over
|
||||
copyright.html in admin/unidata (some of them might need trailing
|
||||
whitespace removed before they can be committed to the Emacs
|
||||
repository).
|
||||
|
||||
Then Emacs should be rebuilt for them to take effect. Rebuilding
|
||||
Emacs updates several derived files elsewhere in the Emacs source
|
||||
|
|
8921
admin/unidata/IdnaMappingTable.txt
Normal file
8921
admin/unidata/IdnaMappingTable.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -45,7 +45,8 @@ unifiles = $(addprefix ${unidir}/,$(sort $(shell sed -n 's/^[ \t][ \t]*${lparen}
|
|||
|
||||
all: ${top_srcdir}/src/macuvs.h ${unifiles} ${unidir}/charscript.el \
|
||||
${unidir}/charprop.el ${unidir}/emoji-zwj.el ${unidir}/emoji-labels.el \
|
||||
${unidir}/uni-scripts.el ${unidir}/uni-confusable.el
|
||||
${unidir}/uni-scripts.el ${unidir}/uni-confusable.el \
|
||||
${unidir}/idna-mapping.el
|
||||
|
||||
## Specify .elc as an order-only prereq so as to not needlessly rebuild
|
||||
## target just because the .elc is missing.
|
||||
|
@ -95,6 +96,11 @@ ${unidir}/uni-confusable.el: ${srcdir}/unidata-gen.el \
|
|||
$(AM_V_GEN)${emacs} -L ${srcdir} \
|
||||
-l unidata-gen.el -f unidata-gen-confusable $@
|
||||
|
||||
${unidir}/idna-mapping.el: ${srcdir}/unidata-gen.el \
|
||||
${srcdir}/IdnaMappingTable.txt
|
||||
$(AM_V_GEN)${emacs} -L ${srcdir} \
|
||||
-l unidata-gen.el -f unidata-gen-idna-mapping $@
|
||||
|
||||
.PHONY: charscript.el
|
||||
charscript.el: ${unidir}/charscript.el
|
||||
|
||||
|
|
|
@ -61,6 +61,6 @@ PropertyValueAliases.txt
|
|||
https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
|
||||
2022-01-17
|
||||
|
||||
confusables.txt
|
||||
https://www.unicode.org/Public/security/latest/confusables.txt
|
||||
IdnaMappingTable.txt
|
||||
https://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt
|
||||
2022-01-18
|
||||
|
|
|
@ -1602,6 +1602,46 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)."
|
|||
(insert ")")
|
||||
(unidata-gen-charprop file (buffer-string)))))
|
||||
|
||||
(defun unidata-gen-idna-mapping (&optional file)
  "Build a char-table from IdnaMappingTable.txt and emit it for FILE.
If FILE is nil, it is taken (popped) from `command-line-args-left',
which is how the Makefile passes the output file name.

Each matching line of the data file sets one code point or range:
- status \"valid\" or \"deviation\": nothing is stored (entry stays nil);
- status starting with \"disallowed\": the entry is t;
- status \"mapped\": the entry is the string made of the listed
  hexadecimal code points;
- any other status: the entry is the symbol named by the status.

The table is printed as the value of a `defconst' of
`idna-mapping-table' and that text is handed, together with FILE, to
`unidata-gen-charprop'."
  (let ((output (or file
                    ;; Running from Makefile.
                    (pop command-line-args-left)))
        (table (make-char-table nil)))
    (with-temp-buffer
      (unidata-gen--insert-file "IdnaMappingTable.txt")
      (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?"
                                nil t)
        (let* ((lo (string-to-number (match-string 1) 16))
               (hi (match-string 2))
               (raw-status (match-string 3))
               (mapping (match-string 4))
               ;; Collapse every "disallowed*" status to "t" so the
               ;; generated file is slightly faster to read.
               (status (if (string-match-p "\\`disallowed" raw-status)
                           "t"
                         raw-status)))
          (unless (member status '("valid" "deviation"))
            (set-char-table-range
             table
             ;; A single code point, or an inclusive (FROM . TO) range.
             (if hi
                 (cons lo (string-to-number hi 16))
               lo)
             (if (equal status "mapped")
                 (apply #'string
                        (mapcar (lambda (hex)
                                  (string-to-number hex 16))
                                (split-string (string-trim mapping))))
               (intern status))))))
      (with-temp-buffer
        (insert "(defconst idna-mapping-table\n")
        ;; Make sure the whole table is printed, never abbreviated.
        (let ((print-length nil))
          (prin1 table (current-buffer)))
        (insert ")")
        (unidata-gen-charprop output (buffer-string))))))
|
||||
|
||||
|
||||
|
||||
;;; unidata-gen.el ends here
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
(require 'cl-lib)
|
||||
(require 'uni-confusable)
|
||||
(require 'ucs-normalize)
|
||||
(require 'idna-mapping)
|
||||
(require 'puny)
|
||||
|
||||
(defvar textsec--char-scripts nil)
|
||||
|
||||
|
@ -222,6 +224,18 @@ STRING isn't a single script string."
|
|||
(textsec-single-script-p string1)
|
||||
(textsec-single-script-p string2)))
|
||||
|
||||
(defun textsec-domain-suspicious-p (domain)
  "Say whether DOMAIN looks suspicious.
Return nil if it does not.  Otherwise return a string describing the
first problem found.

DOMAIN is considered suspicious if it contains a character whose entry
in `idna-mapping-table' is t (disallowed), or if
`puny-highly-restrictive-domain-p' returns nil for it."
  (catch 'found
    ;; Report the first disallowed character, if any.
    (seq-do
     (lambda (char)
       (when (eq (elt idna-mapping-table char) t)
         (throw 'found (format "Disallowed character: `%s' (#x%x)"
                               (string char) char))))
     domain)
    (unless (puny-highly-restrictive-domain-p domain)
      ;; The message must be passed through `format'; otherwise the
      ;; caller gets the literal "%s" template instead of the domain.
      (throw 'found (format "`%s' is not highly restrictive" domain)))
    nil))
|
||||
|
||||
(provide 'textsec)
|
||||
|
||||
;;; textsec.el ends here
|
||||
|
|
|
@ -110,4 +110,9 @@
|
|||
(should-not (textsec-whole-script-confusable-p "paypal" "pаypаl"))
|
||||
(should (textsec-whole-script-confusable-p "scope""ѕсоре")))
|
||||
|
||||
(ert-deftest test-suspiction-domain ()
  "Check `textsec-domain-suspicious-p' on plain and suspicious domains."
  ;; A plain ASCII domain must not be reported.
  (should-not (textsec-domain-suspicious-p "foo.org"))
  ;; A domain containing a slash, or an embedded LEFT-TO-RIGHT ISOLATE
  ;; character, must be reported.
  (dolist (domain '("foo/bar.org"
                    "f\N{LEFT-TO-RIGHT ISOLATE}oo.org"))
    (should (textsec-domain-suspicious-p domain))))
|
||||
|
||||
;;; textsec-tests.el ends here
|
||||
|
|
6344
test/lisp/net/puny-resources/IdnaTestV2.txt
Normal file
6344
test/lisp/net/puny-resources/IdnaTestV2.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -68,4 +68,10 @@
|
|||
"xn--b.com-gra"))
|
||||
(should (equal (puny-encode-string "Bä.com") "xn--b.com-gra")))
|
||||
|
||||
;;; TODO!
|
||||
;; puny-resources/IdnaTestV2.txt has a bunch of tests, and they should
|
||||
;; be implemented. However, the puny encoding does not fully
|
||||
;; implement https://www.unicode.org/reports/tr46/#Conformance yet, so
|
||||
;; they would currently fail.
|
||||
|
||||
;;; puny-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue