Add textsec functions for verifying email addresses
* lisp/international/characters.el (bidi-control-characters): Rename from glyphless--bidi-control-characters for use in textsec, and add LRM/RLM/ALM. (update-glyphless-char-display): Adjust the code. * lisp/international/textsec.el (textsec-local-address-suspicious-p) (textsec-name-suspicious-p, textsec-suspicious-nonspacing-p) (textsec-email-suspicious-p): New functions.
This commit is contained in:
parent
4f23dbaa67
commit
ce63f91025
3 changed files with 124 additions and 5 deletions
|
@ -1526,8 +1526,11 @@ Setup `char-width-table' appropriate for non-CJK language environment."
|
|||
|
||||
;; We can't use the \N{name} things here, because this file is used
|
||||
;; too early in the build process.
|
||||
(defvar glyphless--bidi-control-characters
|
||||
'(#x202a ; ?\N{left-to-right embedding}
|
||||
(defvar bidi-control-characters
|
||||
'(#x200e ; ?\N{left-to-right mark}
|
||||
#x200f ; ?\N{right-to-left mark}
|
||||
#x061c ; ?\N{arabic letter mark}
|
||||
#x202a ; ?\N{left-to-right embedding}
|
||||
#x202b ; ?\N{right-to-left embedding}
|
||||
#x202d ; ?\N{left-to-right override}
|
||||
#x202e ; ?\N{right-to-left override}
|
||||
|
@ -1535,7 +1538,8 @@ Setup `char-width-table' appropriate for non-CJK language environment."
|
|||
#x2067 ; ?\N{right-to-left isolate}
|
||||
#x2068 ; ?\N{first strong isolate}
|
||||
#x202c ; ?\N{pop directional formatting}
|
||||
#x2069)) ; ?\N{pop directional isolate})
|
||||
#x2069) ; ?\N{pop directional isolate}
|
||||
"List of bidirectional control characters.")
|
||||
|
||||
(defun update-glyphless-char-display (&optional variable value)
|
||||
"Make the setting of `glyphless-char-display-control' take effect.
|
||||
|
@ -1582,8 +1586,7 @@ option `glyphless-char-display'."
|
|||
(or (aref char-acronym-table from)
|
||||
"UNK")))
|
||||
(when (or (eq target 'format-control)
|
||||
(memq from
|
||||
glyphless--bidi-control-characters))
|
||||
(memq from bidi-control-characters))
|
||||
(set-char-table-range glyphless-char-display
|
||||
from this-method)))
|
||||
(setq from (1+ from))))))
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
(require 'ucs-normalize)
|
||||
(require 'idna-mapping)
|
||||
(require 'puny)
|
||||
(require 'mail-parse)
|
||||
|
||||
(defvar textsec--char-scripts nil)
|
||||
|
||||
|
@ -225,6 +226,9 @@ STRING isn't a single script string."
|
|||
(textsec-single-script-p string2)))
|
||||
|
||||
(defun textsec-domain-suspicious-p (domain)
|
||||
"Say whether DOMAIN looks suspicious.
|
||||
If it isn't, nil is returned. If it is, a string explaining the
|
||||
problem is returned."
|
||||
(catch 'found
|
||||
(seq-do
|
||||
(lambda (char)
|
||||
|
@ -236,6 +240,79 @@ STRING isn't a single script string."
|
|||
(throw 'found "%s is not highly restrictive"))
|
||||
nil))
|
||||
|
||||
(defun textsec-local-address-suspicious-p (local)
|
||||
"Say whether LOCAL looks suspicious.
|
||||
LOCAL is the bit before \"@\" in an email address.
|
||||
|
||||
If it suspicious, nil is returned. If it is, a string explaining
|
||||
the problem is returned."
|
||||
(cond
|
||||
((not (equal local (ucs-normalize-NFKC-string local)))
|
||||
(format "`%s' is not in normalized format `%s'"
|
||||
local (ucs-normalize-NFKC-string local)))
|
||||
((textsec-mixed-numbers-p local)
|
||||
(format "`%s' contains numbers from different number systems" local))
|
||||
((eq (textsec-restriction-level local) 'unrestricted)
|
||||
(format "`%s' isn't restrictive enough" local))
|
||||
((string-match-p "\\`\\.\\|\\.\\'\\|\\.\\." local)
|
||||
(format "`%s' contains invalid dots" local))))
|
||||
|
||||
(defun textsec-name-suspicious-p (name)
|
||||
"Say whether NAME looks suspicious.
|
||||
NAME is (for instance) the free-text name from an email address.
|
||||
|
||||
If it suspicious, nil is returned. If it is, a string explaining
|
||||
the problem is returned."
|
||||
(cond
|
||||
((not (equal name (ucs-normalize-NFC-string name)))
|
||||
(format "`%s' is not in normalized format `%s'"
|
||||
name (ucs-normalize-NFC-string name)))
|
||||
((seq-find (lambda (char)
|
||||
(and (member char bidi-control-characters)
|
||||
(not (member char
|
||||
'( ?\N{left-to-right mark}
|
||||
?\N{right-to-left mark}
|
||||
?\N{arabic letter mark})))))
|
||||
name)
|
||||
(format "The string contains bidirectional control characters"))
|
||||
((textsec-suspicious-nonspacing-p name))))
|
||||
|
||||
(defun textsec-suspicious-nonspacing-p (string)
|
||||
"Say whether STRING has a suspicious use of nonspacing characters.
|
||||
If it suspicious, nil is returned. If it is, a string explaining
|
||||
the problem is returned."
|
||||
(let ((prev nil)
|
||||
(nonspace-count 0))
|
||||
(catch 'found
|
||||
(seq-do
|
||||
(lambda (char)
|
||||
(let ((nonspacing
|
||||
(memq (get-char-code-property char 'general-category)
|
||||
'(Cf Cc Mn))))
|
||||
(when (and nonspacing
|
||||
(equal char prev))
|
||||
(throw 'found "Two identical nonspacing characters in a row"))
|
||||
(setq nonspace-count (if nonspacing
|
||||
(1+ nonspace-count)
|
||||
0))
|
||||
(when (> nonspace-count 4)
|
||||
(throw 'found
|
||||
"Excessive number of nonspacing characters in a row"))
|
||||
(setq prev char)))
|
||||
string)
|
||||
nil)))
|
||||
|
||||
(defun textsec-email-suspicious-p (email)
|
||||
"Say whether EMAIL looks suspicious.
|
||||
If it isn't, nil is returned. If it is, a string explaining the
|
||||
problem is returned."
|
||||
(pcase-let* ((`(,address . ,name) (mail-header-parse-address email t))
|
||||
(`(,local ,domain) (split-string address "@")))
|
||||
(or
|
||||
(textsec-domain-suspicious-p domain)
|
||||
(textsec-local-address-suspicious-p local)
|
||||
(textsec-name-suspicious-p name))))
|
||||
|
||||
(provide 'textsec)
|
||||
|
||||
;;; textsec.el ends here
|
||||
|
|
|
@ -115,4 +115,43 @@
|
|||
(should-not (textsec-domain-suspicious-p "foo.org"))
|
||||
(should (textsec-domain-suspicious-p "f\N{LEFT-TO-RIGHT ISOLATE}oo.org")))
|
||||
|
||||
(ert-deftest test-suspicious-local ()
|
||||
(should-not (textsec-local-address-suspicious-p "larsi"))
|
||||
(should (textsec-local-address-suspicious-p ".larsi"))
|
||||
(should (textsec-local-address-suspicious-p "larsi."))
|
||||
(should-not (textsec-local-address-suspicious-p "la.rsi"))
|
||||
(should (textsec-local-address-suspicious-p "lar..si"))
|
||||
|
||||
(should-not (textsec-local-address-suspicious-p "LÅRSI"))
|
||||
(should (textsec-local-address-suspicious-p "LÅRSI"))
|
||||
|
||||
(should (textsec-local-address-suspicious-p "larsi8৪")))
|
||||
|
||||
(ert-deftest test-suspicious-name ()
|
||||
(should-not (textsec-name-suspicious-p "Lars Ingebrigtsen"))
|
||||
(should (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
|
||||
(should-not (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
|
||||
|
||||
(should (textsec-name-suspicious-p
|
||||
"Lars Ingebrigtsen\N{LEFT-TO-RIGHT ISOLATE}"))
|
||||
(should-not (textsec-name-suspicious-p
|
||||
"Lars Ingebrigtsen\N{LEFT-TO-RIGHT MARK}"))
|
||||
|
||||
(should (textsec-name-suspicious-p
|
||||
"\N{LEFT-TO-RIGHT MARK}\N{LEFT-TO-RIGHT MARK}Lars Ingebrigtsen"))
|
||||
(should-not (textsec-name-suspicious-p
|
||||
"\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}Lars Ingebrigtsen"))
|
||||
(should (textsec-name-suspicious-p
|
||||
"\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}\N{LEFT-TO-RIGHT MARK}Lars Ingebrigtsen")))
|
||||
|
||||
(ert-deftest test-suspicious-email ()
|
||||
(should-not
|
||||
(textsec-email-suspicious-p "Lars Ingebrigtsen <larsi@gnus.org>"))
|
||||
(should
|
||||
(textsec-email-suspicious-p "LÅrs Ingebrigtsen <larsi@gnus.org>"))
|
||||
(should
|
||||
(textsec-email-suspicious-p "Lars Ingebrigtsen <.larsi@gnus.org>"))
|
||||
(should
|
||||
(textsec-email-suspicious-p "Lars Ingebrigtsen <larsi@gn\N{LEFT-TO-RIGHT ISOLATE}us.org>")))
|
||||
|
||||
;;; textsec-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue