; Improve doc strings in textsec.el
* lisp/international/textsec.el (textsec-email-address-suspicious-p) (textsec-email-address-header-suspicious-p, textsec-scripts) (textsec-single-script-p, textsec-covering-scripts) (textsec-restriction-level, textsec-mixed-numbers-p) (textsec-resolved-script-set) (textsec-single-script-confusable-p) (textsec-mixed-script-confusable-p) (textsec-whole-script-confusable-p) (textsec-local-address-suspicious-p) (textsec-bidi-controls-suspicious-p, textsec-name-suspicious-p) (textsec-suspicious-nonspacing-p): Doc fixes.
This commit is contained in:
parent
5f3f461cd0
commit
9fbe9dc4c3
1 changed files with 56 additions and 25 deletions
|
@ -44,15 +44,20 @@
|
|||
(require 'uni-scripts))
|
||||
|
||||
(defun textsec-scripts (string)
|
||||
"Return a list of Unicode scripts used in STRING.
|
||||
The scripts returned by this function use the Unicode Script property
|
||||
as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
"Return a list of Unicode scripts used by characters in STRING.
|
||||
The return value is a list where for each character in STRING,
|
||||
there is a list of script symbols for that character. Thus, each
|
||||
script's symbol can appear more than once; use `textsec-covering-scripts'
|
||||
to obtain a list in which each script appears at most once.
|
||||
The script symbols returned by this function follow the Unicode Script
|
||||
property of characters as defined by the Unicode Standard Annex 24 (UAX#24).
|
||||
See the Unicode UCD file Scripts.txt for the scripts defined by Unicode."
|
||||
(seq-map (lambda (char)
|
||||
(elt textsec--char-scripts char))
|
||||
string))
|
||||
|
||||
(defun textsec-single-script-p (string)
|
||||
"Return non-nil if STRING is all in a single Unicode script.
|
||||
"Return non-nil if STRING's characters belong to a single Unicode script.
|
||||
|
||||
Note that the concept of \"single script\" used by this function
|
||||
isn't obvious -- some mixtures of scripts count as a \"single
|
||||
|
@ -60,8 +65,8 @@ script\". See
|
|||
|
||||
https://www.unicode.org/reports/tr39/#Mixed_Script_Detection
|
||||
|
||||
for details. The Unicode scripts are as defined by the
|
||||
Unicode Standard Annex 24 (UAX#24)."
|
||||
for details. The Unicode script property of a character is defined by
|
||||
the Unicode Standard Annex 24 (UAX#24)."
|
||||
(let ((scripts (mapcar
|
||||
(lambda (s)
|
||||
(append s
|
||||
|
@ -98,9 +103,11 @@ Unicode Standard Annex 24 (UAX#24)."
|
|||
'(korea))))
|
||||
|
||||
(defun textsec-covering-scripts (string)
|
||||
"Return a minimal list of scripts used in STRING.
|
||||
"Return a minimal list of scripts used by characters in STRING.
|
||||
Note that a string may have several different minimal cover sets.
|
||||
The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
The return value is a list of script symbols.
|
||||
The script property of characters is defined by the Unicode Standard
|
||||
Annex 24 (UAX#24)."
|
||||
(let* ((scripts (textsec-scripts string))
|
||||
(set (car scripts)))
|
||||
(dolist (s scripts)
|
||||
|
@ -108,7 +115,8 @@ The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
|||
(sort (delq 'common (delq 'inherited set)) #'string<)))
|
||||
|
||||
(defun textsec-restriction-level (string)
|
||||
"Say what restriction level STRING qualifies for.
|
||||
"Return the restriction level for which STRING qualifies.
|
||||
The return value is a symbol.
|
||||
Levels are (in decreasing order of restrictiveness) `ascii-only',
|
||||
`single-script', `highly-restrictive', `moderately-restrictive',
|
||||
`minimally-restrictive' and `unrestricted'."
|
||||
|
@ -163,7 +171,14 @@ Levels are (in decreasing order of restrictiveness) `ascii-only',
|
|||
'unrestricted))))
|
||||
|
||||
(defun textsec-mixed-numbers-p (string)
|
||||
"Return non-nil if STRING includes numbers from different decimal systems."
|
||||
"Return non-nil if STRING includes numbers from different decimal systems.
|
||||
|
||||
This function examines only characters in STRING whose Unicode general
|
||||
category, as reported by `get-char-code-property' with its second
|
||||
argument \\='general-category, is Decimal_Number (Nd). It returns
|
||||
non-nil if it finds numerical characters from different numerical
|
||||
systems. For example, ASCII digit characters and ARABIC-INDIC DIGIT
|
||||
characters belong to different decimal systems."
|
||||
(>
|
||||
(length
|
||||
(seq-uniq
|
||||
|
@ -199,15 +214,20 @@ This algorithm is described in:
|
|||
|
||||
(defun textsec-resolved-script-set (string)
|
||||
"Return the resolved script set for STRING.
|
||||
This is the minimal covering script set for STRING, but is nil is
|
||||
STRING isn't a single script string.
|
||||
The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
The value is a list whose members are symbols of the minimal covering
|
||||
script set for STRING; the value is nil if STRING isn't a single-script
|
||||
string.
|
||||
The script property of characters is defined by the Unicode Standard
|
||||
Annex 24 (UAX#24)."
|
||||
(and (textsec-single-script-p string)
|
||||
(textsec-covering-scripts string)))
|
||||
|
||||
(defun textsec-single-script-confusable-p (string1 string2)
|
||||
"Say whether STRING1 and STRING2 are single-script confusables.
|
||||
The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
Two strings are said to be confusables if they might look very
|
||||
similar on display.
|
||||
The script property of characters is defined by the Unicode Standard
|
||||
Annex 24 (UAX#24)."
|
||||
(and (equal (textsec-unconfuse-string string1)
|
||||
(textsec-unconfuse-string string2))
|
||||
;; And they have to have at least one resolved script in
|
||||
|
@ -217,7 +237,10 @@ The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
|||
|
||||
(defun textsec-mixed-script-confusable-p (string1 string2)
|
||||
"Say whether STRING1 and STRING2 are mixed-script confusables.
|
||||
The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
Two strings are said to be confusables if they might look very
|
||||
similar on display.
|
||||
The script property of characters is defined by the Unicode Standard
|
||||
Annex 24 (UAX#24)."
|
||||
(and (equal (textsec-unconfuse-string string1)
|
||||
(textsec-unconfuse-string string2))
|
||||
;; And they have no resolved scripts in common.
|
||||
|
@ -225,8 +248,11 @@ The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
|||
(textsec-resolved-script-set string2)))))
|
||||
|
||||
(defun textsec-whole-script-confusable-p (string1 string2)
|
||||
"Say whether STRING1 and STRING2 are whole-script confusables.
|
||||
The scripts are as defined by the Unicode Standard Annex 24 (UAX#24)."
|
||||
"Say whether two single-script strings STRING1 and STRING2 are confusables.
|
||||
Two strings are said to be confusables if they might look very
|
||||
similar on display.
|
||||
The script property of characters is defined by the Unicode Standard
|
||||
Annex 24 (UAX#24)."
|
||||
(and (textsec-mixed-script-confusable-p string1 string2)
|
||||
(textsec-single-script-p string1)
|
||||
(textsec-single-script-p string2)))
|
||||
|
@ -287,7 +313,7 @@ or use certain other unusual mixtures of characters."
|
|||
|
||||
(defun textsec-local-address-suspicious-p (local)
|
||||
"Say whether LOCAL part of an email address looks suspicious.
|
||||
LOCAL is the bit before \"@\" in an email address.
|
||||
LOCAL is the part before \"@\" in an email address, a string.
|
||||
|
||||
If it isn't suspicious, return nil. If it is, return a string explaining
|
||||
the potential problem.
|
||||
|
@ -307,7 +333,7 @@ certain other unusual mixtures of characters."
|
|||
(format "`%s' contains invalid dots" local))))
|
||||
|
||||
(defun textsec-bidi-controls-suspicious-p (string)
|
||||
"Return non-nil of STRING uses bidi controls in suspicious ways.
|
||||
"Return non-nil if STRING uses bidirectional controls in suspicious ways.
|
||||
If STRING doesn't include any suspicious uses of bidirectional
|
||||
formatting control characters, return nil. Otherwise, return the
|
||||
index of the first character in STRING affected by such suspicious
|
||||
|
@ -315,8 +341,8 @@ use of bidi controls. If the returned value is beyond the length
|
|||
of STRING, it means any text following STRING on display might be
|
||||
affected by bidi controls in STRING."
|
||||
(with-temp-buffer
|
||||
;; We add a string that's representative of some text that could
|
||||
;; follow STRING, with the purpose of detecting residual bidi
|
||||
;; We follow STRING with text that's representative of some text
|
||||
;; that could follow it, with the purpose of detecting residual bidi
|
||||
;; state at end of STRING which could then affect the following
|
||||
;; text.
|
||||
(insert string "a1א:!")
|
||||
|
@ -327,8 +353,8 @@ affected by bidi controls in STRING."
|
|||
|
||||
(defun textsec-name-suspicious-p (name)
|
||||
"Say whether NAME looks suspicious.
|
||||
NAME is (for instance) the free-text display name part of an
|
||||
email address.
|
||||
NAME is a string, for instance, the free-text display name part
|
||||
of an email address.
|
||||
|
||||
If it isn't suspicious, return nil. If it is, return a string
|
||||
explaining the potential problem.
|
||||
|
@ -360,6 +386,10 @@ other unusual mixtures of characters."
|
|||
If it doesn't, return nil. If it does, return a string explaining
|
||||
the potential problem.
|
||||
|
||||
Nonspacing characters are those whose general Unicode category is
|
||||
Mn (nonspacing mark) or Me (enclosing mark). Examples include
|
||||
diacritics and accents.
|
||||
|
||||
Use of nonspacing characters is considered suspicious if there are
|
||||
two or more consecutive identical nonspacing characters, or too many
|
||||
consecutive nonspacing characters."
|
||||
|
@ -385,10 +415,11 @@ consecutive nonspacing characters."
|
|||
nil)))
|
||||
|
||||
(defun textsec-email-address-suspicious-p (address)
|
||||
"Say whether EMAIL address looks suspicious.
|
||||
"Say whether email ADDRESS looks suspicious.
|
||||
If it isn't, return nil. If it is, return a string explaining the
|
||||
potential problem.
|
||||
|
||||
ADDRESS should be a string that specifies an email address.
|
||||
An email address is considered suspicious if either of its two
|
||||
parts -- the local address name or the domain -- is found to be
|
||||
suspicious by, respectively, `textsec-local-address-suspicious-p'
|
||||
|
@ -399,7 +430,7 @@ and `textsec-domain-suspicious-p'."
|
|||
(textsec-local-address-suspicious-p local))))
|
||||
|
||||
(defun textsec-email-address-header-suspicious-p (email)
|
||||
"Say whether EMAIL looks suspicious.
|
||||
"Say whether EMAIL address specification looks suspicious.
|
||||
If it isn't, return nil. If it is, return a string explaining the
|
||||
potential problem.
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue