Add textsec predicates for different types of confusability

* lisp/international/textsec.el (textsec-resolved-script-set)
(textsec-single-script-confusable-p)
(textsec-mixed-script-confusable-p)
(textsec-whole-script-confusable-p): New functions.
This commit is contained in:
Lars Ingebrigtsen 2022-01-18 10:24:32 +01:00
parent 19fefea1ca
commit 4eebf528fc
2 changed files with 48 additions and 1 deletions

View file

@ -192,6 +192,36 @@ This algorithm is described in:
(string char)))
(ucs-normalize-NFD-string string)))))
(defun textsec-resolved-script-set (string)
  "Return the resolved script set for STRING, or nil.
When STRING satisfies `textsec-single-script-p', the value is the
minimal covering script set as computed by
`textsec-covering-scripts'.  Otherwise the value is nil."
  ;; Only single-script strings have a resolved script set.
  (when (textsec-single-script-p string)
    (textsec-covering-scripts string)))
(defun textsec-single-script-confusable-p (string1 string2)
  "Return non-nil if STRING1 and STRING2 are single script confusables.
This holds when `textsec-unconfuse-string' gives `equal' results
for both strings and their resolved script sets (see
`textsec-resolved-script-set') have at least one script in common.
The non-nil value is the result of `seq-intersection' on the two
resolved script sets."
  (when (equal (textsec-unconfuse-string string1)
               (textsec-unconfuse-string string2))
    ;; The unconfused forms match; now require a shared resolved
    ;; script.  The sets are only computed once the first check
    ;; has passed.
    (let ((scripts1 (textsec-resolved-script-set string1))
          (scripts2 (textsec-resolved-script-set string2)))
      (seq-intersection scripts1 scripts2))))
(defun textsec-mixed-script-confusable-p (string1 string2)
  "Return t if STRING1 and STRING2 are mixed script confusables.
This holds when `textsec-unconfuse-string' gives `equal' results
for both strings while their resolved script sets (see
`textsec-resolved-script-set') share no script at all."
  (let ((unconfused1 (textsec-unconfuse-string string1))
        (unconfused2 (textsec-unconfuse-string string2)))
    (and (equal unconfused1 unconfused2)
         ;; No resolved script may be common to both strings.
         (not (seq-intersection
               (textsec-resolved-script-set string1)
               (textsec-resolved-script-set string2))))))
(defun textsec-whole-script-confusable-p (string1 string2)
  "Return non-nil if STRING1 and STRING2 are whole script confusables.
The strings must be mixed script confusables according to
`textsec-mixed-script-confusable-p', and each of them must also
satisfy `textsec-single-script-p'."
  (when (textsec-mixed-script-confusable-p string1 string2)
    ;; Both strings must individually be single-script strings.
    (and (textsec-single-script-p string1)
         (textsec-single-script-p string2))))
;; Announce the `textsec' feature so that `(require 'textsec)' succeeds.
(provide 'textsec)
;;; textsec.el ends here

View file

@ -86,11 +86,28 @@
(should-not (textsec-mixed-numbers-p "8foo8"))
(should (textsec-mixed-numbers-p "8foo")))
(ert-deftest test-resolved ()
  "Check the value of `textsec-resolved-script-set' on two strings."
  ;; This string is expected to resolve to exactly the Latin script.
  (let ((scripts (textsec-resolved-script-set "ljeto")))
    (should (equal scripts '(latin))))
  ;; NOTE(review): this literal appears to mix Latin letters with
  ;; look-alike letters from another script, so no resolved script
  ;; set is expected -- confirm the code points before editing it.
  (let ((scripts (textsec-resolved-script-set "Сirсlе")))
    (should-not scripts)))
(ert-deftest test-confusable ()
  "Exercise the unconfuse function and the confusability predicates."
  ;; The confusable input is spelled with an explicit character-name
  ;; escape so the U+01C9 digraph cannot be silently folded into the
  ;; ASCII letters \"lj\" by tools that normalize the file.  With the
  ;; plain ASCII spelling, the next two assertions would contradict
  ;; each other.
  (should (equal (textsec-unconfuse-string "\N{LATIN SMALL LETTER LJ}eto")
                 "ljeto"))
  (should (textsec-ascii-confusable-p "\N{LATIN SMALL LETTER LJ}eto"))
  (should-not (textsec-ascii-confusable-p "ljeto"))
  ;; NOTE(review): both literals below are empty in this copy of the
  ;; file; they may have lost non-ASCII characters -- confirm against
  ;; the upstream test file.
  (should (equal (textsec-unconfuse-string "") ""))
  (should-not (textsec-ascii-confusable-p ""))

  ;; NOTE(review): the \"ljeto\" pairs below compare a string with an
  ;; identical string in this copy; presumably the first argument was
  ;; meant to be the U+01C9 spelling -- confirm against upstream.
  (should (textsec-single-script-confusable-p "ljeto" "ljeto"))
  (should-not (textsec-single-script-confusable-p "paypal" "pаypаl"))
  (should-not (textsec-single-script-confusable-p "scope" "ѕсоре"))

  (should-not (textsec-mixed-script-confusable-p "ljeto" "ljeto"))
  (should (textsec-mixed-script-confusable-p "paypal" "pаypаl"))
  (should (textsec-mixed-script-confusable-p "scope" "ѕсоре"))

  (should-not (textsec-whole-script-confusable-p "ljeto" "ljeto"))
  (should-not (textsec-whole-script-confusable-p "paypal" "pаypаl"))
  (should (textsec-whole-script-confusable-p "scope" "ѕсоре")))
;;; textsec-tests.el ends here