Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689)

* doc/emacs/search.texi (Lax Search): Document
char-fold-symmetric, char-fold-include, char-fold-exclude.

* lisp/char-fold.el (char-fold--default-include)
(char-fold--default-exclude, char-fold--default-symmetric)
(char-fold--previous): New defconsts.
(char-fold-include, char-fold-exclude, char-fold-symmetric):
New defcustoms.
(char-fold-make-table): Use them.
(char-fold-update-table): New function called at top-level.

* test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly)
(char-fold--permutation): New functions.
(char-fold--test-without-customization)
(char-fold--test-with-customization): New tests.
This commit is contained in:
Juri Linkov 2019-07-23 23:27:28 +03:00
parent a48726ebae
commit 376f5df3cc
4 changed files with 222 additions and 12 deletions

View file

@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands have a different
default, controlled by a separate option; see @ref{Replacement and Lax
Matches}.)
Like with case folding, typing an explicit variant of a character,
such as @code{@"a}, as part of the search string disables character
folding for that search. If you delete such a character from the
search string, this effect ceases.
@vindex char-fold-symmetric
By default, typing an explicit variant of a character, such as
@code{@"a}, as part of the search string doesn't match its base
character, such as @code{a}. But if you customize the variable
@code{char-fold-symmetric} to @code{t}, then search commands treat
equivalent characters the same and use of any of a set of equivalent
characters in a search string finds any of them in the text being
searched, so typing an accented character @code{@"a} matches the
letter @code{a} as well as all the other variants like @code{@'a}.
@vindex char-fold-include
@vindex char-fold-exclude
You can add new foldings using the customizable variable
@code{char-fold-include}, or remove the existing ones using the
customizable variable @code{char-fold-exclude}.
@node Replace
@section Replacement Commands

View file

@ -1175,6 +1175,15 @@ rather than stopping after one level, such that searching for
e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER
IOTA WITH OXIA.
+++
*** New char-folding options: 'char-fold-include' lets you add ad hoc
foldings, 'char-fold-exclude' to remove foldings from default decomposition,
and 'char-fold-symmetric' to search for any of an equivalence class of
characters. For example, with a 'nil' value of 'char-fold-symmetric'
you can search for "e" to find "é", but not vice versa. With a non-nil
value you can search for either, for example, you can search for "é"
to find "e".
** Debugger
+++

View file

@ -22,7 +22,18 @@
;;; Code:
(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1))
(eval-and-compile
(put 'char-fold-table 'char-table-extra-slots 1)
(defconst char-fold--default-include
'((?\" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "🙷" "🙶" "🙸" "«" "»")
(?' "" "" "" "" "" "" "" "" "󠀢" "" "" "" "")
(?` "" "" "" "󠀢" "" "")))
(defconst char-fold--default-exclude nil)
(defconst char-fold--default-symmetric nil)
(defconst char-fold--previous (list char-fold--default-include
char-fold--default-exclude
char-fold--default-symmetric)))
(eval-and-compile
(defun char-fold-make-table ()
@ -116,21 +127,70 @@
(aref equiv (car simpler-decomp)))))))))))
table)
;; Add some manual entries.
(dolist (it '((?\" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "🙷" "🙶" "🙸" "«" "»")
(?' "" "" "" "" "" "" "" "" "󠀢" "" "" "" "")
(?` "" "" "" "󠀢" "" "")))
;; Add some entries to default decomposition
(dolist (it (or (bound-and-true-p char-fold-include)
char-fold--default-include))
(let ((idx (car it))
(chars (cdr it)))
(aset equiv idx (append chars (aref equiv idx)))))
;; Remove some entries from default decomposition
(dolist (it (or (bound-and-true-p char-fold-exclude)
char-fold--default-exclude))
(let ((idx (car it))
(chars (cdr it)))
(when (aref equiv idx)
(dolist (char chars)
(aset equiv idx (remove char (aref equiv idx)))))))
;; Add symmetric entries
(when (or (bound-and-true-p char-fold-symmetric)
char-fold--default-symmetric)
(let ((symmetric (make-hash-table :test 'eq)))
;; Initialize hashes
(map-char-table
(lambda (char decomp-list)
(puthash char (make-hash-table :test 'equal) symmetric)
(dolist (decomp decomp-list)
(puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric)))
equiv)
(map-char-table
(lambda (char decomp-list)
(dolist (decomp decomp-list)
(if (< (length decomp) 2)
;; Add single-char symmetric pairs to hash
(let ((decomp-list (cons (char-to-string char) decomp-list))
(decomp-hash (gethash (string-to-char decomp) symmetric)))
(dolist (decomp2 decomp-list)
(unless (equal decomp decomp2)
(puthash decomp2 t decomp-hash)
(puthash decomp t (gethash (string-to-char decomp2) symmetric)))))
;; Add multi-char symmetric pairs to equiv-multi char-table
(let ((decomp-list (cons (char-to-string char) decomp-list))
(prefix (string-to-char decomp))
(suffix (substring decomp 1)))
(puthash decomp t (gethash char symmetric))
(dolist (decomp2 decomp-list)
(if (< (length decomp2) 2)
(aset equiv-multi prefix
(cons (cons suffix (regexp-quote decomp2))
(aref equiv-multi prefix)))))))))
equiv)
;; Update equiv char-table from hash
(maphash
(lambda (char decomp-hash)
(let (schars)
(maphash (lambda (schar _) (push schar schars)) decomp-hash)
(aset equiv char schars)))
symmetric)))
;; Convert the lists of characters we compiled into regexps.
(map-char-table
(lambda (char decomp-list)
(let ((re (regexp-opt (cons (char-to-string char) decomp-list))))
(if (consp char) ; FIXME: char never is consp?
(set-char-table-range equiv char re)
(aset equiv char re))))
(aset equiv char re)))
equiv)
equiv)))
@ -159,6 +219,61 @@ For instance, the default alist for ?f includes:
Exceptionally for the space character (32), ALIST is ignored.")
(defun char-fold-update-table ()
(let ((new (list (or (bound-and-true-p char-fold-include)
char-fold--default-include)
(or (bound-and-true-p char-fold-exclude)
char-fold--default-exclude)
(or (bound-and-true-p char-fold-symmetric)
char-fold--default-symmetric))))
(unless (equal char-fold--previous new)
(setq char-fold-table (char-fold-make-table)
char-fold--previous new))))
(defcustom char-fold-include char-fold--default-include
"Additional character foldings to include.
Each entry is a list of a character and the strings that fold into it."
:type '(alist :key-type (character :tag "Fold to character")
:value-type (repeat (string :tag "Fold from string")))
:initialize #'custom-initialize-default
:set (lambda (sym val)
(custom-set-default sym val)
(char-fold-update-table))
:group 'isearch
:version "27.1")
(defcustom char-fold-exclude char-fold--default-exclude
"Character foldings to remove from default decompisitions.
Each entry is a list of a character and the strings to remove from folding."
:type '(alist :key-type (character :tag "Fold to character")
:value-type (repeat (string :tag "Fold from string")))
:initialize #'custom-initialize-default
:set (lambda (sym val)
(custom-set-default sym val)
(char-fold-update-table))
:group 'isearch
:version "27.1")
(defcustom char-fold-symmetric char-fold--default-symmetric
"Non-nil means char-fold searching treats equivalent chars the same.
That is, use of any of a set of char-fold equivalent chars in a search
string finds any of them in the text being searched.
If nil then only the \"base\" or \"canonical\" char of the set matches
any of them. The others match only themselves, even when char-folding
is turned on."
:type 'boolean
:initialize #'custom-initialize-default
:set (lambda (sym val)
(custom-set-default sym val)
(char-fold-update-table))
:group 'isearch
:version "27.1")
(char-fold-update-table)
(defun char-fold--make-space-string (n)
"Return a string that matches N spaces."
(format "\\(?:%s\\|%s\\)"

View file

@ -44,6 +44,16 @@
(should (string-match (char-fold--ascii-upcase re) (downcase it)))
(should (string-match (char-fold--ascii-downcase re) (upcase it)))))))
(defun char-fold--test-no-match-exactly (string &rest strings-to-match)
(let ((re (concat "\\`" (char-fold-to-regexp string) "\\'")))
(dolist (it strings-to-match)
(should-not (string-match re it)))
;; Case folding
(let ((case-fold-search t))
(dolist (it strings-to-match)
(should-not (string-match (char-fold--ascii-upcase re) (downcase it)))
(should-not (string-match (char-fold--ascii-downcase re) (upcase it)))))))
(defun char-fold--test-search-with-contents (contents string)
(with-temp-buffer
(insert contents)
@ -53,6 +63,11 @@
(should (char-fold-search-forward string nil 'noerror))
(should (char-fold-search-backward string nil 'noerror))))
(defun char-fold--permutation (strings)
(mapcar (lambda (string)
(cons string (remove string strings)))
strings))
(ert-deftest char-fold--test-consistency ()
(dotimes (n 30)
@ -132,5 +147,65 @@
;; Ensure it took less than a second.
(should (< (- (time-to-seconds) time) 1))))))
(ert-deftest char-fold--test-without-customization ()
(let* ((matches
'(
("e" "" "" "ë" "")
("ι"
"ί" ;; 1 level decomposition
"" ;; 2 level decomposition
;; FIXME:
;; "ΐ" ;; 3 level decomposition
)
)))
(dolist (strings matches)
(apply 'char-fold--test-match-exactly strings))))
(ert-deftest char-fold--test-with-customization ()
:tags '(:expensive-test)
(let* ((char-fold-include
'(
( "ss") ;; de
(?o "ø") ;; da no nb nn
(?l "ł") ;; pl
))
;; FIXME: move language-specific settings to defaults
(char-fold-exclude
'(
(?a "å") ;; sv da no nb nn
(?a "ä") ;; sv fi et
(?o "ö") ;; sv fi et
(?n "ñ") ;; es
( "й") ;; ru
))
(char-fold-symmetric t)
(char-fold-table (char-fold-make-table))
(matches
'(
("e" "" "" "ë" "")
("е" "ё" "ё")
("ι" "ί" ""
;; FIXME: "ΐ"
)
("ß" "ss")
("o" "ø")
("l" "ł")
))
(no-matches
'(
("a" "å")
("a" "ä")
("o" "ö")
("n" "ñ")
("и" "й")
)))
(dolist (strings matches)
(dolist (permutation (char-fold--permutation strings))
(apply 'char-fold--test-match-exactly permutation)))
(dolist (strings no-matches)
(dolist (permutation (char-fold--permutation strings))
(apply 'char-fold--test-no-match-exactly permutation)))))
(provide 'char-fold-tests)
;;; char-fold-tests.el ends here