Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689)
* doc/emacs/search.texi (Lax Search): Document char-fold-symmetric, char-fold-include, char-fold-exclude. * lisp/char-fold.el (char-fold--default-include) (char-fold--default-exclude, char-fold--default-symmetric) (char-fold--previous): New defconsts. (char-fold-include, char-fold-exclude, char-fold-symmetric): New defcustoms. (char-fold-make-table): Use them. (char-fold-update-table): New function called at top-level. * test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly) (char-fold--permutation): New functions. (char-fold--test-without-customization) (char-fold--test-with-customization): New tests.
This commit is contained in:
parent
a48726ebae
commit
376f5df3cc
4 changed files with 222 additions and 12 deletions
|
@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands have a different
|
|||
default, controlled by a separate option; see @ref{Replacement and Lax
|
||||
Matches}.)
|
||||
|
||||
Like with case folding, typing an explicit variant of a character,
|
||||
such as @code{@"a}, as part of the search string disables character
|
||||
folding for that search. If you delete such a character from the
|
||||
search string, this effect ceases.
|
||||
@vindex char-fold-symmetric
|
||||
By default, typing an explicit variant of a character, such as
|
||||
@code{@"a}, as part of the search string doesn't match its base
|
||||
character, such as @code{a}. But if you customize the variable
|
||||
@code{char-fold-symmetric} to @code{t}, then search commands treat
|
||||
equivalent characters the same and use of any of a set of equivalent
|
||||
characters in a search string finds any of them in the text being
|
||||
searched, so typing an accented character @code{@"a} matches the
|
||||
letter @code{a} as well as all the other variants like @code{@'a}.
|
||||
|
||||
@vindex char-fold-include
|
||||
@vindex char-fold-exclude
|
||||
You can add new foldings using the customizable variable
|
||||
@code{char-fold-include}, or remove the existing ones using the
|
||||
customizable variable @code{char-fold-exclude}.
|
||||
|
||||
@node Replace
|
||||
@section Replacement Commands
|
||||
|
|
9
etc/NEWS
9
etc/NEWS
|
@ -1175,6 +1175,15 @@ rather than stopping after one level, such that searching for
|
|||
e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER
|
||||
IOTA WITH OXIA.
|
||||
|
||||
+++
|
||||
*** New char-folding options: 'char-fold-include' lets you add ad hoc
|
||||
foldings, 'char-fold-exclude' to remove foldings from default decomposition,
|
||||
and 'char-fold-symmetric' to search for any of an equivalence class of
|
||||
characters. For example, with a 'nil' value of 'char-fold-symmetric'
|
||||
you can search for "e" to find "é", but not vice versa. With a non-nil
|
||||
value you can search for either, for example, you can search for "é"
|
||||
to find "e".
|
||||
|
||||
** Debugger
|
||||
|
||||
+++
|
||||
|
|
|
@ -22,7 +22,18 @@
|
|||
|
||||
;;; Code:
|
||||
|
||||
(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1))
|
||||
(eval-and-compile
|
||||
(put 'char-fold-table 'char-table-extra-slots 1)
|
||||
(defconst char-fold--default-include
|
||||
'((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
|
||||
(?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
|
||||
(?` "❛" "‘" "‛" "" "❮" "‹")))
|
||||
(defconst char-fold--default-exclude nil)
|
||||
(defconst char-fold--default-symmetric nil)
|
||||
(defconst char-fold--previous (list char-fold--default-include
|
||||
char-fold--default-exclude
|
||||
char-fold--default-symmetric)))
|
||||
|
||||
|
||||
(eval-and-compile
|
||||
(defun char-fold-make-table ()
|
||||
|
@ -116,21 +127,70 @@
|
|||
(aref equiv (car simpler-decomp)))))))))))
|
||||
table)
|
||||
|
||||
;; Add some manual entries.
|
||||
(dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
|
||||
(?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
|
||||
(?` "❛" "‘" "‛" "" "❮" "‹")))
|
||||
;; Add some entries to default decomposition
|
||||
(dolist (it (or (bound-and-true-p char-fold-include)
|
||||
char-fold--default-include))
|
||||
(let ((idx (car it))
|
||||
(chars (cdr it)))
|
||||
(aset equiv idx (append chars (aref equiv idx)))))
|
||||
|
||||
;; Remove some entries from default decomposition
|
||||
(dolist (it (or (bound-and-true-p char-fold-exclude)
|
||||
char-fold--default-exclude))
|
||||
(let ((idx (car it))
|
||||
(chars (cdr it)))
|
||||
(when (aref equiv idx)
|
||||
(dolist (char chars)
|
||||
(aset equiv idx (remove char (aref equiv idx)))))))
|
||||
|
||||
;; Add symmetric entries
|
||||
(when (or (bound-and-true-p char-fold-symmetric)
|
||||
char-fold--default-symmetric)
|
||||
(let ((symmetric (make-hash-table :test 'eq)))
|
||||
;; Initialize hashes
|
||||
(map-char-table
|
||||
(lambda (char decomp-list)
|
||||
(puthash char (make-hash-table :test 'equal) symmetric)
|
||||
(dolist (decomp decomp-list)
|
||||
(puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric)))
|
||||
equiv)
|
||||
|
||||
(map-char-table
|
||||
(lambda (char decomp-list)
|
||||
(dolist (decomp decomp-list)
|
||||
(if (< (length decomp) 2)
|
||||
;; Add single-char symmetric pairs to hash
|
||||
(let ((decomp-list (cons (char-to-string char) decomp-list))
|
||||
(decomp-hash (gethash (string-to-char decomp) symmetric)))
|
||||
(dolist (decomp2 decomp-list)
|
||||
(unless (equal decomp decomp2)
|
||||
(puthash decomp2 t decomp-hash)
|
||||
(puthash decomp t (gethash (string-to-char decomp2) symmetric)))))
|
||||
;; Add multi-char symmetric pairs to equiv-multi char-table
|
||||
(let ((decomp-list (cons (char-to-string char) decomp-list))
|
||||
(prefix (string-to-char decomp))
|
||||
(suffix (substring decomp 1)))
|
||||
(puthash decomp t (gethash char symmetric))
|
||||
(dolist (decomp2 decomp-list)
|
||||
(if (< (length decomp2) 2)
|
||||
(aset equiv-multi prefix
|
||||
(cons (cons suffix (regexp-quote decomp2))
|
||||
(aref equiv-multi prefix)))))))))
|
||||
equiv)
|
||||
|
||||
;; Update equiv char-table from hash
|
||||
(maphash
|
||||
(lambda (char decomp-hash)
|
||||
(let (schars)
|
||||
(maphash (lambda (schar _) (push schar schars)) decomp-hash)
|
||||
(aset equiv char schars)))
|
||||
symmetric)))
|
||||
|
||||
;; Convert the lists of characters we compiled into regexps.
|
||||
(map-char-table
|
||||
(lambda (char decomp-list)
|
||||
(let ((re (regexp-opt (cons (char-to-string char) decomp-list))))
|
||||
(if (consp char) ; FIXME: char never is consp?
|
||||
(set-char-table-range equiv char re)
|
||||
(aset equiv char re))))
|
||||
(aset equiv char re)))
|
||||
equiv)
|
||||
equiv)))
|
||||
|
||||
|
@ -159,6 +219,61 @@ For instance, the default alist for ?f includes:
|
|||
|
||||
Exceptionally for the space character (32), ALIST is ignored.")
|
||||
|
||||
|
||||
(defun char-fold-update-table ()
|
||||
(let ((new (list (or (bound-and-true-p char-fold-include)
|
||||
char-fold--default-include)
|
||||
(or (bound-and-true-p char-fold-exclude)
|
||||
char-fold--default-exclude)
|
||||
(or (bound-and-true-p char-fold-symmetric)
|
||||
char-fold--default-symmetric))))
|
||||
(unless (equal char-fold--previous new)
|
||||
(setq char-fold-table (char-fold-make-table)
|
||||
char-fold--previous new))))
|
||||
|
||||
(defcustom char-fold-include char-fold--default-include
|
||||
"Additional character foldings to include.
|
||||
Each entry is a list of a character and the strings that fold into it."
|
||||
:type '(alist :key-type (character :tag "Fold to character")
|
||||
:value-type (repeat (string :tag "Fold from string")))
|
||||
:initialize #'custom-initialize-default
|
||||
:set (lambda (sym val)
|
||||
(custom-set-default sym val)
|
||||
(char-fold-update-table))
|
||||
:group 'isearch
|
||||
:version "27.1")
|
||||
|
||||
(defcustom char-fold-exclude char-fold--default-exclude
|
||||
"Character foldings to remove from default decompisitions.
|
||||
Each entry is a list of a character and the strings to remove from folding."
|
||||
:type '(alist :key-type (character :tag "Fold to character")
|
||||
:value-type (repeat (string :tag "Fold from string")))
|
||||
:initialize #'custom-initialize-default
|
||||
:set (lambda (sym val)
|
||||
(custom-set-default sym val)
|
||||
(char-fold-update-table))
|
||||
:group 'isearch
|
||||
:version "27.1")
|
||||
|
||||
(defcustom char-fold-symmetric char-fold--default-symmetric
|
||||
"Non-nil means char-fold searching treats equivalent chars the same.
|
||||
That is, use of any of a set of char-fold equivalent chars in a search
|
||||
string finds any of them in the text being searched.
|
||||
|
||||
If nil then only the \"base\" or \"canonical\" char of the set matches
|
||||
any of them. The others match only themselves, even when char-folding
|
||||
is turned on."
|
||||
:type 'boolean
|
||||
:initialize #'custom-initialize-default
|
||||
:set (lambda (sym val)
|
||||
(custom-set-default sym val)
|
||||
(char-fold-update-table))
|
||||
:group 'isearch
|
||||
:version "27.1")
|
||||
|
||||
(char-fold-update-table)
|
||||
|
||||
|
||||
(defun char-fold--make-space-string (n)
|
||||
"Return a string that matches N spaces."
|
||||
(format "\\(?:%s\\|%s\\)"
|
||||
|
|
|
@ -44,6 +44,16 @@
|
|||
(should (string-match (char-fold--ascii-upcase re) (downcase it)))
|
||||
(should (string-match (char-fold--ascii-downcase re) (upcase it)))))))
|
||||
|
||||
(defun char-fold--test-no-match-exactly (string &rest strings-to-match)
|
||||
(let ((re (concat "\\`" (char-fold-to-regexp string) "\\'")))
|
||||
(dolist (it strings-to-match)
|
||||
(should-not (string-match re it)))
|
||||
;; Case folding
|
||||
(let ((case-fold-search t))
|
||||
(dolist (it strings-to-match)
|
||||
(should-not (string-match (char-fold--ascii-upcase re) (downcase it)))
|
||||
(should-not (string-match (char-fold--ascii-downcase re) (upcase it)))))))
|
||||
|
||||
(defun char-fold--test-search-with-contents (contents string)
|
||||
(with-temp-buffer
|
||||
(insert contents)
|
||||
|
@ -53,6 +63,11 @@
|
|||
(should (char-fold-search-forward string nil 'noerror))
|
||||
(should (char-fold-search-backward string nil 'noerror))))
|
||||
|
||||
(defun char-fold--permutation (strings)
|
||||
(mapcar (lambda (string)
|
||||
(cons string (remove string strings)))
|
||||
strings))
|
||||
|
||||
|
||||
(ert-deftest char-fold--test-consistency ()
|
||||
(dotimes (n 30)
|
||||
|
@ -132,5 +147,65 @@
|
|||
;; Ensure it took less than a second.
|
||||
(should (< (- (time-to-seconds) time) 1))))))
|
||||
|
||||
(ert-deftest char-fold--test-without-customization ()
|
||||
(let* ((matches
|
||||
'(
|
||||
("e" "ℯ" "ḗ" "ë" "ë")
|
||||
("ι"
|
||||
"ί" ;; 1 level decomposition
|
||||
"ί" ;; 2 level decomposition
|
||||
;; FIXME:
|
||||
;; "ΐ" ;; 3 level decomposition
|
||||
)
|
||||
)))
|
||||
(dolist (strings matches)
|
||||
(apply 'char-fold--test-match-exactly strings))))
|
||||
|
||||
(ert-deftest char-fold--test-with-customization ()
|
||||
:tags '(:expensive-test)
|
||||
(let* ((char-fold-include
|
||||
'(
|
||||
(?ß "ss") ;; de
|
||||
(?o "ø") ;; da no nb nn
|
||||
(?l "ł") ;; pl
|
||||
))
|
||||
;; FIXME: move language-specific settings to defaults
|
||||
(char-fold-exclude
|
||||
'(
|
||||
(?a "å") ;; sv da no nb nn
|
||||
(?a "ä") ;; sv fi et
|
||||
(?o "ö") ;; sv fi et
|
||||
(?n "ñ") ;; es
|
||||
(?и "й") ;; ru
|
||||
))
|
||||
(char-fold-symmetric t)
|
||||
(char-fold-table (char-fold-make-table))
|
||||
(matches
|
||||
'(
|
||||
("e" "ℯ" "ḗ" "ë" "ë")
|
||||
("е" "ё" "ё")
|
||||
("ι" "ί" "ί"
|
||||
;; FIXME: "ΐ"
|
||||
)
|
||||
("ß" "ss")
|
||||
("o" "ø")
|
||||
("l" "ł")
|
||||
|
||||
))
|
||||
(no-matches
|
||||
'(
|
||||
("a" "å")
|
||||
("a" "ä")
|
||||
("o" "ö")
|
||||
("n" "ñ")
|
||||
("и" "й")
|
||||
)))
|
||||
(dolist (strings matches)
|
||||
(dolist (permutation (char-fold--permutation strings))
|
||||
(apply 'char-fold--test-match-exactly permutation)))
|
||||
(dolist (strings no-matches)
|
||||
(dolist (permutation (char-fold--permutation strings))
|
||||
(apply 'char-fold--test-no-match-exactly permutation)))))
|
||||
|
||||
(provide 'char-fold-tests)
|
||||
;;; char-fold-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue