* lisp/isearch.el: Fold many unicode characters to ASCII
(isearch-character-fold-search, isearch--character-fold-extras) (isearch--character-fold-table): New variable. (isearch--character-folded-regexp): New function. (isearch-search-fun-default): Use them. * lisp/replace.el (replace-character-fold): New variable. (replace-search): Use it. * etc/NEWS: Document it.
This commit is contained in:
parent
2ca5558395
commit
c7a19e0c80
3 changed files with 101 additions and 0 deletions
14
etc/NEWS
14
etc/NEWS
|
@ -84,6 +84,20 @@ command line when `initial-buffer-choice' is non-nil.
|
|||
|
||||
* Changes in Emacs 25.1
|
||||
|
||||
** `isearch' and `query-replace' now perform character folding in matches.
|
||||
This is analogous to case-folding, but applies between Unicode
|
||||
characters and their ASCII counterparts. This means many characters
|
||||
will match entire groups of characters.
|
||||
|
||||
For instance, the " character will match all variants of Unicode double quotes
|
||||
(like “ and ”), and the letter a will match all of its accented
|
||||
cousins, even those composed of multiple characters, as well as many
|
||||
other symbols like ℀, ℁, ⒜, and ⓐ.
|
||||
|
||||
** New function `isearch--character-folded-regexp' can be used
|
||||
by searching commands to produce a regexp matching anything that
|
||||
character-folds into STRING.
|
||||
|
||||
** New command `checkdoc-package-keywords' checks if the
|
||||
current package keywords are recognized. Set the new option
|
||||
`checkdoc-package-keywords-flag' to non-nil to make
|
||||
|
|
|
@ -272,6 +272,79 @@ Default value, nil, means edit the string instead."
|
|||
:version "23.1"
|
||||
:group 'isearch)
|
||||
|
||||
(defvar isearch-character-fold-search t
  "Non-nil if isearch should fold similar characters.
This means some characters will match entire groups of characters.
For instance, \" will match all variants of double quotes, and
the letter a will match all of its accented versions (and then
some).")
|
||||
|
||||
;; The constant is wrapped in `eval-and-compile' because the
;; `eval-when-compile' form that builds `isearch--character-fold-table'
;; below reads it while this file is being byte-compiled; a plain
;; `defconst' is not evaluated at that point.
(eval-and-compile
  (defconst isearch--character-fold-extras
    '(;; NOTE(review): the first string is written as an escape for
      ;; FULLWIDTH QUOTATION MARK (U+FF02).  The text this was restored
      ;; from showed three consecutive ASCII double quotes, which does
      ;; not read as one string -- confirm against upstream.
      (?\" "\uFF02" "“" "”" "„" "⹂" "〞" "‟" "❞" "❝" "❠" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
      ;; NOTE(review): "\U000E0022" (TAG QUOTATION MARK) replaces an
      ;; empty string in the two entries below.  An empty alternative
      ;; would make the folded regexp match nothing at all; presumably
      ;; an invisible character was stripped -- confirm against upstream.
      (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "\U000E0022" "❮" "❯" "‹" "›")
      (?` "❛" "‘" "‛" "\U000E0022" "❮" "‹")
      ;; `isearch-character-fold-search' doesn't interact with
      ;; `isearch-lax-whitespace' yet.  So we need to add this here.
      (?\s " " "\r" "\n"))
    "Extra entries to add to `isearch--character-fold-table'.
Used to specify character folding not covered by unicode
decomposition.  Each car is a character and each cdr is a list of
strings that it should match (itself excluded)."))

(defvar isearch--character-fold-table
  (eval-when-compile
    (require 'subr-x)
    (let ((equiv (make-char-table 'character-fold-table))
          ;; General categories treated as "letter or number".
          (alnum-categories '(Lu Ll Lt Lm Lo Nd Nl No)))
      ;; Pass 1: for each simple character, collect the list of complex
      ;; characters (and their decomposed spellings) it should match.
      (dotimes (i (length equiv))
        (let ((decomp (get-char-code-property i 'decomposition)))
          ;; Skip trivial cases (?a decomposes to (?a)).
          (unless (eq i (car decomp))
            ;; Discard a possible formatting tag.
            (when (symbolp (car-safe decomp))
              (setq decomp (cdr decomp)))
            (let ((base
                   (or
                    ;; Prefer the first letter or number in the
                    ;; decomposition, per its general category.
                    (catch 'base
                      (dolist (c decomp)
                        (when (and (characterp c)
                                   (memq (get-char-code-property
                                          c 'general-category)
                                         alnum-categories))
                          (throw 'base c))))
                    ;; Otherwise fall back to the first character of
                    ;; any kind.
                    (catch 'base
                      (dolist (c decomp)
                        (when (characterp c)
                          (throw 'base c)))))))
              ;; Add i to the list of characters that BASE can
              ;; represent.  Also add its decomposition, so we can
              ;; match multi-char representations like
              ;; (format "a%c" 769).
              (when (and base (not (eq i base)))
                (aset equiv base
                      (cons (apply #'string decomp)
                            (cons (char-to-string i)
                                  (aref equiv base)))))))))
      ;; Pass 2: merge in the hand-written extras and turn every
      ;; non-empty list into a single regexp that also matches the
      ;; character itself.
      (dotimes (i (length equiv))
        (when-let ((chars (append (cdr (assq i isearch--character-fold-extras))
                                  (aref equiv i))))
          (aset equiv i (regexp-opt (cons (char-to-string i) chars)))))
      equiv))
  "Used for folding characters of the same group during search.
Each entry is either nil or a regexp string matching the character
itself and everything it folds to.")
|
||||
|
||||
(defun isearch--character-folded-regexp (string)
  "Build a regexp that matches any text which character-folds into STRING.
When `isearch-character-fold-search' is nil, the result is just
STRING passed through `regexp-quote'.  Otherwise each character of
STRING is looked up in `isearch--character-fold-table': a character
with an entry contributes that entry, which is itself a regexp,
and a character without one contributes its `regexp-quote'd form."
  (if (not isearch-character-fold-search)
      ;; Folding disabled: match STRING literally.
      (regexp-quote string)
    (mapconcat (lambda (ch)
                 (let ((folded (aref isearch--character-fold-table ch)))
                   (if folded
                       folded
                     (regexp-quote (char-to-string ch)))))
               string
               "")))
|
||||
|
||||
(defcustom isearch-lazy-highlight t
|
||||
"Controls the lazy-highlighting during incremental search.
|
||||
When non-nil, all text in the buffer matching the current search
|
||||
|
@ -2607,6 +2680,11 @@ Can be changed via `isearch-search-fun-function' for special needs."
|
|||
're-search-backward-lax-whitespace))
|
||||
(isearch-regexp
|
||||
(if isearch-forward 're-search-forward 're-search-backward))
|
||||
(isearch-character-fold-search
|
||||
(lambda (string &optional bound noerror count)
|
||||
(funcall (if isearch-forward #'re-search-forward #'re-search-backward)
|
||||
(isearch--character-folded-regexp string)
|
||||
bound noerror count)))
|
||||
((and isearch-lax-whitespace search-whitespace-regexp)
|
||||
(if isearch-forward
|
||||
'search-forward-lax-whitespace
|
||||
|
|
|
@ -33,6 +33,14 @@
|
|||
:type 'boolean
|
||||
:group 'matching)
|
||||
|
||||
(defcustom replace-character-fold t
  "Non-nil means `query-replace' should do character folding in matches.
For example, with folding enabled a plain ' will match a large
variety of unicode quotes."
  :version "25.1"
  :group 'matching
  :type 'boolean)
|
||||
|
||||
(defcustom replace-lax-whitespace nil
|
||||
"Non-nil means `query-replace' matches a sequence of whitespace chars.
|
||||
When you enter a space or spaces in the strings to be replaced,
|
||||
|
@ -2005,6 +2013,7 @@ It is called with three arguments, as if it were
|
|||
;; used after `recursive-edit' might override them.
|
||||
(let* ((isearch-regexp regexp-flag)
|
||||
(isearch-word delimited-flag)
|
||||
(isearch-character-fold-search replace-character-fold)
|
||||
(isearch-lax-whitespace
|
||||
replace-lax-whitespace)
|
||||
(isearch-regexp-lax-whitespace
|
||||
|
|
Loading…
Add table
Reference in a new issue