Fix documentation and tests for 'string-distance'
* src/fns.c (Fstring_distance): Doc fix.
* doc/lispref/strings.texi (Text Comparison): Document 'string-distance'.
* etc/NEWS: Fix wording and mark as documented in the manuals.
* test/src/fns-tests.el (test-string-distance): Move from subr-tests.el and rename.
This commit is contained in:
parent
c6e6503900
commit
a7a3918a16
5 changed files with 47 additions and 22 deletions
|
@ -673,6 +673,28 @@ of the two strings. The sign is negative if @var{string1} (or its
|
|||
specified portion) is less.
|
||||
@end defun
|
||||
|
||||
@cindex Levenshtein distance
|
||||
@cindex distance between strings
|
||||
@cindex edit distance between strings
|
||||
@defun string-distance string1 string2 &optional bytecompare
|
||||
This function returns the @dfn{Levenshtein distance} between the
|
||||
source string @var{string1} and the target string @var{string2}. The
|
||||
Levenshtein distance is the number of single-character
|
||||
changes---deletions, insertions, or replacements---required to
|
||||
transform the source string into the target string; it is one possible
|
||||
definition of the @dfn{edit distance} between strings.
|
||||
|
||||
Letter-case of the strings is significant for the computed distance,
|
||||
but their text properties are ignored. If the optional argument
|
||||
@var{bytecompare} is non-@code{nil}, the function calculates the
|
||||
distance in terms of bytes instead of characters. The byte-wise
|
||||
comparison uses the internal Emacs representation of characters, so it
|
||||
will produce inaccurate results for multibyte strings that include raw
|
||||
bytes (@pxref{Text Representations}); make the strings unibyte by
|
||||
encoding them (@pxref{Explicit Encoding}) if you need accurate results
|
||||
with raw bytes.
|
||||
@end defun
|
||||
|
||||
@defun assoc-string key alist &optional case-fold
|
||||
This function works like @code{assoc}, except that @var{key} must be a
|
||||
string or symbol, and comparison is done using @code{compare-strings}.
|
||||
|
|
3
etc/NEWS
3
etc/NEWS
|
@ -534,7 +534,8 @@ manual for more details.
|
|||
+++
|
||||
** New function assoc-delete-all.
|
||||
|
||||
** New function string-distance to calculate Levenshtein distance
|
||||
+++
|
||||
** New function 'string-distance' to calculate the Levenshtein distance
|
||||
between two strings.
|
||||
|
||||
** 'print-quoted' now defaults to t, so if you want to see
|
||||
|
|
|
@ -155,9 +155,11 @@ If STRING is multibyte, this may be greater than the length of STRING. */)
|
|||
|
||||
DEFUN ("string-distance", Fstring_distance, Sstring_distance, 2, 3, 0,
|
||||
doc: /* Return Levenshtein distance between STRING1 and STRING2.
|
||||
If BYTECOMPARE is nil, compare character of strings.
|
||||
If BYTECOMPARE is t, compare byte of strings.
|
||||
Case is significant, but text properties are ignored. */)
|
||||
The distance is the number of deletions, insertions, and substitutions
|
||||
required to transform STRING1 into STRING2.
|
||||
If BYTECOMPARE is nil or omitted, compute distance in terms of characters.
|
||||
If BYTECOMPARE is non-nil, compute distance in terms of bytes.
|
||||
Letter-case is significant, but text properties are ignored. */)
|
||||
(Lisp_Object string1, Lisp_Object string2, Lisp_Object bytecompare)
|
||||
|
||||
{
|
||||
|
|
|
@ -281,24 +281,6 @@ indirectly `mapbacktrace'."
|
|||
(should (equal (string-match-p "\\`[[:blank:]]\\'" "\u3000") 0))
|
||||
(should-not (string-match-p "\\`[[:blank:]]\\'" "\N{LINE SEPARATOR}")))
|
||||
|
||||
(ert-deftest subr-tests--string-distance ()
|
||||
"Test `string-distance' behavior."
|
||||
;; ASCII characters are always fine
|
||||
(should (equal 1 (string-distance "heelo" "hello")))
|
||||
(should (equal 2 (string-distance "aeelo" "hello")))
|
||||
(should (equal 0 (string-distance "ab" "ab" t)))
|
||||
(should (equal 1 (string-distance "ab" "abc" t)))
|
||||
|
||||
;; string containing hanzi character, compare by byte
|
||||
(should (equal 6 (string-distance "ab" "ab我她" t)))
|
||||
(should (equal 3 (string-distance "ab" "a我b" t)))
|
||||
(should (equal 3 (string-distance "我" "她" t)))
|
||||
|
||||
;; string containing hanzi character, compare by character
|
||||
(should (equal 2 (string-distance "ab" "ab我她")))
|
||||
(should (equal 1 (string-distance "ab" "a我b")))
|
||||
(should (equal 1 (string-distance "我" "她"))))
|
||||
|
||||
(ert-deftest subr-tests--dolist--wrong-number-of-args ()
|
||||
"Test that `dolist' doesn't accept wrong types or length of SPEC,
|
||||
cf. Bug#25477."
|
||||
|
|
|
@ -575,4 +575,22 @@
|
|||
:type 'wrong-type-argument)
|
||||
'(wrong-type-argument plistp (:foo 1 . :bar)))))
|
||||
|
||||
(ert-deftest test-string-distance ()
|
||||
"Test `string-distance' behavior."
|
||||
;; ASCII characters are always fine
|
||||
(should (equal 1 (string-distance "heelo" "hello")))
|
||||
(should (equal 2 (string-distance "aeelo" "hello")))
|
||||
(should (equal 0 (string-distance "ab" "ab" t)))
|
||||
(should (equal 1 (string-distance "ab" "abc" t)))
|
||||
|
||||
;; string containing hanzi character, compare by byte
|
||||
(should (equal 6 (string-distance "ab" "ab我她" t)))
|
||||
(should (equal 3 (string-distance "ab" "a我b" t)))
|
||||
(should (equal 3 (string-distance "我" "她" t)))
|
||||
|
||||
;; string containing hanzi character, compare by character
|
||||
(should (equal 2 (string-distance "ab" "ab我她")))
|
||||
(should (equal 1 (string-distance "ab" "a我b")))
|
||||
(should (equal 1 (string-distance "我" "她"))))
|
||||
|
||||
(provide 'fns-tests)
|
||||
|
|
Loading…
Add table
Reference in a new issue