Add the string-numeric-lessp function

* doc/lispref/strings.texi (Text Comparison): Document
`string-numerical-lessp'.

* src/fns.c (Fstring_numeric_lessp): New function.
(gather_number_from_string): Helper function for that function.

* test/src/fns-tests.el (fns-tests-string-numeric-lessp): Add tests.
This commit is contained in:
Lars Ingebrigtsen 2016-02-21 15:32:45 +11:00
parent 1ba50a0d8c
commit 71783e90a4
4 changed files with 134 additions and 0 deletions

View file

@ -633,6 +633,19 @@ If your system does not support a locale environment, this function
behaves like @code{string-lessp}.
@end defun
@defun string-numerical-lessp strin1 string2
This function behaves like @code{string-lessp} for stretches of
consecutive non-numerical characters, but compares sequences of
numerical characters as if they comprised a base-ten number, and then
compares the numbers. So @samp{foo2.png} is ``smaller'' than
@samp{foo12.png} according to this predicate, even if @samp{12} is
lexicographically ``smaller'' than @samp{2}.
If one string has a number in a position in the string, and the other
doesn't, then lexicograpic comparison is done at that point, so
@samp{foo.png} is ``smaller'' than @samp{foo2.png}.
@end defun
@defun string-prefix-p string1 string2 &optional ignore-case
This function returns non-@code{nil} if @var{string1} is a prefix of
@var{string2}; i.e., if @var{string2} starts with @var{string1}. If

View file

@ -1720,6 +1720,12 @@ environment. For the time being this is implemented for modern POSIX
systems and for MS-Windows, for other systems they fall back to their
counterparts `string-lessp' and `string-equal'.
+++
** The new function `string-numeric-lessp' compares strings by
interpreting consecutive runs of numerical characters as numbers, and
compares their numerical values. According to this predicate,
"foo2.png" is smaller than "foo12.png".
---
*** The ls-lisp package uses `string-collate-lessp' to sort file names.
The effect is that, on systems that use ls-lisp for Dired, the default

View file

@ -331,6 +331,103 @@ Symbols are also allowed; their print names are used instead. */)
return i1 < SCHARS (string2) ? Qt : Qnil;
}
/* Return the numerical value of a consecutive run of numerical
characters from STRING. The ISP and ISP_BYTE address pointer
pointers are increased and left at the next character after the
numerical characters. */
static size_t
gather_number_from_string (int c, Lisp_Object string,
ptrdiff_t *isp, ptrdiff_t *isp_byte)
{
size_t number = c - '0';
unsigned char *chp;
int chlen;
do
{
if (STRING_MULTIBYTE (string))
{
chp = &SDATA (string)[*isp_byte];
c = STRING_CHAR_AND_LENGTH (chp, chlen);
}
else
{
c = SREF (string, *isp_byte);
chlen = 1;
}
/* If we're still in a number, add it to the sum and continue. */
/* FIXME: Integer overflow? */
if (c >= '0' && c <= '9')
{
number = number * 10;
number += c - '0';
(*isp)++;
(*isp_byte) += chlen;
}
else
break;
}
/* Stop when we get to the end of the string anyway. */
while (c != 0);
return number;
}
DEFUN ("string-numeric-lessp", Fstring_numeric_lessp,
Sstring_numeric_lessp, 2, 2, 0,
doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order.
Sequences of non-numerical characters are compared lexicographically,
while sequences of numerical characters are converted into numbers,
and then the numbers are compared. This means that \"foo2.png\" is
less than \"foo12.png\" according to this predicate.
Case is significant.
Symbols are also allowed; their print names are used instead. */)
(register Lisp_Object string1, Lisp_Object string2)
{
ptrdiff_t end;
ptrdiff_t i1, i1_byte, i2, i2_byte;
size_t num1, num2;
if (SYMBOLP (string1))
string1 = SYMBOL_NAME (string1);
if (SYMBOLP (string2))
string2 = SYMBOL_NAME (string2);
CHECK_STRING (string1);
CHECK_STRING (string2);
i1 = i1_byte = i2 = i2_byte = 0;
end = SCHARS (string1);
if (end > SCHARS (string2))
end = SCHARS (string2);
while (i1 < end)
{
/* When we find a mismatch, we must compare the
characters, not just the bytes. */
int c1, c2;
FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte);
FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte);
if (c1 >= '0' && c1 <= '9' &&
c2 >= '0' && c2 <= '9')
/* Both strings are numbers, so compare them. */
{
num1 = gather_number_from_string (c1, string1, &i1, &i1_byte);
num2 = gather_number_from_string (c2, string2, &i2, &i2_byte);
if (num1 < num2)
return Qt;
else if (num1 > num2)
return Qnil;
}
else if (c1 != c2)
return c1 < c2 ? Qt : Qnil;
}
return i1 < SCHARS (string2) ? Qt : Qnil;
}
DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0,
doc: /* Return t if first arg string is less than second in collation order.
Symbols are also allowed; their print names are used instead.
@ -5049,6 +5146,7 @@ this variable. */);
defsubr (&Sstring_equal);
defsubr (&Scompare_strings);
defsubr (&Sstring_lessp);
defsubr (&Sstring_numeric_lessp);
defsubr (&Sstring_collate_lessp);
defsubr (&Sstring_collate_equalp);
defsubr (&Sappend);

View file

@ -191,3 +191,20 @@
(string-collate-lessp
a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8")))))
'("Adrian" "Ævar" "Agustín" "Eli"))))
(ert-deftest fns-tests-string-numeric-lessp ()
(should (string-numeric-lessp "foo2.png" "foo12.png"))
(should (not (string-numeric-lessp "foo12.png" "foo2.png")))
(should (string-numeric-lessp "foo12.png" "foo20000.png"))
(should (not (string-numeric-lessp "foo20000.png" "foo12.png")))
(should (string-numeric-lessp "foo.png" "foo2.png"))
(should (not (string-numeric-lessp "foo2.png" "foo.png")))
(should (equal (sort '("foo12.png" "foo2.png" "foo1.png")
'string-numeric-lessp)
'("foo1.png" "foo2.png" "foo12.png")))
(should (string-numeric-lessp "foo2" "foo1234"))
(should (not (string-numeric-lessp "foo1234" "foo2")))
(should (string-numeric-lessp "foo.png" "foo2"))
(should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5"))
(should (string-numeric-lessp "2" "1245"))
(should (not (string-numeric-lessp "1245" "2"))))