Use Gnulib filevercmp for version comparison

* admin/merge-gnulib (GNULIB_MODULES): Add filevercmp.
* doc/lispref/strings.texi (Text Comparison):
* etc/NEWS, src/fns.c:
* test/src/fns-tests.el (fns-tests-string-version-lessp):
Rename newly-introduced function to string-version-lessp, by
analogy with strverscmp.
* lib/filevercmp.c, lib/filevercmp.h: New files, copied from gnulib.
* lib/gnulib.mk, m4/gnulib-comp.m4: Regenerate.
* src/fns.c: Include <filevercmp.h>.
(gather_number_from_string): Remove.
(Fstring_version_lessp): Reimplement via filevercmp.
This commit is contained in:
Paul Eggert 2016-02-21 13:25:24 -08:00
parent 3e67708d72
commit 1f7feecaee
9 changed files with 286 additions and 129 deletions

View file

@ -30,7 +30,7 @@ GNULIB_MODULES='
careadlinkat close-stream count-one-bits count-trailing-zeros
crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512
dtoastr dtotimespec dup2 environ execinfo faccessat
fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync
fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync
getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog
ignore-value intprops largefile lstat
manywarnings memrchr mkostemp mktime

View file

@ -633,20 +633,12 @@ If your system does not support a locale environment, this function
behaves like @code{string-lessp}.
@end defun
@defun string-numerical-lessp strin1 string2
This function behaves like @code{string-lessp} for stretches of
consecutive non-numerical characters, but compares sequences of
numerical characters as if they comprised a base-ten number, and then
compares the numbers. So @samp{foo2.png} is ``smaller'' than
@samp{foo12.png} according to this predicate, even if @samp{12} is
lexicographically ``smaller'' than @samp{2}.
If one string has a number in a position in the string, and the other
doesn't, then lexicograpic comparison is done at that point, so
@samp{foo.png} is ``smaller'' than @samp{foo2.png}. If any of the
numbers in the strings are larger than can be represented as an
integer number, the entire string is compared using
@code{string-less}.
@defun string-version-lessp string1 string2
This function compares strings lexicographically, except it treats
sequences of numerical characters as if they comprised a base-ten
number, and then compares the numbers. So @samp{foo2.png} is
``smaller'' than @samp{foo12.png} according to this predicate, even if
@samp{12} is lexicographically ``smaller'' than @samp{2}.
@end defun
@defun string-prefix-p string1 string2 &optional ignore-case

View file

@ -1726,7 +1726,7 @@ systems and for MS-Windows, for other systems they fall back to their
counterparts `string-lessp' and `string-equal'.
+++
** The new function `string-numeric-lessp' compares strings by
** The new function `string-version-lessp' compares strings by
interpreting consecutive runs of numerical characters as numbers, and
compares their numerical values. According to this predicate,
"foo2.png" is smaller than "foo12.png".

181
lib/filevercmp.c Normal file
View file

@ -0,0 +1,181 @@
/*
Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk>
Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au>
Copyright (C) 2008-2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include "filevercmp.h"
#include <sys/types.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <c-ctype.h>
#include <limits.h>
/* Find the file suffix of the string *STR, where a suffix is the
   longest trailing part matching this regular expression:
     /(\.[A-Za-z~][A-Za-z0-9~]*)*$/
   Return a pointer to the '.' that starts the suffix, or NULL if the
   string has no such suffix.  On return, *STR has been advanced to the
   string's terminating NUL.  */
static const char *
match_suffix (const char **str)
{
  const char *suffix = NULL;
  /* True when the previous character was a '.' that opened a suffix
     component, so the current character must be a letter or '~'.  */
  bool after_dot = false;
  const char *p;

  for (p = *str; *p != '\0'; p++)
    {
      char ch = *p;

      if (after_dot)
        {
          after_dot = false;
          /* A component may not start with a digit, another '.', etc.  */
          if (! (c_isalpha (ch) || ch == '~'))
            suffix = NULL;
        }
      else if (ch == '.')
        {
          after_dot = true;
          /* Only the first '.' of a chain of components is remembered.  */
          if (suffix == NULL)
            suffix = p;
        }
      else if (! (c_isalnum (ch) || ch == '~'))
        {
          /* Any other character invalidates the candidate found so far.  */
          suffix = NULL;
        }
    }

  *str = p;
  return suffix;
}
/* Helper for verrevcmp: return the sort weight of character C inside a
   non-digit run.  Digits weigh 0, letters weigh their own character
   code, '~' weighs -1 (so it sorts before everything else), and every
   remaining character is shifted by UCHAR_MAX + 1 so that it sorts
   after all letters.  */
static int
order (unsigned char c)
{
  if (c_isdigit (c))
    return 0;
  if (c_isalpha (c))
    return c;
  return c == '~' ? -1 : (int) c + UCHAR_MAX + 1;
}
/* Slightly modified verrevcmp function from dpkg.

   S1, S2         - the strings to compare
   S1_LEN, S2_LEN - number of bytes of each string to be scanned

   Return a negative value, zero, or a positive value if the first
   S1_LEN bytes of S1 compare respectively less than, equal to, or
   greater than the first S2_LEN bytes of S2.

   This implements the algorithm for comparison of version strings
   specified by Debian and now widely adopted.  The detailed
   specification can be found in the Debian Policy Manual in the
   section on the 'Version' control field.  This version of the code
   implements that from s5.6.12 of Debian Policy v3.8.0.1
   http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version

   Every access is bounded by S1_LEN/S2_LEN, so the function does not
   depend on the byte following each scanned region being a non-digit.  */
static int _GL_ATTRIBUTE_PURE
verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len)
{
  size_t s1_pos = 0;
  size_t s2_pos = 0;

  while (s1_pos < s1_len || s2_pos < s2_len)
    {
      int first_diff = 0;

      /* Compare the non-digit runs character by character using the
         weights from order ().  An exhausted string (or one that has
         already reached a digit) contributes weight 0, which sorts
         after '~' but before any other non-digit.  */
      while ((s1_pos < s1_len && !c_isdigit (s1[s1_pos]))
             || (s2_pos < s2_len && !c_isdigit (s2[s2_pos])))
        {
          int s1_c = (s1_pos < s1_len) ? order (s1[s1_pos]) : 0;
          int s2_c = (s2_pos < s2_len) ? order (s2[s2_pos]) : 0;
          if (s1_c != s2_c)
            return s1_c - s2_c;
          s1_pos++;
          s2_pos++;
        }

      /* Leading zeros do not affect a number's value.  */
      while (s1_pos < s1_len && s1[s1_pos] == '0')
        s1_pos++;
      while (s2_pos < s2_len && s2[s2_pos] == '0')
        s2_pos++;

      /* Walk both digit runs in parallel, remembering the first
         differing digit; it only decides the result if both numbers
         turn out to have the same number of digits.  */
      while (s1_pos < s1_len && s2_pos < s2_len
             && c_isdigit (s1[s1_pos]) && c_isdigit (s2[s2_pos]))
        {
          if (!first_diff)
            first_diff = s1[s1_pos] - s2[s2_pos];
          s1_pos++;
          s2_pos++;
        }

      /* Whichever number still has digits left is the longer one and
         therefore the larger one.  */
      if (s1_pos < s1_len && c_isdigit (s1[s1_pos]))
        return 1;
      if (s2_pos < s2_len && c_isdigit (s2[s2_pos]))
        return -1;
      if (first_diff)
        return first_diff;
    }

  return 0;
}
/* Compare version strings S1 and S2 and return a negative value, zero,
   or a positive value if S1 sorts respectively before, the same as, or
   after S2.  Zero is returned only when the strings are byte-for-byte
   identical.  See filevercmp.h for the full description.  */
int
filevercmp (const char *s1, const char *s2)
{
  const char *end1;
  const char *end2;
  const char *suffix1;
  const char *suffix2;
  size_t len1;
  size_t len2;
  bool hidden1;
  bool hidden2;
  int version_order;

  /* Identical strings need no further work; otherwise keep the plain
     byte ordering around as the final tie breaker.  */
  int byte_order = strcmp (s1, s2);
  if (byte_order == 0)
    return 0;

  /* "", "." and ".." sort first, in that order.  */
  if (*s1 == '\0')
    return -1;
  if (*s2 == '\0')
    return 1;
  if (strcmp (s1, ".") == 0)
    return -1;
  if (strcmp (s2, ".") == 0)
    return 1;
  if (strcmp (s1, "..") == 0)
    return -1;
  if (strcmp (s2, "..") == 0)
    return 1;

  /* Other hidden files (leading '.') sort before non-hidden ones.  If
     both are hidden, skip the shared leading dot.  */
  hidden1 = *s1 == '.';
  hidden2 = *s2 == '.';
  if (hidden1 != hidden2)
    return hidden1 ? -1 : 1;
  if (hidden1)
    {
      s1++;
      s2++;
    }

  /* "Cut" the file suffixes: compare only the part before them.
     match_suffix leaves END1/END2 at each string's terminating NUL.  */
  end1 = s1;
  end2 = s2;
  suffix1 = match_suffix (&end1);
  suffix2 = match_suffix (&end2);
  len1 = (suffix1 ? suffix1 : end1) - s1;
  len2 = (suffix2 ? suffix2 : end2) - s2;

  /* If the strings are identical once the suffixes are cut, restore the
     suffixes so that they take part in the comparison.  */
  if ((suffix1 || suffix2) && len1 == len2
      && strncmp (s1, s2, len1) == 0)
    {
      len1 = end1 - s1;
      len2 = end2 - s2;
    }

  version_order = verrevcmp (s1, len1, s2, len2);
  return version_order != 0 ? version_order : byte_order;
}

42
lib/filevercmp.h Normal file
View file

@ -0,0 +1,42 @@
/*
Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk>
Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au>
Copyright (C) 2008-2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef FILEVERCMP_H
#define FILEVERCMP_H

/* Compare version strings S1 and S2:

   1) By PREFIX, in the same way as strcmp.
   2) Then by VERSION (most similarly to the version comparison of
      Debian's dpkg).  Leading zeros in version numbers are ignored.
   3) If both PREFIX and VERSION are equal, strcmp is used for the
      comparison.  So this function returns 0 if (and only if) the
      strings S1 and S2 are identical.

   Return a number > 0 if S1 > S2, 0 if S1 == S2, and a number < 0 if
   S1 < S2.

   The comparison is such that if VER1 and VER2 are version numbers,
   and PREFIX and SUFFIX are strings (with SUFFIX matching
   (\.[A-Za-z~][A-Za-z0-9~]*)*), then VER1 < VER2 implies
   filevercmp (PREFIX VER1 SUFFIX, PREFIX VER2 SUFFIX) < 0.

   This function is intended to be a replacement for strverscmp.  */
int filevercmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE;

#endif /* FILEVERCMP_H */

View file

@ -21,7 +21,7 @@
# the same distribution terms as the rest of that program.
#
# Generated by gnulib-tool.
# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla warnings
# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla warnings
MOSTLYCLEANFILES += core *.stackdump
@ -441,6 +441,14 @@ EXTRA_DIST += filemode.h
## end gnulib module filemode
## begin gnulib module filevercmp
libgnu_a_SOURCES += filevercmp.c
EXTRA_DIST += filevercmp.h
## end gnulib module filevercmp
## begin gnulib module fpending

View file

@ -78,6 +78,7 @@ AC_DEFUN([gl_EARLY],
# Code from module fdatasync:
# Code from module fdopendir:
# Code from module filemode:
# Code from module filevercmp:
# Code from module fpending:
# Code from module fstatat:
# Code from module fsync:
@ -889,6 +890,8 @@ AC_DEFUN([gl_FILE_LIST], [
lib/fdopendir.c
lib/filemode.c
lib/filemode.h
lib/filevercmp.c
lib/filevercmp.h
lib/fpending.c
lib/fpending.h
lib/fstatat.c

127
src/fns.c
View file

@ -21,6 +21,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <unistd.h>
#include <filevercmp.h>
#include <intprops.h>
#include <vla.h>
#include <errno.h>
@ -332,50 +333,21 @@ Symbols are also allowed; their print names are used instead. */)
return i1 < SCHARS (string2) ? Qt : Qnil;
}
/* Return the numerical value of a consecutive run of numerical
characters from STRING. The ISP and ISP_BYTE address pointer
pointers are increased and left at the next character after the
numerical characters. */
static size_t
gather_number_from_string (Lisp_Object string,
ptrdiff_t *isp, ptrdiff_t *isp_byte)
{
size_t number = 0;
char *s = SSDATA (string);
char *end;
DEFUN ("string-version-lessp", Fstring_version_lessp,
Sstring_version_lessp, 2, 2, 0,
doc: /* Return non-nil if S1 is less than S2, as version strings.
errno = 0;
number = strtoumax (s + *isp_byte, &end, 10);
if (errno == ERANGE)
/* If we have an integer overflow, then we fall back on lexical
comparison. */
return -1;
else
{
size_t diff = end - (s + *isp_byte);
(*isp) += diff;
(*isp_byte) += diff;
return number;
}
}
This function compares version strings S1 and S2:
1) By prefix lexicographically.
2) Then by version (similarly to version comparison of Debian's dpkg).
Leading zeros in version numbers are ignored.
3) If both prefix and version are equal, compare as ordinary strings.
DEFUN ("string-numeric-lessp", Fstring_numeric_lessp,
Sstring_numeric_lessp, 2, 2, 0,
doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order.
Sequences of non-numerical characters are compared lexicographically,
while sequences of numerical characters are converted into numbers,
and then the numbers are compared. This means that \"foo2.png\" is
less than \"foo12.png\" according to this predicate.
For example, \"foo2.png\" compares less than \"foo12.png\".
Case is significant.
Symbols are also allowed; their print names are used instead. */)
(register Lisp_Object string1, Lisp_Object string2)
(Lisp_Object string1, Lisp_Object string2)
{
ptrdiff_t end;
ptrdiff_t i1, i1_byte, i2, i2_byte;
size_t num1, num2;
unsigned char *chp;
int chlen1, chlen2;
if (SYMBOLP (string1))
string1 = SYMBOL_NAME (string1);
if (SYMBOLP (string2))
@ -383,67 +355,26 @@ Symbols are also allowed; their print names are used instead. */)
CHECK_STRING (string1);
CHECK_STRING (string2);
i1 = i1_byte = i2 = i2_byte = 0;
char *p1 = SSDATA (string1);
char *p2 = SSDATA (string2);
char *lim1 = p1 + SBYTES (string1);
char *lim2 = p2 + SBYTES (string2);
int cmp;
end = SCHARS (string1);
if (end > SCHARS (string2))
end = SCHARS (string2);
while (i1 < end)
while ((cmp = filevercmp (p1, p2)) == 0)
{
/* When we find a mismatch, we must compare the
characters, not just the bytes. */
int c1, c2;
if (STRING_MULTIBYTE (string1))
{
chp = &SDATA (string1)[i1_byte];
c1 = STRING_CHAR_AND_LENGTH (chp, chlen1);
}
else
{
c1 = SREF (string1, i1_byte);
chlen1 = 1;
}
if (STRING_MULTIBYTE (string2))
{
chp = &SDATA (string1)[i2_byte];
c2 = STRING_CHAR_AND_LENGTH (chp, chlen2);
}
else
{
c2 = SREF (string2, i2_byte);
chlen2 = 1;
}
if (c1 >= '0' && c1 <= '9' &&
c2 >= '0' && c2 <= '9')
/* Both strings are numbers, so compare them. */
{
num1 = gather_number_from_string (string1, &i1, &i1_byte);
num2 = gather_number_from_string (string2, &i2, &i2_byte);
/* If we have an integer overflow, then resort to sorting
the entire string lexicographically. */
if (num1 == -1 || num2 == -1)
return Fstring_lessp (string1, string2);
else if (num1 < num2)
return Qt;
else if (num1 > num2)
return Qnil;
}
else
{
if (c1 != c2)
return c1 < c2 ? Qt : Qnil;
i1++;
i2++;
i1_byte += chlen1;
i2_byte += chlen2;
}
/* If the strings are identical through their first null bytes,
skip past identical prefixes and try again. */
ptrdiff_t size = strlen (p1) + 1;
p1 += size;
p2 += size;
if (lim1 < p1)
return lim2 < p2 ? Qnil : Qt;
if (lim2 < p2)
return Qnil;
}
return i1 < SCHARS (string2) ? Qt : Qnil;
return cmp < 0 ? Qt : Qnil;
}
DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0,
@ -5164,7 +5095,7 @@ this variable. */);
defsubr (&Sstring_equal);
defsubr (&Scompare_strings);
defsubr (&Sstring_lessp);
defsubr (&Sstring_numeric_lessp);
defsubr (&Sstring_version_lessp);
defsubr (&Sstring_collate_lessp);
defsubr (&Sstring_collate_equalp);
defsubr (&Sappend);

View file

@ -192,19 +192,19 @@
a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8")))))
'("Adrian" "Ævar" "Agustín" "Eli"))))
(ert-deftest fns-tests-string-numeric-lessp ()
(should (string-numeric-lessp "foo2.png" "foo12.png"))
(should (not (string-numeric-lessp "foo12.png" "foo2.png")))
(should (string-numeric-lessp "foo12.png" "foo20000.png"))
(should (not (string-numeric-lessp "foo20000.png" "foo12.png")))
(should (string-numeric-lessp "foo.png" "foo2.png"))
(should (not (string-numeric-lessp "foo2.png" "foo.png")))
(ert-deftest fns-tests-string-version-lessp ()
(should (string-version-lessp "foo2.png" "foo12.png"))
(should (not (string-version-lessp "foo12.png" "foo2.png")))
(should (string-version-lessp "foo12.png" "foo20000.png"))
(should (not (string-version-lessp "foo20000.png" "foo12.png")))
(should (string-version-lessp "foo.png" "foo2.png"))
(should (not (string-version-lessp "foo2.png" "foo.png")))
(should (equal (sort '("foo12.png" "foo2.png" "foo1.png")
'string-numeric-lessp)
'string-version-lessp)
'("foo1.png" "foo2.png" "foo12.png")))
(should (string-numeric-lessp "foo2" "foo1234"))
(should (not (string-numeric-lessp "foo1234" "foo2")))
(should (string-numeric-lessp "foo.png" "foo2"))
(should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5"))
(should (string-numeric-lessp "2" "1245"))
(should (not (string-numeric-lessp "1245" "2"))))
(should (string-version-lessp "foo2" "foo1234"))
(should (not (string-version-lessp "foo1234" "foo2")))
(should (string-version-lessp "foo.png" "foo2"))
(should (string-version-lessp "foo1.25.5.png" "foo1.125.5"))
(should (string-version-lessp "2" "1245"))
(should (not (string-version-lessp "1245" "2"))))