Improve performance by avoiding strtoumax

This made (string-to-number "10") 20% faster on my old desktop,
an AMD Phenom II X4 910e running Fedora 25 x86-64.
* admin/merge-gnulib (GNULIB_MODULES): Remove strtoumax.
* lib/gnulib.mk.in, m4/gnulib-comp.m4: Regenerate.
* lib/strtoul.c, lib/strtoull.c, lib/strtoumax.c, m4/strtoull.m4:
* m4/strtoumax.m4: Remove.
* src/editfns.c (str2num): New function.
(styled_format): Use it instead of strtoumax.  Use ptrdiff_t
instead of uintmax_t.  Check for integer overflow.
* src/lread.c (LEAD_INT, DOT_CHAR, TRAIL_INT, E_EXP):
Move to private scope and make them enums.
(string_to_number): Compute integer value directly during
first pass instead of revisiting it with strtoumax later.
This commit is contained in:
Paul Eggert 2017-06-01 16:03:12 -07:00
parent 5324710841
commit 178d0cb5f5
10 changed files with 58 additions and 202 deletions

View file

@ -38,7 +38,7 @@ GNULIB_MODULES='
manywarnings memrchr mkostemp mktime
pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat
sig2str socklen stat-time std-gnu11 stdalign stddef stdio
stpcpy strftime strtoimax strtoumax symlink sys_stat
stpcpy strftime strtoimax symlink sys_stat
sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub
update-copyright utimens
vla warnings

View file

@ -21,7 +21,7 @@
# the same distribution terms as the rest of that program.
#
# Generated by gnulib-tool.
# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings
# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings
MOSTLYCLEANFILES += core *.stackdump
@ -905,7 +905,6 @@ gl_GNULIB_ENABLED_getdtablesize = @gl_GNULIB_ENABLED_getdtablesize@
gl_GNULIB_ENABLED_getgroups = @gl_GNULIB_ENABLED_getgroups@
gl_GNULIB_ENABLED_secure_getenv = @gl_GNULIB_ENABLED_secure_getenv@
gl_GNULIB_ENABLED_strtoll = @gl_GNULIB_ENABLED_strtoll@
gl_GNULIB_ENABLED_strtoull = @gl_GNULIB_ENABLED_strtoull@
gl_GNULIB_ENABLED_tempname = @gl_GNULIB_ENABLED_tempname@
gl_LIBOBJS = @gl_LIBOBJS@
gl_LTLIBOBJS = @gl_LTLIBOBJS@
@ -2507,30 +2506,6 @@ EXTRA_libgnu_a_SOURCES += strtol.c strtoll.c
endif
## end gnulib module strtoll
## begin gnulib module strtoull
ifeq (,$(OMIT_GNULIB_MODULE_strtoull))
ifneq (,$(gl_GNULIB_ENABLED_strtoull))
endif
EXTRA_DIST += strtol.c strtoul.c strtoull.c
EXTRA_libgnu_a_SOURCES += strtol.c strtoul.c strtoull.c
endif
## end gnulib module strtoull
## begin gnulib module strtoumax
ifeq (,$(OMIT_GNULIB_MODULE_strtoumax))
EXTRA_DIST += strtoimax.c strtoumax.c
EXTRA_libgnu_a_SOURCES += strtoimax.c strtoumax.c
endif
## end gnulib module strtoumax
## begin gnulib module symlink
ifeq (,$(OMIT_GNULIB_MODULE_symlink))

View file

@ -1,19 +0,0 @@
/* Copyright (C) 1991, 1997, 2009-2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* This file has no code of its own: it defines UNSIGNED and then
   textually includes strtol.c.
   NOTE(review): presumably strtol.c tests UNSIGNED to build the
   unsigned variant of the parser -- confirm in strtol.c.  */
#define UNSIGNED 1
#include "strtol.c"

View file

@ -1,26 +0,0 @@
/* Function to parse an 'unsigned long long int' from text.
Copyright (C) 1995-1997, 1999, 2009-2017 Free Software Foundation, Inc.
NOTE: The canonical source of this file is maintained with the GNU C
Library. Bugs can be reported to bug-glibc@gnu.org.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Define QUAD and then textually include strtoul.c, which supplies the
   actual code.
   NOTE(review): presumably QUAD selects the 'long long' width in the
   shared source -- confirm in strtol.c.  */
#define QUAD 1
#include "strtoul.c"
/* Only when _LIBC is defined: also emit the strtouq names as aliases of
   the strtoull ones.  strong_alias and weak_alias are not defined in
   this file; they are assumed to be supplied by that build.  */
#ifdef _LIBC
strong_alias (__strtoull_internal, __strtouq_internal)
weak_alias (strtoull, strtouq)
#endif

View file

@ -1,2 +0,0 @@
/* This file has no code of its own: it defines UNSIGNED and then
   textually includes strtoimax.c.
   NOTE(review): presumably strtoimax.c tests UNSIGNED to build the
   unsigned variant (strtoumax) -- confirm in strtoimax.c.  */
#define UNSIGNED 1
#include "strtoimax.c"

View file

@ -140,8 +140,6 @@ AC_DEFUN([gl_EARLY],
# Code from module string:
# Code from module strtoimax:
# Code from module strtoll:
# Code from module strtoull:
# Code from module strtoumax:
# Code from module symlink:
# Code from module sys_select:
# Code from module sys_stat:
@ -364,12 +362,6 @@ AC_DEFUN([gl_INIT],
gl_PREREQ_STRTOIMAX
fi
gl_INTTYPES_MODULE_INDICATOR([strtoimax])
gl_FUNC_STRTOUMAX
if test $HAVE_DECL_STRTOUMAX = 0 || test $REPLACE_STRTOUMAX = 1; then
AC_LIBOBJ([strtoumax])
gl_PREREQ_STRTOUMAX
fi
gl_INTTYPES_MODULE_INDICATOR([strtoumax])
gl_FUNC_SYMLINK
if test $HAVE_SYMLINK = 0 || test $REPLACE_SYMLINK = 1; then
AC_LIBOBJ([symlink])
@ -420,7 +412,6 @@ AC_DEFUN([gl_INIT],
gl_gnulib_enabled_6099e9737f757db36c47fa9d9f02e88c=false
gl_gnulib_enabled_secure_getenv=false
gl_gnulib_enabled_strtoll=false
gl_gnulib_enabled_strtoull=false
gl_gnulib_enabled_tempname=false
gl_gnulib_enabled_682e609604ccaac6be382e4ee3a4eaec=false
func_gl_gnulib_m4code_260941c0e5dc67ec9e87d1fb321c300b ()
@ -569,18 +560,6 @@ AC_DEFUN([gl_INIT],
gl_gnulib_enabled_strtoll=true
fi
}
func_gl_gnulib_m4code_strtoull ()
{
if ! $gl_gnulib_enabled_strtoull; then
gl_FUNC_STRTOULL
if test $HAVE_STRTOULL = 0; then
AC_LIBOBJ([strtoull])
gl_PREREQ_STRTOULL
fi
gl_STDLIB_MODULE_INDICATOR([strtoull])
gl_gnulib_enabled_strtoull=true
fi
}
func_gl_gnulib_m4code_tempname ()
{
if ! $gl_gnulib_enabled_tempname; then
@ -649,9 +628,6 @@ AC_DEFUN([gl_INIT],
if { test $HAVE_DECL_STRTOIMAX = 0 || test $REPLACE_STRTOIMAX = 1; } && test $ac_cv_type_long_long_int = yes; then
func_gl_gnulib_m4code_strtoll
fi
if { test $HAVE_DECL_STRTOUMAX = 0 || test $REPLACE_STRTOUMAX = 1; } && test $ac_cv_type_unsigned_long_long_int = yes; then
func_gl_gnulib_m4code_strtoull
fi
if test $HAVE_TIMEGM = 0 || test $REPLACE_TIMEGM = 1; then
func_gl_gnulib_m4code_5264294aa0a5557541b53c8c741f7f31
fi
@ -670,7 +646,6 @@ AC_DEFUN([gl_INIT],
AM_CONDITIONAL([gl_GNULIB_ENABLED_6099e9737f757db36c47fa9d9f02e88c], [$gl_gnulib_enabled_6099e9737f757db36c47fa9d9f02e88c])
AM_CONDITIONAL([gl_GNULIB_ENABLED_secure_getenv], [$gl_gnulib_enabled_secure_getenv])
AM_CONDITIONAL([gl_GNULIB_ENABLED_strtoll], [$gl_gnulib_enabled_strtoll])
AM_CONDITIONAL([gl_GNULIB_ENABLED_strtoull], [$gl_gnulib_enabled_strtoull])
AM_CONDITIONAL([gl_GNULIB_ENABLED_tempname], [$gl_gnulib_enabled_tempname])
AM_CONDITIONAL([gl_GNULIB_ENABLED_682e609604ccaac6be382e4ee3a4eaec], [$gl_gnulib_enabled_682e609604ccaac6be382e4ee3a4eaec])
# End of code from modules
@ -940,9 +915,6 @@ AC_DEFUN([gl_FILE_LIST], [
lib/strtoimax.c
lib/strtol.c
lib/strtoll.c
lib/strtoul.c
lib/strtoull.c
lib/strtoumax.c
lib/symlink.c
lib/sys_select.in.h
lib/sys_stat.in.h
@ -1051,8 +1023,6 @@ AC_DEFUN([gl_FILE_LIST], [
m4/string_h.m4
m4/strtoimax.m4
m4/strtoll.m4
m4/strtoull.m4
m4/strtoumax.m4
m4/symlink.m4
m4/sys_select_h.m4
m4/sys_socket_h.m4

View file

@ -1,24 +0,0 @@
# strtoull.m4 serial 7
dnl Copyright (C) 2002, 2004, 2006, 2008-2017 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_FUNC_STRTOULL
dnl ----------------
dnl Check whether the strtoull function is available.
dnl The check runs only when the type 'unsigned long long int' exists;
dnl in that case HAVE_STRTOULL is set to 0 if strtoull is not found.
dnl Otherwise HAVE_STRTOULL is left untouched here.
AC_DEFUN([gl_FUNC_STRTOULL],
[
AC_REQUIRE([gl_STDLIB_H_DEFAULTS])
dnl We don't need (and can't compile) the replacement strtoull
dnl unless the type 'unsigned long long int' exists.
AC_REQUIRE([AC_TYPE_UNSIGNED_LONG_LONG_INT])
if test "$ac_cv_type_unsigned_long_long_int" = yes; then
AC_CHECK_FUNCS([strtoull])
dnl AC_CHECK_FUNCS leaves its result in ac_cv_func_strtoull.
if test $ac_cv_func_strtoull = no; then
HAVE_STRTOULL=0
fi
fi
])
# Prerequisites of lib/strtoull.c.
dnl No checks are performed: the macro body is just the shell no-op ':'.
AC_DEFUN([gl_PREREQ_STRTOULL], [
:
])

View file

@ -1,28 +0,0 @@
# strtoumax.m4 serial 12
dnl Copyright (C) 2002-2004, 2006, 2009-2017 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_FUNC_STRTOUMAX
dnl -----------------
dnl Check separately whether strtoumax is declared and whether the
dnl function itself is found, then record the outcome:
dnl   - declared but no function found:  REPLACE_STRTOUMAX=1
dnl   - not declared:                    HAVE_DECL_STRTOUMAX=0
dnl   - declared and found:              neither variable is changed here
AC_DEFUN([gl_FUNC_STRTOUMAX],
[
AC_REQUIRE([gl_INTTYPES_H_DEFAULTS])
dnl On OSF/1 5.1 with cc, this function is declared but not defined.
AC_CHECK_FUNCS_ONCE([strtoumax])
AC_CHECK_DECLS_ONCE([strtoumax])
dnl The two checks above leave their results in ac_cv_func_strtoumax
dnl and ac_cv_have_decl_strtoumax respectively.
if test "$ac_cv_have_decl_strtoumax" = yes; then
if test "$ac_cv_func_strtoumax" != yes; then
# HP-UX 11.11 has "#define strtoimax(...) ..." but no function.
REPLACE_STRTOUMAX=1
fi
else
HAVE_DECL_STRTOUMAX=0
fi
])
# Prerequisites of lib/strtoumax.c.
dnl Checks whether strtoull is declared and requires the check for the
dnl type 'unsigned long long int'.
dnl NOTE(review): presumably the replacement strtoumax.c uses these
dnl results to decide which underlying parser to call -- confirm there.
AC_DEFUN([gl_PREREQ_STRTOUMAX], [
AC_CHECK_DECLS([strtoull])
AC_REQUIRE([AC_TYPE_UNSIGNED_LONG_LONG_INT])
])

View file

@ -3851,6 +3851,23 @@ usage: (propertize STRING &rest PROPERTIES) */)
return string;
}
/* Parse the leading run of ASCII decimal digits in STR as a
   nonnegative number.  Store in *STR_END the address of the first
   character that is not a digit.  Return the parsed value, or
   PTRDIFF_MAX if it does not fit; return 0 if STR does not start with
   a digit.  This acts like strtol limited to ptrdiff_t, base 10 and
   the C locale, except that it accepts no sign and ignores errno.  */
static ptrdiff_t
str2num (char *str, char **str_end)
{
  ptrdiff_t value = 0;
  char *p = str;

  while (c_isdigit (*p))
    {
      int digit = *p++ - '0';

      /* Once the value saturates it stays saturated: multiplying
         PTRDIFF_MAX by 10 overflows again on every later digit.  */
      if (INT_MULTIPLY_WRAPV (value, 10, &value)
          || INT_ADD_WRAPV (value, digit, &value))
        value = PTRDIFF_MAX;
    }

  *str_end = p;
  return value;
}
DEFUN ("format", Fformat, Sformat, 1, MANY, 0,
doc: /* Format a string out of a format-string and arguments.
The first argument is a format control string.
@ -4057,17 +4074,16 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
digits to print after the '.' for floats, or the max.
number of chars to print from a string. */
uintmax_t num;
ptrdiff_t num;
char *num_end;
if (c_isdigit (*format))
{
num = strtoumax (format, &num_end, 10);
num = str2num (format, &num_end);
if (*num_end == '$')
{
if (num == 0)
error ("Invalid format field number 0");
n = min (num, PTRDIFF_MAX);
n--;
n = num - 1;
format = num_end + 1;
}
}
@ -4095,15 +4111,15 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
space_flag &= ! plus_flag;
zero_flag &= ! minus_flag;
num = strtoumax (format, &num_end, 10);
num = str2num (format, &num_end);
if (max_bufsize <= num)
string_overflow ();
ptrdiff_t field_width = num;
bool precision_given = *num_end == '.';
uintmax_t precision = (precision_given
? strtoumax (num_end + 1, &num_end, 10)
: UINTMAX_MAX);
ptrdiff_t precision = (precision_given
? str2num (num_end + 1, &num_end)
: PTRDIFF_MAX);
format = num_end;
if (format == end)
@ -4176,7 +4192,7 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
/* handle case (precision[n] >= 0) */
ptrdiff_t prec = -1;
if (precision_given && precision <= TYPE_MAXIMUM (ptrdiff_t))
if (precision_given)
prec = precision;
/* lisp_string_width ignores a precision of 0, but GNU
@ -4424,8 +4440,9 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
padding and excess precision. Deal with excess precision
first. This happens only when the format specifies
ridiculously large precision. */
uintmax_t excess_precision = precision - prec;
uintmax_t leading_zeros = 0, trailing_zeros = 0;
ptrdiff_t excess_precision
= precision_given ? precision - prec : 0;
ptrdiff_t leading_zeros = 0, trailing_zeros = 0;
if (excess_precision)
{
if (float_conversion)
@ -4451,7 +4468,9 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message)
/* Compute the total bytes needed for this item, including
excess precision and padding. */
uintmax_t numwidth = sprintf_bytes + excess_precision;
ptrdiff_t numwidth;
if (INT_ADD_WRAPV (sprintf_bytes, excess_precision, &numwidth))
numwidth = PTRDIFF_MAX;
ptrdiff_t padding
= numwidth < field_width ? field_width - numwidth : 0;
if (max_bufsize - sprintf_bytes <= excess_precision

View file

@ -3495,25 +3495,18 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg)
}
#define LEAD_INT 1
#define DOT_CHAR 2
#define TRAIL_INT 4
#define E_EXP 16
/* Convert STRING to a number, assuming base BASE. Return a fixnum if CP has
integer syntax and fits in a fixnum, else return the nearest float if CP has
either floating point or integer syntax and BASE is 10, else return nil. If
IGNORE_TRAILING, consider just the longest prefix of CP that has
valid floating point syntax. Signal an overflow if BASE is not 10 and the
number has integer syntax but does not fit. */
/* Convert STRING to a number, assuming base BASE. Return a fixnum if
STRING has integer syntax and fits in a fixnum, else return the
nearest float if STRING has either floating point or integer syntax
and BASE is 10, else return nil. If IGNORE_TRAILING, consider just
the longest prefix of STRING that has valid floating point syntax.
Signal an overflow if BASE is not 10 and the number has integer
syntax but does not fit. */
Lisp_Object
string_to_number (char const *string, int base, bool ignore_trailing)
{
int state;
char const *cp = string;
int leading_digit;
bool float_syntax = 0;
double value = 0;
@ -3525,15 +3518,23 @@ string_to_number (char const *string, int base, bool ignore_trailing)
bool signedp = negative || *cp == '+';
cp += signedp;
state = 0;
leading_digit = digit_to_number (*cp, base);
enum { INTOVERFLOW = 1, LEAD_INT = 2, DOT_CHAR = 4, TRAIL_INT = 8,
E_EXP = 16 };
int state = 0;
int leading_digit = digit_to_number (*cp, base);
uintmax_t n = leading_digit;
if (leading_digit >= 0)
{
state |= LEAD_INT;
do
++cp;
while (digit_to_number (*cp, base) >= 0);
for (int digit; 0 <= (digit = digit_to_number (*++cp, base)); )
{
if (INT_MULTIPLY_OVERFLOW (n, base))
state |= INTOVERFLOW;
n *= base;
if (INT_ADD_OVERFLOW (n, digit))
state |= INTOVERFLOW;
n += digit;
}
}
if (*cp == '.')
{
@ -3583,32 +3584,22 @@ string_to_number (char const *string, int base, bool ignore_trailing)
}
float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT)
|| state == (LEAD_INT|E_EXP));
|| (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
}
/* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept
any prefix that matches. Otherwise, the entire string must match. */
if (! (ignore_trailing
? ((state & LEAD_INT) != 0 || float_syntax)
: (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax))))
: (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
|| float_syntax))))
return Qnil;
/* If the number uses integer and not float syntax, and is in C-language
range, use its value, preferably as a fixnum. */
if (leading_digit >= 0 && ! float_syntax)
{
uintmax_t n;
/* Fast special case for single-digit integers. This also avoids a
glitch when BASE is 16 and IGNORE_TRAILING, because in that
case some versions of strtoumax accept numbers like "0x1" that Emacs
does not allow. */
if (digit_to_number (string[signedp + 1], base) < 0)
return make_number (negative ? -leading_digit : leading_digit);
errno = 0;
n = strtoumax (string + signedp, NULL, base);
if (errno == ERANGE)
if (state & INTOVERFLOW)
{
/* Unfortunately there's no simple and accurate way to convert
non-base-10 numbers that are out of C-language range. */