Fix mishandling of symbols that look like numbers

* src/bignum.c (make_neg_biguint): New function.
* src/lread.c (read1): Do not mishandle an unquoted symbol
with name equal to something like "1\0x", i.e., a string
of numeric form followed by a NUL byte.
Formerly these symbols were misread as numbers.
(string_to_number): Change last argument from an integer flag
to a pointer to the length.  This lets the caller figure out
how much of the prefix was used.  All callers changed.
Add a fast path if the integer (sans sign) fits in uintmax_t.
Update comments and simplify now that bignums are present.
* src/print.c (print_object): Fix quoting of symbols that look
like numbers, by relying on string_to_number for the tricky
cases rather than trying to redo its logic, incorrectly.  For
example, (read (prin1-to-string '\1e+NaN)) formerly returned
"1e+NaN", which was wrong: a backslash is needed in the output
to prevent it from being read as a NaN.  Escape NO_BREAK_SPACE
too, since lread.c treats it like SPACE.
* test/src/print-tests.el (print-read-roundtrip):
Add tests illustrating the abovementioned bugs.
This commit is contained in:
Paul Eggert 2018-10-10 23:17:18 -07:00
parent fd3a48fcd8
commit 5bd8cfc14d
7 changed files with 98 additions and 89 deletions

View file

@ -117,6 +117,16 @@ make_biguint (uintmax_t n)
return make_bignum ();
}
/* Return a Lisp integer equal to -N, which must not be in fixnum range. */
Lisp_Object
make_neg_biguint (uintmax_t n)
{
eassert (-MOST_NEGATIVE_FIXNUM < n);
mpz_set_uintmax (mpz[0], n);
mpz_neg (mpz[0], mpz[0]);
return make_bignum ();
}
/* Return a Lisp integer with value taken from mpz[0].
Set mpz[0] to a junk value. */
Lisp_Object

View file

@ -2796,7 +2796,7 @@ If the base used is not 10, STRING is always parsed as an integer. */)
while (*p == ' ' || *p == '\t')
p++;
Lisp_Object val = string_to_number (p, b, S2N_IGNORE_TRAILING);
Lisp_Object val = string_to_number (p, b, 0);
return NILP (val) ? make_fixnum (0) : val;
}

View file

@ -2506,7 +2506,7 @@ INTEGERP (Lisp_Object x)
return FIXNUMP (x) || BIGNUMP (x);
}
/* Return a Lisp integer with value taken from n. */
/* Return a Lisp integer with value taken from N. */
INLINE Lisp_Object
make_int (intmax_t n)
{
@ -3329,6 +3329,7 @@ extern ptrdiff_t bignum_bufsize (Lisp_Object, int);
extern ptrdiff_t bignum_to_c_string (char *, ptrdiff_t, Lisp_Object, int);
extern Lisp_Object bignum_to_string (Lisp_Object, int);
extern Lisp_Object make_bignum_str (char const *, int);
extern Lisp_Object make_neg_biguint (uintmax_t);
extern Lisp_Object double_to_integer (double);
/* Converthe integer NUM to *N. Return true if successful, false
@ -3839,7 +3840,7 @@ LOADHIST_ATTACH (Lisp_Object x)
extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object *, Lisp_Object, bool);
enum { S2N_IGNORE_TRAILING = 1 };
extern Lisp_Object string_to_number (char const *, int, int);
extern Lisp_Object string_to_number (char const *, int, ptrdiff_t *);
extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
Lisp_Object);
extern void dir_warning (const char *, Lisp_Object);

View file

@ -2354,12 +2354,14 @@ character_name_to_code (char const *name, ptrdiff_t name_len)
{
/* For "U+XXXX", pass the leading '+' to string_to_number to reject
monstrosities like "U+-0000". */
ptrdiff_t len = name_len - 1;
Lisp_Object code
= (name[0] == 'U' && name[1] == '+'
? string_to_number (name + 1, 16, 0)
? string_to_number (name + 1, 16, &len)
: call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR)
|| len != name_len - 1
|| char_surrogate_p (XFIXNUM (code)))
{
AUTO_STRING (format, "\\N{%s}");
@ -3531,12 +3533,14 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
|| strchr ("\"';()[]#`,", c) == NULL));
*p = 0;
ptrdiff_t nbytes = p - read_buffer;
UNREAD (c);
if (!quoted && !uninterned_symbol)
{
Lisp_Object result = string_to_number (read_buffer, 10, 0);
if (! NILP (result))
ptrdiff_t len;
Lisp_Object result = string_to_number (read_buffer, 10, &len);
if (! NILP (result) && len == nbytes)
return unbind_to (count, result);
}
if (!quoted && multibyte)
@ -3548,7 +3552,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
}
{
Lisp_Object result;
ptrdiff_t nbytes = p - read_buffer;
ptrdiff_t nchars
= (multibyte
? multibyte_chars_in_text ((unsigned char *) read_buffer,
@ -3700,18 +3703,18 @@ substitute_in_interval (INTERVAL interval, void *arg)
}
/* Convert STRING to a number, assuming base BASE. When STRING has
floating point syntax and BASE is 10, return a nearest float. When
STRING has integer syntax, return a fixnum if the integer fits, or
else a bignum. Otherwise, return nil. If FLAGS &
S2N_IGNORE_TRAILING is nonzero, consider just the longest prefix of
STRING that has valid syntax. */
/* Convert the initial prefix of STRING to a number, assuming base BASE.
If the prefix has floating point syntax and BASE is 10, return a
nearest float; otherwise, if the prefix has integer syntax, return
the integer; otherwise, return nil. If PLEN, set *PLEN to the
length of the numeric prefix if there is one, otherwise *PLEN is
unspecified. */
Lisp_Object
string_to_number (char const *string, int base, int flags)
string_to_number (char const *string, int base, ptrdiff_t *plen)
{
char const *cp = string;
bool float_syntax = 0;
bool float_syntax = false;
double value = 0;
/* Negate the value ourselves. This treats 0, NaNs, and infinity properly on
@ -3797,49 +3800,46 @@ string_to_number (char const *string, int base, int flags)
|| (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
}
/* Return nil if the number uses invalid syntax. If FLAGS &
S2N_IGNORE_TRAILING, accept any prefix that matches. Otherwise,
the entire string must match. */
if (! (flags & S2N_IGNORE_TRAILING
? ((state & LEAD_INT) != 0 || float_syntax)
: (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
|| float_syntax))))
return Qnil;
if (plen)
*plen = cp - string;
/* If the number uses integer and not float syntax, and is in C-language
range, use its value, preferably as a fixnum. */
if (leading_digit >= 0 && ! float_syntax)
/* Return a float if the number uses float syntax. */
if (float_syntax)
{
if ((state & INTOVERFLOW) == 0
&& n <= (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM))
{
EMACS_INT signed_n = n;
return make_fixnum (negative ? -signed_n : signed_n);
}
/* Trim any leading "+" and trailing nondigits, then convert to
bignum. */
string += positive;
if (!*after_digits)
return make_bignum_str (string, base);
ptrdiff_t trimmed_len = after_digits - string;
USE_SAFE_ALLOCA;
char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
memcpy (trimmed, string, trimmed_len);
trimmed[trimmed_len] = '\0';
Lisp_Object result = make_bignum_str (trimmed, base);
SAFE_FREE ();
return result;
/* Convert to floating point, unless the value is already known
because it is infinite or a NaN. */
if (! value)
value = atof (string + signedp);
return make_float (negative ? -value : value);
}
/* Either the number uses float syntax, or it does not fit into a fixnum.
Convert it from string to floating point, unless the value is already
known because it is an infinity, a NAN, or its absolute value fits in
uintmax_t. */
if (! value)
value = atof (string + signedp);
/* Return nil if the number uses invalid syntax. */
if (! (state & LEAD_INT))
return Qnil;
return make_float (negative ? -value : value);
/* Fast path if the integer (san sign) fits in uintmax_t. */
if (! (state & INTOVERFLOW))
{
if (!negative)
return make_uint (n);
if (-MOST_NEGATIVE_FIXNUM < n)
return make_neg_biguint (n);
EMACS_INT signed_n = n;
return make_fixnum (-signed_n);
}
/* Trim any leading "+" and trailing nondigits, then return a bignum. */
string += positive;
if (!*after_digits)
return make_bignum_str (string, base);
ptrdiff_t trimmed_len = after_digits - string;
USE_SAFE_ALLOCA;
char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
memcpy (trimmed, string, trimmed_len);
trimmed[trimmed_len] = '\0';
Lisp_Object result = make_bignum_str (trimmed, base);
SAFE_FREE ();
return result;
}

View file

@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
case Lisp_Symbol:
{
bool confusing;
unsigned char *p = SDATA (SYMBOL_NAME (obj));
unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));
int c;
ptrdiff_t i, i_byte;
ptrdiff_t size_byte;
Lisp_Object name;
Lisp_Object name = SYMBOL_NAME (obj);
ptrdiff_t size_byte = SBYTES (name);
name = SYMBOL_NAME (obj);
if (p != end && (*p == '-' || *p == '+')) p++;
if (p == end)
confusing = 0;
/* If symbol name begins with a digit, and ends with a digit,
and contains nothing but digits and `e', it could be treated
as a number. So set CONFUSING.
Symbols that contain periods could also be taken as numbers,
but periods are always escaped, so we don't have to worry
about them here. */
else if (*p >= '0' && *p <= '9'
&& end[-1] >= '0' && end[-1] <= '9')
{
while (p != end && ((*p >= '0' && *p <= '9')
/* Needed for \2e10. */
|| *p == 'e' || *p == 'E'))
p++;
confusing = (end == p);
}
else
confusing = 0;
size_byte = SBYTES (name);
/* Set CONFUSING if NAME looks like a number, calling
string_to_number for non-obvious cases. */
char *p = SSDATA (name);
bool signedp = *p == '-' || *p == '+';
ptrdiff_t len;
bool confusing = ((c_isdigit (p[signedp]) || p[signedp] == '.')
&& !NILP (string_to_number (p, 10, &len))
&& len == size_byte);
if (! NILP (Vprint_gensym)
&& !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
break;
}
for (i = 0, i_byte = 0; i_byte < size_byte;)
ptrdiff_t i = 0;
for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
{
/* Here, we must convert each multi-byte form to the
corresponding character code before handing it to PRINTCHAR. */
int c;
FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
maybe_quit ();
@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
|| c == ';' || c == '#' || c == '(' || c == ')'
|| c == ',' || c == '.' || c == '`'
|| c == '[' || c == ']' || c == '?' || c <= 040
|| c == NO_BREAK_SPACE
|| confusing
|| (i == 1 && confusable_symbol_character_p (c)))
{

View file

@ -6852,7 +6852,12 @@ SIGCODE may be an integer, or a symbol whose name is a signal name. */)
{
Lisp_Object tem = Fget_process (process);
if (NILP (tem))
tem = string_to_number (SSDATA (process), 10, 0);
{
ptrdiff_t len;
tem = string_to_number (SSDATA (process), 10, &len);
if (NILP (tem) || len != SBYTES (process))
return Qnil;
}
process = tem;
}
else if (!NUMBERP (process))

View file

@ -95,8 +95,20 @@ otherwise, use a different charset."
"--------\n"))))
(ert-deftest print-read-roundtrip ()
(let ((sym '\bar))
(should (eq (read (prin1-to-string sym)) sym))))
(let ((syms (list '## '& '* '+ '- '/ '0E '0e '< '= '> 'E 'E0 'NaN '\"
'\# '\#x0 '\' '\'\' '\( '\) '\+00 '\, '\-0 '\. '\.0
'\0 '\0.0 '\0E0 '\0e0 '\1E+ '\1E+NaN '\1e+ '\1e+NaN
'\; '\? '\[ '\\ '\] '\` '_ 'a 'e 'e0 'x
'{ '| '} '~ : '\ '\bar
(intern "\t") (intern "\n") (intern " ")
(intern "\N{NO-BREAK SPACE}")
(intern "\N{ZERO WIDTH SPACE}")
(intern "\0"))))
(dolist (sym syms)
(should (eq (read (prin1-to-string sym)) sym))
(dolist (sym1 syms)
(let ((sym2 (intern (concat (symbol-name sym) (symbol-name sym1)))))
(should (eq (read (prin1-to-string sym2)) sym2)))))))
(ert-deftest print-bignum ()
(let* ((str "999999999999999999999999999999999")