Fix mishandling of symbols that look like numbers

* src/bignum.c (make_neg_biguint): New function. * src/lread.c (read1): Do not mishandle an unquoted symbol with name equal to something like "1\0x", i.e., a string of numeric form followed by a NUL byte. Formerly these symbols were misread as numbers. (string_to_number): Change last argument from an integer flag to a pointer to the length. This lets the caller figure out how much of the prefix was used. All callers changed. Add a fast path if the integer (sans sign) fits in uintmax_t. Update comments and simplify now that bignums are present. * src/print.c (print_object): Fix quoting of symbols that look like numbers, by relying on string_to_number for the tricky cases rather than trying to redo its logic, incorrectly. For example, (read (prin1-to-string '\1e+NaN)) formerly returned "1e+NaN", which was wrong: a backslash is needed in the output to prevent it from being read as a NaN. Escape NO_BREAK_SPACE too, since lread.c treats it like SPACE. * test/src/print-tests.el (print-read-roundtrip): Add tests illustrating the abovementioned bugs.
2018-10-10 23:17:18 -07:00 · 2018-10-10 23:17:18 -07:00 · 5bd8cfc14d
commit 5bd8cfc14d
parent fd3a48fcd8
7 changed files with 98 additions and 89 deletions
--- a/src/bignum.c
+++ b/src/bignum.c
@ -117,6 +117,16 @@ make_biguint (uintmax_t n)
  return make_bignum ();
 }

+/* Return a Lisp integer equal to -N, which must not be in fixnum range.  */
+Lisp_Object
+make_neg_biguint (uintmax_t n)
+{
+  eassert (-MOST_NEGATIVE_FIXNUM < n);
+  mpz_set_uintmax (mpz[0], n);
+  mpz_neg (mpz[0], mpz[0]);
+  return make_bignum ();
+}
+
 /* Return a Lisp integer with value taken from mpz[0].
   Set mpz[0] to a junk value.  */
 Lisp_Object
--- a/src/data.c
+++ b/src/data.c
@ -2796,7 +2796,7 @@ If the base used is not 10, STRING is always parsed as an integer.  */)
  while (*p == ' ' || *p == '\t')
    p++;

-  Lisp_Object val = string_to_number (p, b, S2N_IGNORE_TRAILING);
+  Lisp_Object val = string_to_number (p, b, 0);
  return NILP (val) ? make_fixnum (0) : val;
 }

--- a/src/lisp.h
+++ b/src/lisp.h
@ -2506,7 +2506,7 @@ INTEGERP (Lisp_Object x)
  return FIXNUMP (x) || BIGNUMP (x);
 }

-/* Return a Lisp integer with value taken from n.  */
+/* Return a Lisp integer with value taken from N.  */
 INLINE Lisp_Object
 make_int (intmax_t n)
 {
@ -3329,6 +3329,7 @@ extern ptrdiff_t bignum_bufsize (Lisp_Object, int);
 extern ptrdiff_t bignum_to_c_string (char *, ptrdiff_t, Lisp_Object, int);
 extern Lisp_Object bignum_to_string (Lisp_Object, int);
 extern Lisp_Object make_bignum_str (char const *, int);
+extern Lisp_Object make_neg_biguint (uintmax_t);
 extern Lisp_Object double_to_integer (double);

 /* Converthe integer NUM to *N.  Return true if successful, false
@ -3839,7 +3840,7 @@ LOADHIST_ATTACH (Lisp_Object x)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                  Lisp_Object *, Lisp_Object, bool);
 enum { S2N_IGNORE_TRAILING = 1 };
-extern Lisp_Object string_to_number (char const *, int, int);
+extern Lisp_Object string_to_number (char const *, int, ptrdiff_t *);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                         Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
--- a/src/lread.c
+++ b/src/lread.c
@ -2354,12 +2354,14 @@ character_name_to_code (char const *name, ptrdiff_t name_len)
 {
  /* For "U+XXXX", pass the leading '+' to string_to_number to reject
     monstrosities like "U+-0000".  */
+  ptrdiff_t len = name_len - 1;
  Lisp_Object code
    = (name[0] == 'U' && name[1] == '+'
-       ? string_to_number (name + 1, 16, 0)
+       ? string_to_number (name + 1, 16, &len)
       : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));

  if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR)
+      || len != name_len - 1
      || char_surrogate_p (XFIXNUM (code)))
    {
      AUTO_STRING (format, "\\N{%s}");
@ -3531,12 +3533,14 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 		   || strchr ("\"';()[]#`,", c) == NULL));

 	*p = 0;
+	ptrdiff_t nbytes = p - read_buffer;
 	UNREAD (c);

 	if (!quoted && !uninterned_symbol)
 	  {
-	    Lisp_Object result = string_to_number (read_buffer, 10, 0);
-	    if (! NILP (result))
+	    ptrdiff_t len;
+	    Lisp_Object result = string_to_number (read_buffer, 10, &len);
+	    if (! NILP (result) && len == nbytes)
 	      return unbind_to (count, result);
 	  }
        if (!quoted && multibyte)
@ -3548,7 +3552,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
          }
 	{
 	  Lisp_Object result;
-	  ptrdiff_t nbytes = p - read_buffer;
 	  ptrdiff_t nchars
 	    = (multibyte
 	       ? multibyte_chars_in_text ((unsigned char *) read_buffer,
@ -3700,18 +3703,18 @@ substitute_in_interval (INTERVAL interval, void *arg)
 }


-/* Convert STRING to a number, assuming base BASE.  When STRING has
-   floating point syntax and BASE is 10, return a nearest float.  When
-   STRING has integer syntax, return a fixnum if the integer fits, or
-   else a bignum.  Otherwise, return nil.  If FLAGS &
-   S2N_IGNORE_TRAILING is nonzero, consider just the longest prefix of
-   STRING that has valid syntax.  */
+/* Convert the initial prefix of STRING to a number, assuming base BASE.
+   If the prefix has floating point syntax and BASE is 10, return a
+   nearest float; otherwise, if the prefix has integer syntax, return
+   the integer; otherwise, return nil.  If PLEN, set *PLEN to the
+   length of the numeric prefix if there is one, otherwise *PLEN is
+   unspecified.  */

 Lisp_Object
-string_to_number (char const *string, int base, int flags)
+string_to_number (char const *string, int base, ptrdiff_t *plen)
 {
  char const *cp = string;
-  bool float_syntax = 0;
+  bool float_syntax = false;
  double value = 0;

  /* Negate the value ourselves.  This treats 0, NaNs, and infinity properly on
@ -3797,49 +3800,46 @@ string_to_number (char const *string, int base, int flags)
 		      || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
    }

-  /* Return nil if the number uses invalid syntax.  If FLAGS &
-     S2N_IGNORE_TRAILING, accept any prefix that matches.  Otherwise,
-     the entire string must match.  */
-  if (! (flags & S2N_IGNORE_TRAILING
-	 ? ((state & LEAD_INT) != 0 || float_syntax)
-	 : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
-		     || float_syntax))))
-    return Qnil;
+  if (plen)
+    *plen = cp - string;

-  /* If the number uses integer and not float syntax, and is in C-language
-     range, use its value, preferably as a fixnum.  */
-  if (leading_digit >= 0 && ! float_syntax)
+  /* Return a float if the number uses float syntax.  */
+  if (float_syntax)
    {
-      if ((state & INTOVERFLOW) == 0
-	  && n <= (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM))
-	{
-	  EMACS_INT signed_n = n;
-	  return make_fixnum (negative ? -signed_n : signed_n);
-	}
-
-      /* Trim any leading "+" and trailing nondigits, then convert to
-	 bignum.  */
-      string += positive;
-      if (!*after_digits)
-	return make_bignum_str (string, base);
-      ptrdiff_t trimmed_len = after_digits - string;
-      USE_SAFE_ALLOCA;
-      char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
-      memcpy (trimmed, string, trimmed_len);
-      trimmed[trimmed_len] = '\0';
-      Lisp_Object result = make_bignum_str (trimmed, base);
-      SAFE_FREE ();
-      return result;
+      /* Convert to floating point, unless the value is already known
+	 because it is infinite or a NaN.  */
+      if (! value)
+	value = atof (string + signedp);
+      return make_float (negative ? -value : value);
    }

-  /* Either the number uses float syntax, or it does not fit into a fixnum.
-     Convert it from string to floating point, unless the value is already
-     known because it is an infinity, a NAN, or its absolute value fits in
-     uintmax_t.  */
-  if (! value)
-    value = atof (string + signedp);
+  /* Return nil if the number uses invalid syntax.  */
+  if (! (state & LEAD_INT))
+    return Qnil;

-  return make_float (negative ? -value : value);
+  /* Fast path if the integer (san sign) fits in uintmax_t.  */
+  if (! (state & INTOVERFLOW))
+    {
+      if (!negative)
+	return make_uint (n);
+      if (-MOST_NEGATIVE_FIXNUM < n)
+	return make_neg_biguint (n);
+      EMACS_INT signed_n = n;
+      return make_fixnum (-signed_n);
+    }
+
+  /* Trim any leading "+" and trailing nondigits, then return a bignum.  */
+  string += positive;
+  if (!*after_digits)
+    return make_bignum_str (string, base);
+  ptrdiff_t trimmed_len = after_digits - string;
+  USE_SAFE_ALLOCA;
+  char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
+  memcpy (trimmed, string, trimmed_len);
+  trimmed[trimmed_len] = '\0';
+  Lisp_Object result = make_bignum_str (trimmed, base);
+  SAFE_FREE ();
+  return result;
 }


--- a/src/print.c
+++ b/src/print.c
@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)

    case Lisp_Symbol:
      {
-	bool confusing;
-	unsigned char *p = SDATA (SYMBOL_NAME (obj));
-	unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));
-	int c;
-	ptrdiff_t i, i_byte;
-	ptrdiff_t size_byte;
-	Lisp_Object name;
+	Lisp_Object name = SYMBOL_NAME (obj);
+	ptrdiff_t size_byte = SBYTES (name);

-	name = SYMBOL_NAME (obj);
-
-	if (p != end && (*p == '-' || *p == '+')) p++;
-	if (p == end)
-	  confusing = 0;
-	/* If symbol name begins with a digit, and ends with a digit,
-	   and contains nothing but digits and `e', it could be treated
-	   as a number.  So set CONFUSING.
-
-	   Symbols that contain periods could also be taken as numbers,
-	   but periods are always escaped, so we don't have to worry
-	   about them here.  */
-	else if (*p >= '0' && *p <= '9'
-		 && end[-1] >= '0' && end[-1] <= '9')
-	  {
-	    while (p != end && ((*p >= '0' && *p <= '9')
-				/* Needed for \2e10.  */
-				|| *p == 'e' || *p == 'E'))
-	      p++;
-	    confusing = (end == p);
-	  }
-	else
-	  confusing = 0;
-
-	size_byte = SBYTES (name);
+	/* Set CONFUSING if NAME looks like a number, calling
+	   string_to_number for non-obvious cases.  */
+	char *p = SSDATA (name);
+	bool signedp = *p == '-' || *p == '+';
+	ptrdiff_t len;
+	bool confusing = ((c_isdigit (p[signedp]) || p[signedp] == '.')
+			  && !NILP (string_to_number (p, 10, &len))
+			  && len == size_byte);

 	if (! NILP (Vprint_gensym)
            && !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
 	    break;
 	  }

-	for (i = 0, i_byte = 0; i_byte < size_byte;)
+	ptrdiff_t i = 0;
+	for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
 	  {
 	    /* Here, we must convert each multi-byte form to the
 	       corresponding character code before handing it to PRINTCHAR.  */
+	    int c;
 	    FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
 	    maybe_quit ();

@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag)
 		    || c == ';' || c == '#' || c == '(' || c == ')'
 		    || c == ',' || c == '.' || c == '`'
 		    || c == '[' || c == ']' || c == '?' || c <= 040
+		    || c == NO_BREAK_SPACE
                    || confusing
 		    || (i == 1 && confusable_symbol_character_p (c)))
 		  {
--- a/src/process.c
+++ b/src/process.c
@ -6852,7 +6852,12 @@ SIGCODE may be an integer, or a symbol whose name is a signal name.  */)
    {
      Lisp_Object tem = Fget_process (process);
      if (NILP (tem))
-	tem = string_to_number (SSDATA (process), 10, 0);
+	{
+	  ptrdiff_t len;
+	  tem = string_to_number (SSDATA (process), 10, &len);
+	  if (NILP (tem) || len != SBYTES (process))
+	    return Qnil;
+	}
      process = tem;
    }
  else if (!NUMBERP (process))
--- a/test/src/print-tests.el
+++ b/test/src/print-tests.el
@ -95,8 +95,20 @@ otherwise, use a different charset."
                     "--------\n"))))

 (ert-deftest print-read-roundtrip ()
-  (let ((sym '\’bar))
-    (should (eq (read (prin1-to-string sym)) sym))))
+  (let ((syms (list '## '& '* '+ '- '/ '0E '0e '< '= '> 'E 'E0 'NaN '\"
+                    '\# '\#x0 '\' '\'\' '\( '\) '\+00 '\, '\-0 '\. '\.0
+                    '\0 '\0.0 '\0E0 '\0e0 '\1E+ '\1E+NaN '\1e+ '\1e+NaN
+                    '\; '\? '\[ '\\ '\] '\` '_ 'a 'e 'e0 'x
+                    '{ '| '} '~ : '\’ '\’bar
+                    (intern "\t") (intern "\n") (intern " ")
+                    (intern "\N{NO-BREAK SPACE}")
+                    (intern "\N{ZERO WIDTH SPACE}")
+                    (intern "\0"))))
+    (dolist (sym syms)
+      (should (eq (read (prin1-to-string sym)) sym))
+      (dolist (sym1 syms)
+        (let ((sym2 (intern (concat (symbol-name sym) (symbol-name sym1)))))
+          (should (eq (read (prin1-to-string sym2)) sym2)))))))

 (ert-deftest print-bignum ()
  (let* ((str "999999999999999999999999999999999")