(read_escape): Provide a Unicode character escape syntax: \u followed by
exactly four, or \U followed by exactly eight, hex digits in a character
literal or string is read as the Unicode character with that code point.
2006-06-09 18:22:30 +00:00 · 2006-06-09 18:22:30 +00:00 · 71b169b8c4
commit 71b169b8c4
parent a9ab79a844
1 changed files with 49 additions and 0 deletions
--- a/src/lread.c
+++ b/src/lread.c
@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep)
     int *byterep;
 {
  register int c = READCHAR;
+  /* \u takes exactly four hex digits, \U exactly eight.  Default to the
+     count for \u, and change this value in the case that \U is seen.  */
+  int unicode_hex_count = 4;

  *byterep = 0;

@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep)
 	return i;
      }

+    case 'U':
+      /* Post-Unicode-2.0: Exactly eight hex digits.  */
+      unicode_hex_count = 8;
+    case 'u':
+
+      /* A Unicode escape. We only permit them in strings and characters,
+	 not arbitrarily in the source code, as in some other languages.  */
+      {
+	int i = 0;
+	int count = 0;
+	Lisp_Object lisp_char;
+	struct gcpro gcpro1;
+
+	while (++count <= unicode_hex_count)
+	  {
+	    c = READCHAR;
+	    /* isdigit(), isalpha() may be locale-specific, which we don't
+	       want. */
+	    if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
+	    else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
+            else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
+	    else
+	      {
+		error ("Non-hex digit used for Unicode escape");
+		break;
+	      }
+	  }
+
+	GCPRO1 (readcharfun);
+	lisp_char = call2(intern("decode-char"), intern("ucs"),
+			  make_number(i));
+	UNGCPRO;
+
+	if (EQ(Qnil, lisp_char))
+	  {
+	    /* This is ugly and horrible and trashes the user's data.  */
+	    XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201,
+				       34 + 128, 46 + 128));
+            return i;
+	  }
+	else
+	  {
+	    return XFASTINT (lisp_char);
+	  }
+      }
+
    default:
      if (BASE_LEADING_CODE_P (c))
 	c = read_multibyte (c, readcharfun);