Fix encoding of JSON surrogate pairs
JSON requires that such pairs be treated as UTF-16 surrogate pairs, not individual code points; cf. Bug #24784. * lisp/json.el (json-read-escaped-char): Fix decoding of surrogate pairs. (json--decode-utf-16-surrogates): New defun. * test/lisp/json-tests.el (test-json-read-string): Add test for surrogate pairs.
This commit is contained in:
parent
baa370f255
commit
93be35e038
2 changed files with 18 additions and 0 deletions
15
lisp/json.el
15
lisp/json.el
|
@ -363,6 +363,10 @@ representation will be parsed correctly."
|
|||
|
||||
;; String parsing
|
||||
|
||||
(defun json--decode-utf-16-surrogates (high low)
|
||||
"Return the code point represented by the UTF-16 surrogates HIGH and LOW."
|
||||
(+ (lsh (- high #xD800) 10) (- low #xDC00) #x10000))
|
||||
|
||||
(defun json-read-escaped-char ()
|
||||
"Read the JSON string escaped character at point."
|
||||
;; Skip over the '\'
|
||||
|
@ -372,6 +376,17 @@ representation will be parsed correctly."
|
|||
(cond
|
||||
(special (cdr special))
|
||||
((not (eq char ?u)) char)
|
||||
;; Special-case UTF-16 surrogate pairs,
|
||||
;; cf. https://tools.ietf.org/html/rfc7159#section-7. Note that
|
||||
;; this clause overlaps with the next one and therefore has to
|
||||
;; come first.
|
||||
((looking-at
|
||||
(rx (group (any "Dd") (any "89ABab") (= 2 (any "0-9A-Fa-f")))
|
||||
"\\u" (group (any "Dd") (any "C-Fc-f") (= 2 (any "0-9A-Fa-f")))))
|
||||
(json-advance 10)
|
||||
(json--decode-utf-16-surrogates
|
||||
(string-to-number (match-string 1) 16)
|
||||
(string-to-number (match-string 2) 16)))
|
||||
((looking-at "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]")
|
||||
(let ((hex (match-string 0)))
|
||||
(json-advance 4)
|
||||
|
|
|
@ -167,6 +167,9 @@ Point is moved to beginning of the buffer."
|
|||
(should (equal (json-read-string) "abcαβγ")))
|
||||
(json-tests--with-temp-buffer "\"\\nasd\\u0444\\u044b\\u0432fgh\\t\""
|
||||
(should (equal (json-read-string) "\nasdфывfgh\t")))
|
||||
;; Bug#24784
|
||||
(json-tests--with-temp-buffer "\"\\uD834\\uDD1E\""
|
||||
(should (equal (json-read-string) "\U0001D11E")))
|
||||
(json-tests--with-temp-buffer "foo"
|
||||
(should-error (json-read-string) :type 'json-string-format)))
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue