diff --git a/src/character.c b/src/character.c
index c1a1b553891..d12df23f8ea 100644
--- a/src/character.c
+++ b/src/character.c
@@ -734,31 +734,6 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
   return (to - str);
 }
 
-/* Convert eight-bit chars in SRC (in multibyte form) to the
-   corresponding byte and store in DST.  CHARS is the number of
-   characters in SRC.  The value is the number of bytes stored in DST.
-   Usually, the value is the same as CHARS, but is less than it if SRC
-   contains a non-ASCII, non-eight-bit character.  */
-
-ptrdiff_t
-str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
-{
-  ptrdiff_t i;
-
-  for (i = 0; i < chars; i++)
-    {
-      int c = string_char_advance (&src);
-
-      if (CHAR_BYTE8_P (c))
-	c = CHAR_TO_BYTE8 (c);
-      else if (! ASCII_CHAR_P (c))
-	return i;
-      *dst++ = c;
-    }
-  return i;
-}
-
-
 static ptrdiff_t
 string_count_byte8 (Lisp_Object string)
 {
diff --git a/src/character.h b/src/character.h
index 6ee6bcab205..2ca935ba04c 100644
--- a/src/character.h
+++ b/src/character.h
@@ -569,8 +569,6 @@ extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
 				   ptrdiff_t *);
 extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
 extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
-extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
-				 ptrdiff_t);
 extern ptrdiff_t strwidth (const char *, ptrdiff_t);
 extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
 				 ptrdiff_t *, ptrdiff_t *);
diff --git a/src/fns.c b/src/fns.c
index 49d76a0e7c7..61ed01eee4e 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -1413,19 +1413,24 @@ an error is signaled.  */)
   (Lisp_Object string)
 {
   CHECK_STRING (string);
+  if (!STRING_MULTIBYTE (string))
+    return string;
 
-  if (STRING_MULTIBYTE (string))
+  ptrdiff_t chars = SCHARS (string);
+  Lisp_Object ret = make_uninit_string (chars);
+  unsigned char *src = SDATA (string);
+  unsigned char *dst = SDATA (ret);
+  for (ptrdiff_t i = 0; i < chars; i++)
     {
-      ptrdiff_t chars = SCHARS (string);
-      unsigned char *str = xmalloc (chars);
-      ptrdiff_t converted = str_to_unibyte (SDATA (string), str, chars);
-
-      if (converted < chars)
-	error ("Can't convert the %"pD"dth character to unibyte", converted);
-      string = make_unibyte_string ((char *) str, chars);
-      xfree (str);
+      unsigned char b = *src++;
+      if (b <= 0x7f)
+	*dst++ = b;					 /* ASCII */
+      else if (CHAR_BYTE8_HEAD_P (b))
+	*dst++ = 0x80 | (b & 1) << 6 | (*src++ & 0x3f);	 /* raw byte */
+      else
+	error ("Cannot convert character at index %"pD"d to unibyte", i);
     }
-  return string;
+  return ret;
 }
 
 
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index ba56019d4cd..0119e31df11 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -1344,4 +1344,19 @@
     (should (equal (plist-member plist (copy-sequence "a") #'equal)
                    '("a" "c")))))
 
+(ert-deftest fns--string-to-unibyte ()
+  (dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz" "\x80\xdd\xff"))
+    (ert-info ((prin1-to-string str) :prefix "str: ")
+      (should-not (multibyte-string-p str))
+      (let* ((u (string-to-unibyte str))   ; should be identity
+             (m (string-to-multibyte u))   ; lossless conversion
+             (uu (string-to-unibyte m)))   ; also lossless
+        (should-not (multibyte-string-p u))
+        (should (multibyte-string-p m))
+        (should-not (multibyte-string-p uu))
+        (should (equal str u))
+        (should (equal str uu)))))
+  (should-error (string-to-unibyte "å"))
+  (should-error (string-to-unibyte "ABC∀BC")))
+
 ;;; fns-tests.el ends here