Speed up string-to-unibyte

* src/character.h (str_to_unibyte):
* src/character.c (str_to_unibyte): Remove.
* src/fns.c (Fstring_to_unibyte): Ditch the call to str_to_unibyte and
the unnecessary heap allocation.  Write new, faster code.
* test/src/fns-tests.el (fns--string-to-unibyte): New test.
This commit is contained in:
Mattias Engdegård 2022-07-10 18:02:08 +02:00
parent 4bab499ed0
commit cfda663282
4 changed files with 30 additions and 37 deletions

View file

@ -734,31 +734,6 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
return (to - str);
}
/* Store into DST the unibyte form of the first CHARS characters of
   the multibyte text SRC: an ASCII character is stored as is, an
   eight-bit character is stored as the byte it corresponds to.
   Stop at the first character that is neither.  Return the number
   of bytes stored in DST; this equals CHARS unless such a character
   was found, in which case it is the index of that character.  */

ptrdiff_t
str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
{
  ptrdiff_t stored = 0;

  while (stored < chars)
    {
      int ch = string_char_advance (&src);

      if (CHAR_BYTE8_P (ch))
        ch = CHAR_TO_BYTE8 (ch);
      else if (! ASCII_CHAR_P (ch))
        break;  /* Neither ASCII nor eight-bit: give up here.  */

      dst[stored++] = ch;
    }

  return stored;
}
static ptrdiff_t
string_count_byte8 (Lisp_Object string)
{

View file

@ -569,8 +569,6 @@ extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
ptrdiff_t *);
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
ptrdiff_t *, ptrdiff_t *);

View file

@ -1413,19 +1413,24 @@ an error is signaled. */)
(Lisp_Object string)
{
CHECK_STRING (string);
if (!STRING_MULTIBYTE (string))
return string;
if (STRING_MULTIBYTE (string))
ptrdiff_t chars = SCHARS (string);
Lisp_Object ret = make_uninit_string (chars);
unsigned char *src = SDATA (string);
unsigned char *dst = SDATA (ret);
for (ptrdiff_t i = 0; i < chars; i++)
{
ptrdiff_t chars = SCHARS (string);
unsigned char *str = xmalloc (chars);
ptrdiff_t converted = str_to_unibyte (SDATA (string), str, chars);
if (converted < chars)
error ("Can't convert the %"pD"dth character to unibyte", converted);
string = make_unibyte_string ((char *) str, chars);
xfree (str);
unsigned char b = *src++;
if (b <= 0x7f)
*dst++ = b; /* ASCII */
else if (CHAR_BYTE8_HEAD_P (b))
*dst++ = 0x80 | (b & 1) << 6 | (*src++ & 0x3f); /* raw byte */
else
error ("Cannot convert character at index %"pD"d to unibyte", i);
}
return string;
return ret;
}

View file

@ -1344,4 +1344,19 @@
(should (equal (plist-member plist (copy-sequence "a") #'equal)
'("a" "c")))))
;; Check `string-to-unibyte': for each unibyte sample string the result
;; must be unibyte and `equal' to the input, both when converting the
;; string directly and after a round trip through `string-to-multibyte'.
;; Strings containing å or ∀ must signal an error instead.
(ert-deftest fns--string-to-unibyte ()
;; NOTE(review): the fifth literal is spelled "a\xaa\xbbz " (trailing
;; space) and is immediately followed by the next string with no
;; separator.  The reader still sees two strings, but the space may have
;; been meant to go between them -- confirm.
(dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz ""\x80\xdd\xff"))
;; Label any failure with the printed form of the string under test.
(ert-info ((prin1-to-string str) :prefix "str: ")
;; Precondition: every sample literal is expected to be unibyte.
(should-not (multibyte-string-p str))
(let* ((u (string-to-unibyte str)) ; should be identity
(m (string-to-multibyte u)) ; lossless conversion
(uu (string-to-unibyte m))) ; also lossless
;; Representation checks: U and UU unibyte, M multibyte.
(should-not (multibyte-string-p u))
(should (multibyte-string-p m))
(should-not (multibyte-string-p uu))
;; Content checks: nothing was lost or altered along the way.
(should (equal str u))
(should (equal str uu)))))
;; Error cases: a string consisting of å, and ASCII text with ∀ in the
;; middle.
(should-error (string-to-unibyte "å"))
(should-error (string-to-unibyte "ABC∀BC")))
;;; fns-tests.el ends here