Speed up string-to-unibyte
* src/character.h (str_to_unibyte):
* src/character.c (str_to_unibyte): Remove.
* src/fns.c (Fstring_to_unibyte): Ditch the call to str_to_unibyte and the unnecessary heap allocation. Write new, faster code.
* test/src/fns-tests.el (fns--string-to-unibyte): New test.
This commit is contained in:
parent
4bab499ed0
commit
cfda663282
4 changed files with 30 additions and 37 deletions
|
@ -734,31 +734,6 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
|
|||
return (to - str);
|
||||
}
|
||||
|
||||
/* Convert eight-bit chars in SRC (in multibyte form) to the
|
||||
corresponding byte and store in DST. CHARS is the number of
|
||||
characters in SRC. The value is the number of bytes stored in DST.
|
||||
Usually, the value is the same as CHARS, but is less than it if SRC
|
||||
contains a non-ASCII, non-eight-bit character. */
|
||||
|
||||
ptrdiff_t
|
||||
str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
|
||||
{
|
||||
ptrdiff_t i;
|
||||
|
||||
for (i = 0; i < chars; i++)
|
||||
{
|
||||
int c = string_char_advance (&src);
|
||||
|
||||
if (CHAR_BYTE8_P (c))
|
||||
c = CHAR_TO_BYTE8 (c);
|
||||
else if (! ASCII_CHAR_P (c))
|
||||
return i;
|
||||
*dst++ = c;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
static ptrdiff_t
|
||||
string_count_byte8 (Lisp_Object string)
|
||||
{
|
||||
|
|
|
@ -569,8 +569,6 @@ extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
|
|||
ptrdiff_t *);
|
||||
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
|
||||
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
|
||||
extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
|
||||
ptrdiff_t);
|
||||
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
|
||||
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
|
||||
ptrdiff_t *, ptrdiff_t *);
|
||||
|
|
25
src/fns.c
25
src/fns.c
|
@ -1413,19 +1413,24 @@ an error is signaled. */)
|
|||
(Lisp_Object string)
|
||||
{
|
||||
CHECK_STRING (string);
|
||||
if (!STRING_MULTIBYTE (string))
|
||||
return string;
|
||||
|
||||
if (STRING_MULTIBYTE (string))
|
||||
ptrdiff_t chars = SCHARS (string);
|
||||
Lisp_Object ret = make_uninit_string (chars);
|
||||
unsigned char *src = SDATA (string);
|
||||
unsigned char *dst = SDATA (ret);
|
||||
for (ptrdiff_t i = 0; i < chars; i++)
|
||||
{
|
||||
ptrdiff_t chars = SCHARS (string);
|
||||
unsigned char *str = xmalloc (chars);
|
||||
ptrdiff_t converted = str_to_unibyte (SDATA (string), str, chars);
|
||||
|
||||
if (converted < chars)
|
||||
error ("Can't convert the %"pD"dth character to unibyte", converted);
|
||||
string = make_unibyte_string ((char *) str, chars);
|
||||
xfree (str);
|
||||
unsigned char b = *src++;
|
||||
if (b <= 0x7f)
|
||||
*dst++ = b; /* ASCII */
|
||||
else if (CHAR_BYTE8_HEAD_P (b))
|
||||
*dst++ = 0x80 | (b & 1) << 6 | (*src++ & 0x3f); /* raw byte */
|
||||
else
|
||||
error ("Cannot convert character at index %"pD"d to unibyte", i);
|
||||
}
|
||||
return string;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1344,4 +1344,19 @@
|
|||
(should (equal (plist-member plist (copy-sequence "a") #'equal)
|
||||
'("a" "c")))))
|
||||
|
||||
(ert-deftest fns--string-to-unibyte ()
|
||||
(dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz ""\x80\xdd\xff"))
|
||||
(ert-info ((prin1-to-string str) :prefix "str: ")
|
||||
(should-not (multibyte-string-p str))
|
||||
(let* ((u (string-to-unibyte str)) ; should be identity
|
||||
(m (string-to-multibyte u)) ; lossless conversion
|
||||
(uu (string-to-unibyte m))) ; also lossless
|
||||
(should-not (multibyte-string-p u))
|
||||
(should (multibyte-string-p m))
|
||||
(should-not (multibyte-string-p uu))
|
||||
(should (equal str u))
|
||||
(should (equal str uu)))))
|
||||
(should-error (string-to-unibyte "å"))
|
||||
(should-error (string-to-unibyte "ABC∀BC")))
|
||||
|
||||
;;; fns-tests.el ends here
|
||||
|
|
Loading…
Add table
Reference in a new issue