Simplify and speed up string-to-multibyte
* src/character.h (str_to_multibyte):
* src/character.c (str_to_multibyte): Change signature and simplify; the conversion is no longer done in-place.
* src/fns.c (string_to_multibyte): Drop temporary buffer and memcpy; adapt to new str_to_multibyte signature.
* src/print.c (print_string): Drop memcpy; adapt call to str_to_multibyte.
* test/src/fns-tests.el (fns--string-to-unibyte): Rename to...
(fns--string-to-unibyte-multibyte): ... this and strengthen, so that the test covers string-to-multibyte reasonably well.
This commit is contained in:
parent
9684687793
commit
69b68099ec
5 changed files with 37 additions and 49 deletions
|
@ -666,35 +666,26 @@ count_size_as_multibyte (const unsigned char *str, ptrdiff_t len)
|
|||
}
|
||||
|
||||
|
||||
/* Convert unibyte text at STR of BYTES bytes to a multibyte text
|
||||
that contains the same single-byte characters. It actually
|
||||
converts all 8-bit characters to multibyte forms. It is assured
|
||||
that we can use LEN bytes at STR as a work area and that is
|
||||
enough. */
|
||||
|
||||
ptrdiff_t
|
||||
str_to_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t bytes)
|
||||
/* Convert unibyte text at SRC of NCHARS bytes to a multibyte text
|
||||
at DST of NBYTES bytes, that contains the same single-byte characters. */
|
||||
void
|
||||
str_to_multibyte (unsigned char *dst, const unsigned char *src,
|
||||
ptrdiff_t nchars, ptrdiff_t nbytes)
|
||||
{
|
||||
unsigned char *p = str, *endp = str + bytes;
|
||||
unsigned char *to;
|
||||
|
||||
while (p < endp && *p < 0x80) p++;
|
||||
if (p == endp)
|
||||
return bytes;
|
||||
to = p;
|
||||
bytes = endp - p;
|
||||
endp = str + len;
|
||||
memmove (endp - bytes, p, bytes);
|
||||
p = endp - bytes;
|
||||
while (p < endp)
|
||||
const unsigned char *s = src + nchars;
|
||||
unsigned char *d = dst + nbytes;
|
||||
for (ptrdiff_t i = 0; i < nchars; i++)
|
||||
{
|
||||
int c = *p++;
|
||||
|
||||
if (c >= 0x80)
|
||||
c = BYTE8_TO_CHAR (c);
|
||||
to += CHAR_STRING (c, to);
|
||||
unsigned char c = *--s;
|
||||
if (c <= 0x7f)
|
||||
*--d = c;
|
||||
else
|
||||
{
|
||||
*--d = 0x80 + (c & 0x3f);
|
||||
*--d = 0xc0 + ((c >> 6) & 1);
|
||||
}
|
||||
}
|
||||
return (to - str);
|
||||
eassert (d == dst && s == src);
|
||||
}
|
||||
|
||||
/* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
|
||||
|
|
|
@ -567,7 +567,8 @@ extern int translate_char (Lisp_Object, int c);
|
|||
extern ptrdiff_t count_size_as_multibyte (const unsigned char *, ptrdiff_t);
|
||||
extern ptrdiff_t str_as_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t,
|
||||
ptrdiff_t *);
|
||||
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
|
||||
extern void str_to_multibyte (unsigned char *dst, const unsigned char *src,
|
||||
ptrdiff_t nchars, ptrdiff_t nbytes);
|
||||
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
|
||||
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
|
||||
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
|
||||
|
|
23
src/fns.c
23
src/fns.c
|
@ -1237,33 +1237,24 @@ string_make_multibyte (Lisp_Object string)
|
|||
|
||||
|
||||
/* Convert STRING (if unibyte) to a multibyte string without changing
|
||||
the number of characters. Characters 0200 through 0237 are
|
||||
converted to eight-bit characters. */
|
||||
the number of characters. Characters 0x80..0xff are interpreted as
|
||||
raw bytes. */
|
||||
|
||||
Lisp_Object
|
||||
string_to_multibyte (Lisp_Object string)
|
||||
{
|
||||
unsigned char *buf;
|
||||
ptrdiff_t nbytes;
|
||||
Lisp_Object ret;
|
||||
USE_SAFE_ALLOCA;
|
||||
|
||||
if (STRING_MULTIBYTE (string))
|
||||
return string;
|
||||
|
||||
nbytes = count_size_as_multibyte (SDATA (string), SBYTES (string));
|
||||
ptrdiff_t nchars = SCHARS (string);
|
||||
ptrdiff_t nbytes = count_size_as_multibyte (SDATA (string), nchars);
|
||||
/* If all the chars are ASCII, they won't need any more bytes once
|
||||
converted. */
|
||||
if (nbytes == SBYTES (string))
|
||||
if (nbytes == nchars)
|
||||
return make_multibyte_string (SSDATA (string), nbytes, nbytes);
|
||||
|
||||
buf = SAFE_ALLOCA (nbytes);
|
||||
memcpy (buf, SDATA (string), SBYTES (string));
|
||||
str_to_multibyte (buf, nbytes, SBYTES (string));
|
||||
|
||||
ret = make_multibyte_string ((char *) buf, SCHARS (string), nbytes);
|
||||
SAFE_FREE ();
|
||||
|
||||
Lisp_Object ret = make_uninit_multibyte_string (nchars, nbytes);
|
||||
str_to_multibyte (SDATA (ret), SDATA (string), nchars, nbytes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
|
@ -467,8 +467,7 @@ print_string (Lisp_Object string, Lisp_Object printcharfun)
|
|||
if (chars < bytes)
|
||||
{
|
||||
newstr = make_uninit_multibyte_string (chars, bytes);
|
||||
memcpy (SDATA (newstr), SDATA (string), chars);
|
||||
str_to_multibyte (SDATA (newstr), bytes, chars);
|
||||
str_to_multibyte (SDATA (newstr), SDATA (string), chars, bytes);
|
||||
string = newstr;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1344,18 +1344,24 @@
|
|||
(should (equal (plist-member plist (copy-sequence "a") #'equal)
|
||||
'("a" "c")))))
|
||||
|
||||
(ert-deftest fns--string-to-unibyte ()
|
||||
(dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz ""\x80\xdd\xff"))
|
||||
(ert-deftest fns--string-to-unibyte-multibyte ()
|
||||
(dolist (str (list "" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz" "\x80\xdd\xff"
|
||||
(apply #'unibyte-string (number-sequence 0 255))))
|
||||
(ert-info ((prin1-to-string str) :prefix "str: ")
|
||||
(should-not (multibyte-string-p str))
|
||||
(let* ((u (string-to-unibyte str)) ; should be identity
|
||||
(m (string-to-multibyte u)) ; lossless conversion
|
||||
(uu (string-to-unibyte m))) ; also lossless
|
||||
(mm (string-to-multibyte m)) ; should be identity
|
||||
(uu (string-to-unibyte m)) ; also lossless
|
||||
(ml (mapcar (lambda (c) (if (<= c #x7f) c (+ c #x3fff00))) u)))
|
||||
(should-not (multibyte-string-p u))
|
||||
(should (multibyte-string-p m))
|
||||
(should (multibyte-string-p mm))
|
||||
(should-not (multibyte-string-p uu))
|
||||
(should (equal str u))
|
||||
(should (equal str uu)))))
|
||||
(should (equal m mm))
|
||||
(should (equal str uu))
|
||||
(should (equal (append m nil) ml)))))
|
||||
(should-error (string-to-unibyte "å"))
|
||||
(should-error (string-to-unibyte "ABC∀BC")))
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue