Faster NSString to Lisp string conversion

Since we know that the value from [NSString UTF8String] is valid
UTF-8, using make_string is wastefully slow.

* src/nsfns.m (count_utf8_chars): New function, at least twice as fast
as parse_str_as_multibyte used by make_string for this purpose.
([NSString lispString]): Use count_utf8_chars.  We now always make a
multibyte string because there is no reason not to.
This commit is contained in:
Mattias Engdegård 2023-08-09 12:34:06 +02:00
parent 08cc48e496
commit 722b1ebc6e

View file

@ -3796,6 +3796,27 @@ - (NSString *)panel: (id)sender userEnteredFilename: (NSString *)filename
return true;
}
/* Return how many characters are encoded in the NBYTES bytes at STR.
   STR must be valid UTF-8, with the one exception that unpaired
   surrogates are tolerated.  */
static ptrdiff_t
count_utf8_chars (const char *str, ptrdiff_t nbytes)
{
  /* Only the first byte of each sequence is inspected: it determines
     the sequence length, and the trailing bytes are skipped unread.
     This is faster than parse_str_as_multibyte, and much faster than
     [NSString lengthOfBytesUsingEncoding: NSUTF32StringEncoding].  */
  const char *limit = str + nbytes;
  ptrdiff_t count = 0;

  for (const char *p = str; p < limit; count++)
    {
      unsigned char lead = *p;

      if (lead < 0x80)
        p += 1;		/* 0xxxxxxx */
      else if (lead < 0xe0)
        p += 2;		/* 110xxxxx 10xxxxxx */
      else if (lead < 0xf0)
        p += 3;		/* 1110xxxx 10xxxxxx 10xxxxxx */
      else
        p += 4;		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    }

  return count;
}
@implementation NSString (EmacsString)
/* Make an NSString from a Lisp string. STRING must not be in an
encoded form (e.g. UTF-8). */
@ -3840,9 +3861,9 @@ handled fairly well by the NS libraries (displayed with distinct
/* Make a Lisp string from an NSString.  The result is always a
   multibyte string.  */
- (Lisp_Object)lispString
{
  /* The bytes from UTF8String are taken to be valid UTF-8 already, so
     count the characters directly and pass both the character count
     and the byte count to make_multibyte_string, rather than paying
     for the slower scan that make_string would perform.  */
  const char *utf8 = [self UTF8String];
  ptrdiff_t bytes = [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding];
  return make_multibyte_string (utf8, count_utf8_chars (utf8, bytes), bytes);
}
@end