diff --git a/src/nsfns.m b/src/nsfns.m index f962de02cb9..508568d90c3 100644 --- a/src/nsfns.m +++ b/src/nsfns.m @@ -3796,6 +3796,27 @@ - (NSString *)panel: (id)sender userEnteredFilename: (NSString *)filename return true; } +/* Count the number of characters in STR, NBYTES long. + The string is valid UTF-8 except that it may contain unpaired surrogates. */ +static ptrdiff_t +count_utf8_chars (const char *str, ptrdiff_t nbytes) +{ + /* This is faster than parse_str_as_multibyte, and much faster than + [NSString lengthOfBytesUsingEncoding: NSUTF32StringEncoding]. */ + const char *end = str + nbytes; + ptrdiff_t nc = 0; + while (str < end) + { + nc++; + unsigned char c = *str; + str += ( c <= 0x7f ? 1 // 0xxxxxxx + : c <= 0xdf ? 2 // 110xxxxx 10xxxxxx + : c <= 0xef ? 3 // 1110xxxx 10xxxxxx 10xxxxxx + : 4); // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + } + return nc; +} + @implementation NSString (EmacsString) /* Make an NSString from a Lisp string. STRING must not be in an encoded form (e.g. UTF-8). */ @@ -3840,9 +3861,9 @@ handled fairly well by the NS libraries (displayed with distinct /* Make a Lisp string from an NSString. */ - (Lisp_Object)lispString { - // make_string behaves predictably and correctly with UTF-8 input. - return make_string ([self UTF8String], - [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding]); + const char *utf8 = [self UTF8String]; + ptrdiff_t bytes = [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding]; + return make_multibyte_string (utf8, count_utf8_chars (utf8, bytes), bytes); } @end