Speed up string-lessp for multibyte strings
Improve comparison speed when both arguments are multibyte strings, at least one of them containing a non-ASCII character. (All-ASCII multibyte strings are already fast.) The speed-up is about 2× for strings of 10 chars, 10× for strings of 100 chars. * src/fns.c (Fstring_lessp): Quickly skip the common prefix by comparing words.
This commit is contained in:
parent
6b4c17dec0
commit
def6fa4246
1 changed files with 39 additions and 12 deletions
51
src/fns.c
51
src/fns.c
|
@ -454,23 +454,50 @@ Symbols are also allowed; their print names are used instead. */)
|
|||
&& (!STRING_MULTIBYTE (string2) || SCHARS (string2) == SBYTES (string2)))
|
||||
{
|
||||
/* Each argument is either unibyte or all-ASCII multibyte:
|
||||
we can compare bytewise.
|
||||
(Arbitrary multibyte strings cannot be compared bytewise because
|
||||
that would give a different order for raw bytes 80..FF.) */
|
||||
we can compare bytewise. */
|
||||
int d = memcmp (SSDATA (string1), SSDATA (string2), n);
|
||||
return d < 0 || (d == 0 && n < SCHARS (string2)) ? Qt : Qnil;
|
||||
}
|
||||
else if (STRING_MULTIBYTE (string1) && STRING_MULTIBYTE (string2))
|
||||
{
|
||||
ptrdiff_t i1 = 0, i1_byte = 0, i2 = 0, i2_byte = 0;
|
||||
while (i1 < n)
|
||||
{
|
||||
int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte);
|
||||
int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte);
|
||||
if (c1 != c2)
|
||||
return c1 < c2 ? Qt : Qnil;
|
||||
}
|
||||
return i1 < SCHARS (string2) ? Qt : Qnil;
|
||||
/* Two arbitrary multibyte strings: we cannot use memcmp because
|
||||
the encoding for raw bytes would sort those between U+007F and U+0080
|
||||
which isn't where we want them.
|
||||
Instead, we skip the longest common prefix and look at
|
||||
what follows. */
|
||||
ptrdiff_t nb1 = SBYTES (string1);
|
||||
ptrdiff_t nb2 = SBYTES (string2);
|
||||
ptrdiff_t nb = min (nb1, nb2);
|
||||
|
||||
/* First compare entire machine words. (String data is allocated
|
||||
with word alignment.) */
|
||||
typedef size_t word_t;
|
||||
int ws = sizeof (word_t);
|
||||
const word_t *w1 = (const word_t *) SDATA (string1);
|
||||
const word_t *w2 = (const word_t *) SDATA (string2);
|
||||
ptrdiff_t b = 0;
|
||||
while (b < nb - ws + 1 && w1[b / ws] == w2[b / ws])
|
||||
b += ws;
|
||||
|
||||
/* Scan forward to the differing byte (at most ws-1 bytes). */
|
||||
while (b < nb && SREF (string1, b) == SREF (string2, b))
|
||||
b++;
|
||||
|
||||
if (b >= nb)
|
||||
/* One string is a prefix of the other. */
|
||||
return b < nb2 ? Qt : Qnil;
|
||||
|
||||
/* Now back up to the start of the differing characters:
|
||||
it's the last byte not having the bit pattern 10xxxxxx. */
|
||||
while ((SREF (string1, b) & 0xc0) == 0x80)
|
||||
b--;
|
||||
|
||||
/* Compare the differing characters. */
|
||||
ptrdiff_t i1 = 0, i2 = 0;
|
||||
ptrdiff_t i1_byte = b, i2_byte = b;
|
||||
int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte);
|
||||
int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte);
|
||||
return c1 < c2 ? Qt : Qnil;
|
||||
}
|
||||
else if (STRING_MULTIBYTE (string1))
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue