Improve string_char_and_length speed

This tweak improved the CPU time performance of
‘make compile-always’ by about 1.7% on my platform.
* src/character.c (string_char): Remove; no longer used.
* src/character.h (string_char_and_length): Redo so that it
needn’t call string_char.  This helps the caller, which can now
become a leaf function.
This commit is contained in:
Paul Eggert 2020-04-26 15:18:49 -07:00
parent 895a18eafb
commit ed2def7d5e
2 changed files with 31 additions and 69 deletions

View file

@ -141,51 +141,6 @@ char_string (unsigned int c, unsigned char *p)
}
/* Decode the character whose multibyte form begins at P and return it.
   The number of bytes occupied by that multibyte form is stored in
   *LEN.  */
int
string_char (const unsigned char *p, int *len)
{
  const unsigned char *start = p;
  unsigned int lead = *p;
  int ch;

  if ((lead & 0xB0) != 0xB0)
    {
      /* At least one of the bits 0x80, 0x20, 0x10 is clear in the
	 lead byte, so the sequence is 1, 2 or 3 bytes long.  Delegate
	 to string_char_advance, which receives &P so that P ends up
	 past the character.  */
      ch = string_char_advance (&p);
    }
  else if (lead & 0x08)
    {
      /* Five bytes:
	   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	 The payload bits of the lead byte and the top bits of p[1]
	 are always 0, so the lead byte contributes nothing and the
	 result cannot exceed the maximum valid character code.  */
      ch = p[1] & 0x3F;
      ch = (ch << 6) | (p[2] & 0x3F);
      ch = (ch << 6) | (p[3] & 0x3F);
      ch = (ch << 6) | (p[4] & 0x3F);
      p += 5;
    }
  else
    {
      /* Four bytes:
	   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  */
      ch = lead & 0x7;
      ch = (ch << 6) | (p[1] & 0x3F);
      ch = (ch << 6) | (p[2] & 0x3F);
      ch = (ch << 6) | (p[3] & 0x3F);
      p += 4;
    }

  /* P now points just past the decoded sequence.  */
  *len = p - start;
  return ch;
}
/* Translate character C by translation table TABLE. If no translation is
found in TABLE, return the untranslated character. If TABLE is a list,
elements are char tables. In that case, recursively translate C by all the

View file

@ -85,7 +85,6 @@ enum
};
extern int char_string (unsigned, unsigned char *);
extern int string_char (const unsigned char *, int *);
/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11
compilers and can be concatenated with ordinary string literals. */
@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p)
/* Return the character whose multibyte form starts at P, and store the
   byte length of that form (1 to 5 in the code below) in *LENGTH.

   NOTE(review): this span appears to interleave the removed and the
   added lines of a diff hunk whose +/- markers were lost.  The lines
   that use LEN and call string_char look like the old body; the lines
   that use D look like the new one.  Confirm against the repository
   before relying on the text as shown.  */
INLINE int
string_char_and_length (unsigned char const *p, int *length)
{
int c, len;
int c = p[0];
/* High bit clear: the form is a single byte, and that byte is the
   character.  */
if (! (c & 0x80))
{
*length = 1;
return c;
}
/* From here on the lead byte is assumed to be 0xC0 or greater.  */
eassume (0xC0 <= c);
if (! (p[0] & 0x80))
/* Fold the second byte into D.  Subtracting (0xC0 << 6) + 0x80 cancels
   the 0xC0 bits of the lead byte and the 0x80 bit of p[1] (presumably
   always set in a trailing byte -- TODO confirm).  */
int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
/* Lead-byte bit 0x20 clear: two-byte form.  */
if (! (c & 0x20))
{
len = 1;
c = p[0];
*length = 2;
/* Lead bytes below 0xC2 get 0x3FFF80 added to the decoded value.
   NOTE(review): presumably this selects a separate code range for
   those lead bytes; the meaning is not visible here -- verify.  */
return d + (c < 0xC2 ? 0x3FFF80 : 0);
}
else if (! (p[0] & 0x20))
{
len = 2;
c = ((((p[0] & 0x1F) << 6)
| (p[1] & 0x3F))
+ (p[0] < 0xC2 ? 0x3FFF80 : 0));
}
else if (! (p[0] & 0x10))
{
len = 3;
c = (((p[0] & 0x0F) << 12)
| ((p[1] & 0x3F) << 6)
| (p[2] & 0x3F));
}
else
c = string_char (p, &len);
eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH);
*length = len;
return c;
/* Fold in the third byte, cancelling lead-byte bit 0x20 (by now
   shifted left 12 places) and the 0x80 bit of p[2].  */
d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
/* Lead-byte bit 0x10 clear: three-byte form.  */
if (! (c & 0x10))
{
*length = 3;
eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
return d;
}
/* Fold in the fourth byte, cancelling lead-byte bit 0x10 (shifted left
   18 places) and the 0x80 bit of p[3].  */
d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
/* Lead-byte bit 0x08 clear: four-byte form.  */
if (! (c & 0x08))
{
*length = 4;
eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
return d;
}
/* Otherwise the form is five bytes long.  Fold in the fifth byte,
   cancelling lead-byte bit 0x08 (shifted left 24 places) and the 0x80
   bit of p[4].  */
d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
*length = 5;
eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
return d;
}
/* Return the character code of character whose multibyte form is at P. */