Improve string_char_and_length speed
This tweak improved the CPU time performance of ‘make compile-always’ by about 1.7% on my platform.
* src/character.c (string_char): Remove; no longer used.
* src/character.h (string_char_and_length): Redo so that it needn’t call string_char. This helps the caller, which can now become a leaf function.
This commit is contained in:
parent
895a18eafb
commit
ed2def7d5e
2 changed files with 31 additions and 69 deletions
|
@ -141,51 +141,6 @@ char_string (unsigned int c, unsigned char *p)
|
|||
}
|
||||
|
||||
|
||||
/* Return a character whose multibyte form is at P. Set *LEN to the
|
||||
byte length of the multibyte form. */
|
||||
|
||||
int
|
||||
string_char (const unsigned char *p, int *len)
|
||||
{
|
||||
int c;
|
||||
const unsigned char *saved_p = p;
|
||||
|
||||
if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
|
||||
{
|
||||
/* 1-, 2-, and 3-byte sequences can be handled by the macro. */
|
||||
c = string_char_advance (&p);
|
||||
}
|
||||
else if (! (*p & 0x08))
|
||||
{
|
||||
/* A 4-byte sequence of this form:
|
||||
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
|
||||
c = ((((p)[0] & 0x7) << 18)
|
||||
| (((p)[1] & 0x3F) << 12)
|
||||
| (((p)[2] & 0x3F) << 6)
|
||||
| ((p)[3] & 0x3F));
|
||||
p += 4;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* A 5-byte sequence of this form:
|
||||
|
||||
111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
|
||||
Note that the top 4 `x's are always 0, so shifting p[1] can
|
||||
never exceed the maximum valid character codepoint. */
|
||||
c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
|
||||
(((p)[1] & 0x3F) << 18)
|
||||
| (((p)[2] & 0x3F) << 12)
|
||||
| (((p)[3] & 0x3F) << 6)
|
||||
| ((p)[4] & 0x3F));
|
||||
p += 5;
|
||||
}
|
||||
|
||||
*len = p - saved_p;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/* Translate character C by translation table TABLE. If no translation is
|
||||
found in TABLE, return the untranslated character. If TABLE is a list,
|
||||
elements are char tables. In that case, recursively translate C by all the
|
||||
|
|
|
@ -85,7 +85,6 @@ enum
|
|||
};
|
||||
|
||||
extern int char_string (unsigned, unsigned char *);
|
||||
extern int string_char (const unsigned char *, int *);
|
||||
|
||||
/* UTF-8 encodings. Use \x escapes, so they are portable to pre-C11
|
||||
compilers and can be concatenated with ordinary string literals. */
|
||||
|
@ -371,33 +370,41 @@ raw_prev_char_len (unsigned char const *p)
|
|||
INLINE int
|
||||
string_char_and_length (unsigned char const *p, int *length)
|
||||
{
|
||||
int c, len;
|
||||
int c = p[0];
|
||||
if (! (c & 0x80))
|
||||
{
|
||||
*length = 1;
|
||||
return c;
|
||||
}
|
||||
eassume (0xC0 <= c);
|
||||
|
||||
if (! (p[0] & 0x80))
|
||||
int d = (c << 6) + p[1] - ((0xC0 << 6) + 0x80);
|
||||
if (! (c & 0x20))
|
||||
{
|
||||
len = 1;
|
||||
c = p[0];
|
||||
*length = 2;
|
||||
return d + (c < 0xC2 ? 0x3FFF80 : 0);
|
||||
}
|
||||
else if (! (p[0] & 0x20))
|
||||
{
|
||||
len = 2;
|
||||
c = ((((p[0] & 0x1F) << 6)
|
||||
| (p[1] & 0x3F))
|
||||
+ (p[0] < 0xC2 ? 0x3FFF80 : 0));
|
||||
}
|
||||
else if (! (p[0] & 0x10))
|
||||
{
|
||||
len = 3;
|
||||
c = (((p[0] & 0x0F) << 12)
|
||||
| ((p[1] & 0x3F) << 6)
|
||||
| (p[2] & 0x3F));
|
||||
}
|
||||
else
|
||||
c = string_char (p, &len);
|
||||
|
||||
eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH);
|
||||
*length = len;
|
||||
return c;
|
||||
d = (d << 6) + p[2] - ((0x20 << 12) + 0x80);
|
||||
if (! (c & 0x10))
|
||||
{
|
||||
*length = 3;
|
||||
eassume (MAX_2_BYTE_CHAR < d && d <= MAX_3_BYTE_CHAR);
|
||||
return d;
|
||||
}
|
||||
|
||||
d = (d << 6) + p[3] - ((0x10 << 18) + 0x80);
|
||||
if (! (c & 0x08))
|
||||
{
|
||||
*length = 4;
|
||||
eassume (MAX_3_BYTE_CHAR < d && d <= MAX_4_BYTE_CHAR);
|
||||
return d;
|
||||
}
|
||||
|
||||
d = (d << 6) + p[4] - ((0x08 << 24) + 0x80);
|
||||
*length = 5;
|
||||
eassume (MAX_4_BYTE_CHAR < d && d <= MAX_5_BYTE_CHAR);
|
||||
return d;
|
||||
}
|
||||
|
||||
/* Return the character code of the character whose multibyte form is at P. */
|
||||
|
|
Loading…
Add table
Reference in a new issue