Minor string-search optimisations (bug#43598)
* src/fns.c (Fstring_search): Perform cheap all-ASCII checks before more expensive ones.  Use a faster loop when searching for non-ASCII non-raw bytes.
* test/src/fns-tests.el (string-search): Add more test cases.
This commit is contained in:
parent
583cb264ad
commit
8bd233a7eb
2 changed files with 47 additions and 34 deletions
59
src/fns.c
59
src/fns.c
|
@ -5457,16 +5457,11 @@ It should not be used for anything security-related. See
|
|||
static bool
|
||||
string_ascii_p (Lisp_Object string)
|
||||
{
|
||||
if (STRING_MULTIBYTE (string))
|
||||
return SBYTES (string) == SCHARS (string);
|
||||
else
|
||||
{
|
||||
ptrdiff_t nbytes = SBYTES (string);
|
||||
for (ptrdiff_t i = 0; i < nbytes; i++)
|
||||
if (SREF (string, i) > 127)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
ptrdiff_t nbytes = SBYTES (string);
|
||||
for (ptrdiff_t i = 0; i < nbytes; i++)
|
||||
if (SREF (string, i) > 127)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0,
|
||||
|
@ -5505,9 +5500,14 @@ Case is always significant and text properties are ignored. */)
|
|||
haystart = SSDATA (haystack) + start_byte;
|
||||
haybytes = SBYTES (haystack) - start_byte;
|
||||
|
||||
if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle)
|
||||
|| string_ascii_p (needle)
|
||||
|| string_ascii_p (haystack))
|
||||
/* We can do a direct byte-string search if both strings have the
|
||||
same multibyteness, or if at least one of them consists of ASCII
|
||||
characters only. */
|
||||
if (STRING_MULTIBYTE (haystack)
|
||||
? (STRING_MULTIBYTE (needle)
|
||||
|| SCHARS (haystack) == SBYTES (haystack) || string_ascii_p (needle))
|
||||
: (!STRING_MULTIBYTE (needle)
|
||||
|| SCHARS (needle) == SBYTES (needle) || string_ascii_p (haystack)))
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (needle), SBYTES (needle));
|
||||
else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
|
||||
|
@ -5521,26 +5521,21 @@ Case is always significant and text properties are ignored. */)
|
|||
/* The only possible way we can find the multibyte needle in the
|
||||
unibyte stack (since we know that neither are pure-ASCII) is
|
||||
if they contain "raw bytes" (and no other non-ASCII chars.) */
|
||||
ptrdiff_t chars = SCHARS (needle);
|
||||
const unsigned char *src = SDATA (needle);
|
||||
ptrdiff_t nbytes = SBYTES (needle);
|
||||
for (ptrdiff_t i = 0; i < nbytes; i++)
|
||||
{
|
||||
int c = SREF (needle, i);
|
||||
if (CHAR_BYTE8_HEAD_P (c))
|
||||
i++; /* Skip raw byte. */
|
||||
else if (!ASCII_CHAR_P (c))
|
||||
return Qnil; /* Found a char that can't be in the haystack. */
|
||||
}
|
||||
|
||||
for (ptrdiff_t i = 0; i < chars; i++)
|
||||
{
|
||||
int c = string_char_advance (&src);
|
||||
|
||||
if (!CHAR_BYTE8_P (c)
|
||||
&& !ASCII_CHAR_P (c))
|
||||
/* Found a char that can't be in the haystack. */
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
{
|
||||
/* "Raw bytes" (aka eighth-bit) are represented differently in
|
||||
multibyte and unibyte strings. */
|
||||
Lisp_Object uni_needle = Fstring_to_unibyte (needle);
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (uni_needle), SBYTES (uni_needle));
|
||||
}
|
||||
/* "Raw bytes" (aka eighth-bit) are represented differently in
|
||||
multibyte and unibyte strings. */
|
||||
Lisp_Object uni_needle = Fstring_to_unibyte (needle);
|
||||
res = memmem (haystart, haybytes,
|
||||
SSDATA (uni_needle), SBYTES (uni_needle));
|
||||
}
|
||||
|
||||
if (! res)
|
||||
|
test/src/fns-tests.el
|
@ -913,6 +913,7 @@
|
|||
(should (equal (string-search "ab\0" "ab") nil))
|
||||
(should (equal (string-search "ab" "abababab" 3) 4))
|
||||
(should (equal (string-search "ab" "ababac" 3) nil))
|
||||
(should (equal (string-search "aaa" "aa") nil))
|
||||
(let ((case-fold-search t))
|
||||
(should (equal (string-search "ab" "AB") nil)))
|
||||
|
||||
|
@ -936,14 +937,16 @@
|
|||
(should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2))
|
||||
(should (equal (string-search "\303" "aøb") nil))
|
||||
(should (equal (string-search "\270" "aøb") nil))
|
||||
;; This test currently fails, but it shouldn't!
|
||||
;;(should (equal (string-search "ø" "\303\270") nil))
|
||||
(should (equal (string-search "ø" "\303\270") nil))
|
||||
|
||||
(should (equal (string-search "a\U00010f98z" "a\U00010f98a\U00010f98z") 2))
|
||||
|
||||
(should-error (string-search "a" "abc" -1))
|
||||
(should-error (string-search "a" "abc" 4))
|
||||
(should-error (string-search "a" "abc" 100000000000))
|
||||
|
||||
(should (equal (string-search "a" "aaa" 3) nil))
|
||||
(should (equal (string-search "aa" "aa" 1) nil))
|
||||
(should (equal (string-search "\0" "") nil))
|
||||
|
||||
(should (equal (string-search "" "") 0))
|
||||
|
@ -955,6 +958,21 @@
|
|||
(should-error (string-search "" "abc" -1))
|
||||
|
||||
(should-not (string-search "ø" "foo\303\270"))
|
||||
(should-not (string-search "\303\270" "ø"))
|
||||
(should-not (string-search "\370" "ø"))
|
||||
(should-not (string-search (string-to-multibyte "\370") "ø"))
|
||||
(should-not (string-search "ø" "\370"))
|
||||
(should-not (string-search "ø" (string-to-multibyte "\370")))
|
||||
(should-not (string-search "\303\270" "\370"))
|
||||
(should-not (string-search (string-to-multibyte "\303\270") "\370"))
|
||||
(should-not (string-search "\303\270" (string-to-multibyte "\370")))
|
||||
(should-not (string-search (string-to-multibyte "\303\270")
|
||||
(string-to-multibyte "\370")))
|
||||
(should-not (string-search "\370" "\303\270"))
|
||||
(should-not (string-search (string-to-multibyte "\370") "\303\270"))
|
||||
(should-not (string-search "\370" (string-to-multibyte "\303\270")))
|
||||
(should-not (string-search (string-to-multibyte "\370")
|
||||
(string-to-multibyte "\303\270")))
|
||||
(should (equal (string-search (string-to-multibyte "o\303\270") "foo\303\270")
|
||||
2))
|
||||
(should (equal (string-search "\303\270" "foo\303\270") 3)))
|
||||
|
|
Loading…
Add table
Reference in a new issue