src/regex-emacs.c (POS_AS_IN_BUFFER): Delete macro

That macro added 1 to buffer positions because:

    Strings are 0-indexed, buffers are 1-indexed

but in reality this 1 was added to the regexp engine's "byte
offsets", which are not 1-based byte positions as used throughout
the rest of Emacs but BEGV_BYTE-relative offsets, so the two
did not cancel out.

* src/regex-emacs.c (PTR_TO_OFFSET, POS_AS_IN_BUFFER): Delete macros;
use `POINTER_TO_OFFSET` instead.
(re_search_2, re_match_2, re_match_2_internal): Adjust accordingly.
* src/syntax.h (SYNTAX_TABLE_BYTE_TO_CHAR): Don't remove 1 from buffer
byte offsets now that `POS_AS_IN_BUFFER` doesn't add it any more.
This commit is contained in:
Stefan Monnier 2023-04-12 15:44:58 -04:00
parent 1e6463ad22
commit d53ff9fe28
2 changed files with 14 additions and 21 deletions

View file

@ -47,13 +47,6 @@
/* Make syntax table lookup grant data in gl_state. */
#define SYNTAX(c) syntax_property (c, 1)
/* Convert the pointer to the char to BEG-based offset from the start. */
#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
/* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean
result to get the right base index. */
#define POS_AS_IN_BUFFER(p) \
((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
#define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
#define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
#define RE_STRING_CHAR(p, multibyte) \
@ -3260,7 +3253,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
{
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos);
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
}
@ -3873,7 +3866,7 @@ re_match_2 (struct re_pattern_buffer *bufp,
ptrdiff_t charpos;
gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos);
SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
result = re_match_2_internal (bufp, (re_char *) string1, size1,
@ -4806,7 +4799,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@ -4846,7 +4839,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
PREFETCH ();
@ -4889,7 +4882,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
int c1, c2;
int s1, s2;
int dummy;
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@ -4931,7 +4924,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
is the character at D, and S2 is the syntax of C2. */
int c1, c2;
int s1, s2;
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (charpos);
PREFETCH ();
@ -4972,7 +4965,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
is the character at D, and S2 is the syntax of C2. */
int c1, c2;
int s1, s2;
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
UPDATE_SYNTAX_TABLE (charpos);
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
@ -5008,7 +5001,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
mcnt);
PREFETCH ();
{
ptrdiff_t offset = PTR_TO_OFFSET (d);
ptrdiff_t offset = POINTER_TO_OFFSET (d);
ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
UPDATE_SYNTAX_TABLE (pos1);
}

View file

@ -145,7 +145,7 @@ extern bool syntax_prefix_flag_p (int c);
extern unsigned char const syntax_spec_code[0400];
/* Convert the byte offset BYTEPOS into a character position,
/* Convert the regexp BYTEOFFSET into a character position,
for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.
The value is meant for use in code that does nothing when
@ -153,19 +153,19 @@ extern unsigned char const syntax_spec_code[0400];
for speed. */
INLINE ptrdiff_t
SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t bytepos)
SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t byteoffset)
{
return (! parse_sexp_lookup_properties
? 0
: STRINGP (gl_state.object)
? string_byte_to_char (gl_state.object, bytepos)
? string_byte_to_char (gl_state.object, byteoffset)
: BUFFERP (gl_state.object)
? ((buf_bytepos_to_charpos
(XBUFFER (gl_state.object),
(bytepos + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1))))
(byteoffset + BUF_BEGV_BYTE (XBUFFER (gl_state.object))))))
: NILP (gl_state.object)
? BYTE_TO_CHAR (bytepos + BEGV_BYTE - 1)
: bytepos);
? BYTE_TO_CHAR (byteoffset + BEGV_BYTE)
: byteoffset);
}
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is