* src/marker.c: Try and speed up byte<->char conversion with many markers.

When considering markers (to find a starting point for the conversion),
typically one of the two bounds is nearby (coming from
cached_(byte|char)pos) but the other is far (point-min or point-max),
so change the exit condition so we stop as soon as *one* of the bounds
is near.

(BYTECHAR_DISTANCE_INITIAL, BYTECHAR_DISTANCE_INCREMENT): New constants.
(buf_charpos_to_bytepos, buf_bytepos_to_charpos): Use them to try and
reduce the number of markers we consider.
This commit is contained in:
Stefan Monnier 2018-03-26 09:01:30 -04:00
parent 9c1176247b
commit b300052fb4

View file

@ -90,7 +90,7 @@ clear_charpos_cache (struct buffer *b)
#define CONSIDER(CHARPOS, BYTEPOS) \
{ \
ptrdiff_t this_charpos = (CHARPOS); \
bool changed = 0; \
bool changed = false; \
\
if (this_charpos == charpos) \
{ \
@ -105,14 +105,14 @@ clear_charpos_cache (struct buffer *b)
{ \
best_above = this_charpos; \
best_above_byte = (BYTEPOS); \
changed = 1; \
changed = true; \
} \
} \
else if (this_charpos > best_below) \
{ \
best_below = this_charpos; \
best_below_byte = (BYTEPOS); \
changed = 1; \
changed = true; \
} \
\
if (changed) \
@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
CHECK_TYPE (MARKERP (x), Qmarkerp, x);
}
/* When converting bytes from/to chars, we look through the list of
markers to try and find a good starting point (since markers keep
track of both bytepos and charpos at the same time).
But if there are many markers, it can take too much time to find a "good"
marker from which to start. Worse yet: if it takes a long time and we end
up finding a nearby markers, we won't add a new marker to cache this
result, so next time around we'll have to go through this same long list
to (re)find this best marker. So the further down the list of
markers we go, the less demanding we are w.r.t what is a good marker.
The previous code used INITIAL=50 and INCREMENT=0 and this lead to
really poor performance when there are many markers.
I haven't tried to tweak INITIAL, but experiments on my trusty Thinkpad
T61 using various artificial test cases seem to suggest that INCREMENT=50
might be "the best compromise": it significantly improved the
worst case and it was rarely slower and never by much.
The asymptotic behavior is still poor, tho, so in largish buffers with many
overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck. */
#define BYTECHAR_DISTANCE_INITIAL 50
#define BYTECHAR_DISTANCE_INCREMENT 50
/* Return the byte position corresponding to CHARPOS in B. */
ptrdiff_t
@ -141,6 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
struct Lisp_Marker *tail;
ptrdiff_t best_above, best_above_byte;
ptrdiff_t best_below, best_below_byte;
ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
@ -180,8 +203,11 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
/* If we are down to a range of 50 chars,
don't bother checking any other markers;
scan the intervening chars directly now. */
if (best_above - best_below < 50)
if (best_above - charpos < distance
|| charpos - best_below < distance)
break;
else
distance += BYTECHAR_DISTANCE_INCREMENT;
}
/* We get here if we did not exactly hit one of the known places.
@ -248,7 +274,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
#define CONSIDER(BYTEPOS, CHARPOS) \
{ \
ptrdiff_t this_bytepos = (BYTEPOS); \
int changed = 0; \
int changed = false; \
\
if (this_bytepos == bytepos) \
{ \
@ -263,14 +289,14 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
{ \
best_above = (CHARPOS); \
best_above_byte = this_bytepos; \
changed = 1; \
changed = true; \
} \
} \
else if (this_bytepos > best_below_byte) \
{ \
best_below = (CHARPOS); \
best_below_byte = this_bytepos; \
changed = 1; \
changed = true; \
} \
\
if (changed) \
@ -293,6 +319,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
struct Lisp_Marker *tail;
ptrdiff_t best_above, best_above_byte;
ptrdiff_t best_below, best_below_byte;
ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
@ -323,8 +350,11 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
/* If we are down to a range of 50 chars,
don't bother checking any other markers;
scan the intervening chars directly now. */
if (best_above - best_below < 50)
if (best_above - bytepos < distance
|| bytepos - best_below < distance)
break;
else
distance += BYTECHAR_DISTANCE_INCREMENT;
}
/* We get here if we did not exactly hit one of the known places.
@ -744,8 +774,8 @@ count_markers (struct buffer *buf)
ptrdiff_t
verify_bytepos (ptrdiff_t charpos)
{
ptrdiff_t below = 1;
ptrdiff_t below_byte = 1;
ptrdiff_t below = BEG;
ptrdiff_t below_byte = BYTE_BEG;
while (below != charpos)
{