Rehash hash tables eagerly after loading a dump

This simplifies code, and helps performance in some cases (Bug#36597).
* src/lisp.h (hash_rehash_needed_p): Remove.  All uses removed.
(hash_rehash_if_needed): Remove.  All uses removed.
(struct Lisp_Hash_Table): Remove comment about rehashing hash tables.
* src/pdumper.c (thaw_hash_tables): New function.
(hash_table_thaw): New function.
(hash_table_freeze): New function.
(dump_hash_table): Simplify.
(dump_hash_table_list): New function.
(hash_table_contents): New function.
(Fdump_emacs_portable): Handle hash tables by eager rehashing.
(pdumper_load): Restore hash tables.
(init_pdumper_once): New function.
commit 16a16645f5 (parent 0d0aad213f)
Author:    Pip Cet
Committer: Paul Eggert
Date:      2020-08-11 02:16:53 -07:00
8 changed files with 112 additions and 188 deletions
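
As an aside for readers new to the change: the core idea of the patch is that a dumped hash table keeps its key/value pairs packed at the front of key_and_value, plus just the sizes of its index/hash/next vectors, and the real buckets, collision chains and free list are rebuilt eagerly when the dump is loaded instead of lazily on first access.  The following stand-alone C sketch illustrates that shape (toy code with made-up names, not part of the patch):

  #include <assert.h>
  #include <stddef.h>
  #include <string.h>

  #define TOY_SIZE 8

  struct toy_table
  {
    long key[TOY_SIZE];        /* frozen layout: slots [0, count) are live */
    long value[TOY_SIZE];
    ptrdiff_t next[TOY_SIZE];  /* collision chain / free list; -1 ends it */
    ptrdiff_t index[TOY_SIZE]; /* bucket -> first entry; -1 if empty */
    ptrdiff_t count;
  };

  /* "Thaw": rebuild buckets from the packed keys, walking only the
     first `count' entries and then chaining the unused tail into a
     free list, roughly as the new hash_table_rehash does.  */
  static void
  toy_thaw (struct toy_table *t)
  {
    memset (t->index, -1, sizeof t->index);
    for (ptrdiff_t i = 0; i < t->count; i++)
      {
        ptrdiff_t bucket = (unsigned long) t->key[i] % TOY_SIZE;
        t->next[i] = t->index[bucket];
        t->index[bucket] = i;
      }
    for (ptrdiff_t i = t->count; i < TOY_SIZE; i++)
      t->next[i] = i + 1 < TOY_SIZE ? i + 1 : -1;
  }

  static long
  toy_get (struct toy_table *t, long key, long missing)
  {
    ptrdiff_t i = t->index[(unsigned long) key % TOY_SIZE];
    for (; 0 <= i; i = t->next[i])
      if (t->key[i] == key)
        return t->value[i];
    return missing;
  }

  int
  main (void)
  {
    /* Pretend this came out of a dump: two live pairs packed at the
       front, no bucket information yet.  */
    struct toy_table t = { .key = { 17, 42 }, .value = { 1, 2 }, .count = 2 };
    toy_thaw (&t);
    assert (toy_get (&t, 17, -1) == 1 && toy_get (&t, 42, -1) == 2);
    assert (toy_get (&t, 99, -1) == -1);
    return 0;
  }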

src/bytecode.c

@@ -1401,7 +1401,6 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
Lisp_Object v1 = POP;
ptrdiff_t i;
struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table);
hash_rehash_if_needed (h);
/* h->count is a faster approximation for HASH_TABLE_SIZE (h)
here. */

src/composite.c

@@ -652,7 +652,6 @@ Lisp_Object
composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
hash_rehash_if_needed (h);
Lisp_Object header = LGSTRING_HEADER (gstring);
Lisp_Object hash = h->test.hashfn (header, h);
if (len < 0)

src/emacs.c

@@ -1536,6 +1536,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
if (!initialized)
{
init_alloc_once ();
init_pdumper_once ();
init_obarray_once ();
init_eval_once ();
init_charset_once ();

src/fns.c

@@ -4248,50 +4248,28 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
/* Recompute the hashes (and hence also the "next" pointers).
Normally there's never a need to recompute hashes.
This is done only on first-access to a hash-table loaded from
the "pdump", because the object's addresses may have changed, thus
This is done only on first access to a hash-table loaded from
the "pdump", because the objects' addresses may have changed, thus
affecting their hash. */
void
hash_table_rehash (struct Lisp_Hash_Table *h)
hash_table_rehash (Lisp_Object hash)
{
ptrdiff_t size = HASH_TABLE_SIZE (h);
/* These structures may have been purecopied and shared
(bug#36447). */
Lisp_Object hash = make_nil_vector (size);
h->next = Fcopy_sequence (h->next);
h->index = Fcopy_sequence (h->index);
struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
for (ptrdiff_t i = 0; i < size; ++i)
for (ptrdiff_t i = 0; i < h->count; ++i)
{
Lisp_Object key = HASH_KEY (h, i);
if (!EQ (key, Qunbound))
ASET (hash, i, h->test.hashfn (key, h));
Lisp_Object hash_code = h->test.hashfn (key, h);
ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
set_hash_hash_slot (h, i, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
/* Reset the index so that any slot we don't fill below is marked
invalid. */
Ffillarray (h->index, make_fixnum (-1));
/* Rebuild the collision chains. */
for (ptrdiff_t i = 0; i < size; ++i)
if (!NILP (AREF (hash, i)))
{
EMACS_UINT hash_code = XUFIXNUM (AREF (hash, i));
ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
/* Finally, mark the hash table as having a valid hash order.
Do this last so that if we're interrupted, we retry on next
access. */
eassert (hash_rehash_needed_p (h));
h->hash = hash;
eassert (!hash_rehash_needed_p (h));
for (ptrdiff_t i = h->count; i < ASIZE (h->next) - 1; i++)
set_hash_next_slot (h, i, i + 1);
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
@@ -4303,8 +4281,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object *hash)
{
ptrdiff_t start_of_bucket, i;
hash_rehash_if_needed (h);
Lisp_Object hash_code = h->test.hashfn (key, h);
if (hash)
*hash = hash_code;
@@ -4339,8 +4315,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value,
{
ptrdiff_t start_of_bucket, i;
hash_rehash_if_needed (h);
/* Increment count after resizing because resizing may fail. */
maybe_resize_hash_table (h);
h->count++;
@@ -4373,8 +4347,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key)
ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index);
ptrdiff_t prev = -1;
hash_rehash_if_needed (h);
for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket);
0 <= i;
i = HASH_NEXT (h, i))
@@ -4415,8 +4387,7 @@ hash_clear (struct Lisp_Hash_Table *h)
if (h->count > 0)
{
ptrdiff_t size = HASH_TABLE_SIZE (h);
if (!hash_rehash_needed_p (h))
memclear (xvector_contents (h->hash), size * word_size);
memclear (xvector_contents (h->hash), size * word_size);
for (ptrdiff_t i = 0; i < size; i++)
{
set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1);
@@ -4452,9 +4423,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
{
/* Follow collision chain, removing entries that don't survive
this garbage collection. It's okay if hash_rehash_needed_p
(h) is true, since we're operating entirely on the cached
hash values. */
this garbage collection. */
ptrdiff_t prev = -1;
ptrdiff_t next;
for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next)
@@ -4499,7 +4468,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
set_hash_hash_slot (h, i, Qnil);
eassert (h->count != 0);
h->count += h->count > 0 ? -1 : 1;
h->count--;
}
else
{
@@ -4923,7 +4892,7 @@ DEFUN ("hash-table-count", Fhash_table_count, Shash_table_count, 1, 1, 0,
(Lisp_Object table)
{
struct Lisp_Hash_Table *h = check_hash_table (table);
eassert (h->count >= 0);
return make_fixnum (h->count);
}

src/lisp.h

@@ -2275,11 +2275,7 @@ struct hash_table_test
struct Lisp_Hash_Table
{
/* Change pdumper.c if you change the fields here.
IMPORTANT!!!!!!!
Call hash_rehash_if_needed() before accessing. */
/* Change pdumper.c if you change the fields here. */
/* This is for Lisp; the hash table code does not refer to it. */
union vectorlike_header header;
@@ -2398,20 +2394,7 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
return size;
}
void hash_table_rehash (struct Lisp_Hash_Table *h);
INLINE bool
hash_rehash_needed_p (const struct Lisp_Hash_Table *h)
{
return NILP (h->hash);
}
INLINE void
hash_rehash_if_needed (struct Lisp_Hash_Table *h)
{
if (hash_rehash_needed_p (h))
hash_table_rehash (h);
}
void hash_table_rehash (Lisp_Object);
/* Default size for hash tables if not specified. */

src/minibuf.c

@@ -1212,9 +1212,6 @@ is used to further constrain the set of candidates. */)
bucket = AREF (collection, idx);
}
if (HASH_TABLE_P (collection))
hash_rehash_if_needed (XHASH_TABLE (collection));
while (1)
{
/* Get the next element of the alist, obarray, or hash-table. */

src/pdumper.c

@@ -105,17 +105,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
# define VM_SUPPORTED 0
#endif
/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to
check, for each hash table it dumps, that the hash table means the
same thing after rehashing. */
#ifndef PDUMPER_CHECK_REHASHING
# if ENABLE_CHECKING
# define PDUMPER_CHECK_REHASHING 1
# else
# define PDUMPER_CHECK_REHASHING 0
# endif
#endif
/* Require an architecture in which pointers, ptrdiff_t and intptr_t
are the same size and have the same layout, and where bytes have
eight bits --- that is, a general-purpose computer made after 1990.
@@ -401,6 +390,8 @@ struct dump_header
The start of the cold region is always aligned on a page
boundary. */
dump_off cold_start;
dump_off hash_list;
};
/* Double-ended singly linked list. */
@@ -558,6 +549,8 @@ struct dump_context
heap objects. */
Lisp_Object bignum_data;
Lisp_Object hash_tables;
unsigned number_hot_relocations;
unsigned number_discardable_relocations;
};
@@ -2616,78 +2609,62 @@ dump_vectorlike_generic (struct dump_context *ctx,
return offset;
}
/* Determine whether the hash table's hash order is stable
across dump and load. If it is, we don't have to trigger
a rehash on access. */
static bool
dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash)
{
if (hash->test.hashfn == hashfn_user_defined)
error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
bool is_eql = hash->test.hashfn == hashfn_eql;
bool is_equal = hash->test.hashfn == hashfn_equal;
ptrdiff_t size = HASH_TABLE_SIZE (hash);
for (ptrdiff_t i = 0; i < size; ++i)
{
Lisp_Object key = HASH_KEY (hash, i);
if (!EQ (key, Qunbound))
{
bool key_stable = (dump_builtin_symbol_p (key)
|| FIXNUMP (key)
|| (is_equal
&& (STRINGP (key) || BOOL_VECTOR_P (key)))
|| ((is_equal || is_eql)
&& (FLOATP (key) || BIGNUMP (key))));
if (!key_stable)
return false;
}
}
return true;
}
/* Return a list of (KEY . VALUE) pairs in the given hash table. */
/* Return a vector of KEY, VALUE pairs in the given hash table H. The
first H->count pairs are valid, the rest is left as nil. */
static Lisp_Object
hash_table_contents (Lisp_Object table)
hash_table_contents (struct Lisp_Hash_Table *h)
{
if (h->test.hashfn == hashfn_user_defined)
error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
Lisp_Object contents = Qnil;
struct Lisp_Hash_Table *h = XHASH_TABLE (table);
for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i)
/* Make sure key_and_value ends up in the same order; charset.c
relies on it by expecting hash table indices to stay constant
across the dump. */
for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h) - h->count; i++)
{
Lisp_Object key = HASH_KEY (h, i);
if (!EQ (key, Qunbound))
dump_push (&contents, Fcons (key, HASH_VALUE (h, i)));
dump_push (&contents, Qnil);
dump_push (&contents, Qunbound);
}
return Fnreverse (contents);
for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i)
if (!NILP (HASH_HASH (h, i)))
{
dump_push (&contents, HASH_VALUE (h, i));
dump_push (&contents, HASH_KEY (h, i));
}
return CALLN (Fapply, Qvector, contents);
}
/* Copy the given hash table, rehash it, and make sure that we can
look up all the values in the original. */
static void
check_hash_table_rehash (Lisp_Object table_orig)
static dump_off
dump_hash_table_list (struct dump_context *ctx)
{
ptrdiff_t count = XHASH_TABLE (table_orig)->count;
hash_rehash_if_needed (XHASH_TABLE (table_orig));
Lisp_Object table_rehashed = Fcopy_hash_table (table_orig);
eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
XHASH_TABLE (table_rehashed)->hash = Qnil;
eassert (count == 0 || hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
hash_rehash_if_needed (XHASH_TABLE (table_rehashed));
eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed)));
Lisp_Object expected_contents = hash_table_contents (table_orig);
while (!NILP (expected_contents))
{
Lisp_Object key_value_pair = dump_pop (&expected_contents);
Lisp_Object key = XCAR (key_value_pair);
Lisp_Object expected_value = XCDR (key_value_pair);
Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied;
Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary);
eassert (EQ (expected_value, found_value));
Fremhash (key, table_rehashed);
}
if (CONSP (ctx->hash_tables))
return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables));
else
return 0;
}
eassert (EQ (Fhash_table_count (table_rehashed),
make_fixnum (0)));
static void
hash_table_freeze (struct Lisp_Hash_Table *h)
{
ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2;
h->key_and_value = hash_table_contents (h);
h->next_free = (nkeys == h->count ? -1 : h->count);
h->index = Flength (h->index);
h->next = h->hash = make_fixnum (nkeys);
}
static void
hash_table_thaw (Lisp_Object hash)
{
struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
h->index = Fmake_vector (h->index, make_fixnum (-1));
h->hash = Fmake_vector (h->hash, Qnil);
h->next = Fmake_vector (h->next, make_fixnum (-1));
hash_table_rehash (hash);
}
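
Reader's note (not part of the patch, numbers made up): a small worked example of the freeze/thaw bookkeeping above.  Suppose a table whose key_and_value vector has length 8 (room for 4 pairs), with 2 live entries, and an index vector of length 7.  Then hash_table_freeze computes

  nkeys       = length (key_and_value) / 2 = 8 / 2 = 4
  next_free   = (nkeys == count ? -1 : count)      = 2   (first unused packed slot)
  index       = 7   (the index vector is replaced by a fixnum, its old length)
  next = hash = 4   (likewise replaced by fixnums holding nkeys)

and hash_table_thaw undoes the trick: it recreates index as a 7-slot vector of -1, hash as 4 nils, next as 4 slots of -1, and calls hash_table_rehash to rebuild the collision chains and the free list.
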
static dump_off
@@ -2699,51 +2676,11 @@ dump_hash_table (struct dump_context *ctx,
# error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h."
#endif
const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object);
bool is_stable = dump_hash_table_stable_p (hash_in);
/* If the hash table is likely to be modified in memory (either
because we need to rehash, and thus toggle hash->count, or
because we need to assemble a list of weak tables) punt the hash
table to the end of the dump, where we can lump all such hash
tables together. */
if (!(is_stable || !NILP (hash_in->weak))
&& ctx->flags.defer_hash_tables)
{
if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE)
{
eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE
|| offset == DUMP_OBJECT_NOT_SEEN);
/* We still want to dump the actual keys and values now. */
dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE);
/* We'll get to the rest later. */
offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE;
dump_remember_object (ctx, object, offset);
dump_push (&ctx->deferred_hash_tables, object);
}
return offset;
}
if (PDUMPER_CHECK_REHASHING)
check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike));
struct Lisp_Hash_Table hash_munged = *hash_in;
struct Lisp_Hash_Table *hash = &hash_munged;
/* Remember to rehash this hash table on first access. After a
dump reload, the hash table values will have changed, so we'll
need to rebuild the index.
TODO: for EQ and EQL hash tables, it should be possible to rehash
here using the preferred load address of the dump, eliminating
the need to rehash-on-access if we can load the dump where we
want. */
if (hash->count > 0 && !is_stable)
/* Hash codes will have to be recomputed anyway, so let's not dump them.
Also set `hash` to nil for hash_rehash_needed_p.
We could also refrain from dumping the `next' and `index' vectors,
except that `next' is currently used for HASH_TABLE_SIZE and
we'd have to rebuild the next_free list as well as adjust
sweep_weak_hash_table for the case where there's no `index'. */
hash->hash = Qnil;
hash_table_freeze (hash);
dump_push (&ctx->hash_tables, object);
START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out);
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
@@ -4151,6 +4088,19 @@ types. */)
|| !NILP (ctx->deferred_hash_tables)
|| !NILP (ctx->deferred_symbols));
ctx->header.hash_list = ctx->offset;
dump_hash_table_list (ctx);
do
{
dump_drain_deferred_hash_tables (ctx);
dump_drain_deferred_symbols (ctx);
dump_drain_normal_queue (ctx);
}
while (!dump_queue_empty_p (&ctx->dump_queue)
|| !NILP (ctx->deferred_hash_tables)
|| !NILP (ctx->deferred_symbols));
dump_sort_copied_objects (ctx);
/* While we copy built-in symbols into the Emacs image, these
@@ -5302,6 +5252,9 @@ enum dump_section
NUMBER_DUMP_SECTIONS,
};
/* Pointer to a stack variable to avoid having to staticpro it. */
static Lisp_Object *pdumper_hashes = &zero_vector;
/* Load a dump from DUMP_FILENAME. Return an error code.
N.B. We run very early in initialization, so we can't use lisp,
@@ -5448,6 +5401,15 @@ pdumper_load (const char *dump_filename)
for (int i = 0; i < ARRAYELTS (sections); ++i)
dump_mmap_reset (&sections[i]);
Lisp_Object hashes = zero_vector;
if (header->hash_list)
{
struct Lisp_Vector *hash_tables =
((struct Lisp_Vector *)(dump_base + header->hash_list));
XSETVECTOR (hashes, hash_tables);
}
pdumper_hashes = &hashes;
/* Run the functions Emacs registered for doing post-dump-load
initialization. */
for (int i = 0; i < nr_dump_hooks; ++i)
@@ -5518,6 +5480,19 @@ Value is nil if this session was not started using a dump file.*/)
#endif /* HAVE_PDUMPER */
static void
thaw_hash_tables (void)
{
Lisp_Object hash_tables = *pdumper_hashes;
for (ptrdiff_t i = 0; i < ASIZE (hash_tables); i++)
hash_table_thaw (AREF (hash_tables, i));
}
void
init_pdumper_once (void)
{
pdumper_do_now_and_after_load (thaw_hash_tables);
}
void
syms_of_pdumper (void)
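
As context for init_pdumper_once above: pdumper_do_now_and_after_load both runs its argument immediately and remembers it, so that pdumper_load can run it again after each dump is mapped in; that is how thaw_hash_tables gets invoked at the right moment.  A stand-alone toy of that hook pattern (hypothetical names, not the Emacs implementation):

  #include <stdio.h>

  typedef void (*dump_hook) (void);

  static dump_hook hooks[8];
  static int nr_hooks;

  /* Register F for future dump loads and also run it now, mirroring
     what pdumper_do_now_and_after_load does for thaw_hash_tables.  */
  static void
  toy_do_now_and_after_load (dump_hook f)
  {
    if (nr_hooks < 8)
      hooks[nr_hooks++] = f;
    f ();
  }

  /* Roughly what a load does once the dump is mapped and the frozen
     hash-table vector is reachable again.  */
  static void
  toy_after_load (void)
  {
    for (int i = 0; i < nr_hooks; i++)
      hooks[i] ();
  }

  static void
  toy_thaw_hash_tables (void)
  {
    puts ("rehashing dumped hash tables");
  }

  int
  main (void)
  {
    toy_do_now_and_after_load (toy_thaw_hash_tables);  /* init_pdumper_once */
    toy_after_load ();                                 /* pdumper_load */
    return 0;
  }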

src/pdumper.h

@@ -256,6 +256,7 @@ pdumper_clear_marks (void)
file was loaded. */
extern void pdumper_record_wd (const char *);
void init_pdumper_once (void);
void syms_of_pdumper (void);
INLINE_HEADER_END