Rework eln hash filename strategy

Generate eln filename hashing also the source file content in the form:

/absolute/path/filename.el + content ->
eln-cache/filename-path_hash-content_hash.eln

	* src/lread.c (maybe_swap_for_eln): Always call
	Fcomp_el_to_eln_filename on an existing source file.

	* src/comp.c (md5.h, sysstdio.h, zlib.h): New include.
	(comp_hash_string): Use md5 instead of sha512.
	(MD5_BLOCKSIZE): New macro.
	(accumulate_and_process_md5, final_process_md5, md5_gz_stream)
	(comp_hash_source_file): New functions.
	(Fcomp_el_to_eln_filename): Rework for hasing using also source
	file content.

	* src/lread.c (maybe_swap_for_eln): Rename el_name -> src_name as
	this can be also a have .el.gz extention.
This commit is contained in:
Andrea Corallo 2020-08-22 11:11:21 +02:00
parent 9baa0296aa
commit 5f5d664c73
4 changed files with 167 additions and 22 deletions

View file

@ -3787,6 +3787,12 @@ Here instructions on how to compile and install libgccjit from source:
HAVE_NATIVE_COMP=no
LIBGCCJIT_LIB=
if test "${with_nativecomp}" != "no"; then
if test "${HAVE_PDUMPER}" = no; then
AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper'])
fi
if test "${HAVE_ZLIB}" = no; then
AC_MSG_ERROR(['--with-nativecomp' requires zlib])
fi
emacs_save_LIBS=$LIBS
LIBS="-lgccjit"
AC_RUN_IFELSE([libgccjit_smoke_test], [], [libgccjit_broken],
@ -3800,9 +3806,6 @@ if test "${with_nativecomp}" != "no"; then
NEED_DYNLIB=yes
AC_DEFINE(HAVE_NATIVE_COMP, 1, [Define to 1 if you have the libgccjit library (-lgccjit).])
fi
if test "${HAVE_NATIVE_COMP}" = yes && test "${HAVE_PDUMPER}" = no; then
AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper'])
fi
AC_DEFINE_UNQUOTED(NATIVE_ELISP_SUFFIX, ".eln",
[System extension for native compiled elisp])
AC_SUBST(HAVE_NATIVE_COMP)

View file

@ -50,12 +50,18 @@ am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
HAVE_NATIVE_COMP = @HAVE_NATIVE_COMP@
ALL_CFLAGS= \
$(C_SWITCH_SYSTEM) $(C_SWITCH_MACHINE) $(DEPFLAGS) \
$(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS) $(PROFILING_CFLAGS) $(CFLAGS) \
-I. -I../src -I$(srcdir) -I$(srcdir)/../src \
$(if $(patsubst e-%,,$(notdir $<)),,-Demacs)
ifeq ($(HAVE_NATIVE_COMP),yes)
ALL_CFLAGS += -DGL_COMPILE_CRYPTO_STREAM
endif
SYSTEM_TYPE = @SYSTEM_TYPE@
ifeq ($(SYSTEM_TYPE),windows-nt)
include $(srcdir)/../nt/gnulib-cfg.mk

View file

@ -36,7 +36,9 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include "dynlib.h"
#include "buffer.h"
#include "blockinput.h"
#include "sha512.h"
#include "md5.h"
#include "sysstdio.h"
#include "zlib.h"
/********************************/
@ -394,8 +396,6 @@ load_gccjit_if_necessary (bool mandatory)
}
#define ELN_FILENAME_HASH_LEN 64
/* C symbols emitted for the load relocation mechanism. */
#define CURRENT_THREAD_RELOC_SYM "current_thread_reloc"
#define PURE_RELOC_SYM "pure_reloc"
@ -640,9 +640,123 @@ format_string (const char *format, ...)
static Lisp_Object
comp_hash_string (Lisp_Object string)
{
Lisp_Object digest = make_uninit_string (SHA512_DIGEST_SIZE * 2);
sha512_buffer (SSDATA (string), SCHARS (string), SSDATA (digest));
hexbuf_digest (SSDATA (digest), SDATA (digest), SHA512_DIGEST_SIZE);
Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2);
md5_buffer (SSDATA (string), SCHARS (string), SSDATA (digest));
hexbuf_digest (SSDATA (digest), SDATA (digest), MD5_DIGEST_SIZE);
return digest;
}
#define MD5_BLOCKSIZE 32768 /* From md5.c */
static char acc_buff[2 * MD5_BLOCKSIZE];
static size_t acc_size;
static void
accumulate_and_process_md5 (void *data, size_t len, struct md5_ctx *ctxt)
{
eassert (len <= MD5_BLOCKSIZE);
/* We may optimize this saving some of these memcpy/move using
directly the outer buffers but so far I'll not bother. */
memcpy (acc_buff + acc_size, data, len);
acc_size += len;
if (acc_size >= MD5_BLOCKSIZE)
{
acc_size -= MD5_BLOCKSIZE;
md5_process_block (acc_buff, MD5_BLOCKSIZE, ctxt);
memmove (acc_buff, acc_buff + MD5_BLOCKSIZE, acc_size);
}
}
static void
final_process_md5 (struct md5_ctx *ctxt)
{
if (acc_size)
{
md5_process_bytes (acc_buff, acc_size, ctxt);
acc_size = 0;
}
}
static int
md5_gz_stream (FILE *source, void *resblock)
{
z_stream stream;
unsigned char in[MD5_BLOCKSIZE];
unsigned char out[MD5_BLOCKSIZE];
eassert (!acc_size);
struct md5_ctx ctx;
md5_init_ctx (&ctx);
/* allocate inflate state */
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
stream.opaque = Z_NULL;
stream.avail_in = 0;
stream.next_in = Z_NULL;
int res = inflateInit2 (&stream, MAX_WBITS + 32);
if (res != Z_OK)
return -1;
do {
stream.avail_in = fread (in, 1, MD5_BLOCKSIZE, source);
if (ferror (source)) {
inflateEnd (&stream);
return -1;
}
if (stream.avail_in == 0)
break;
stream.next_in = in;
do {
stream.avail_out = MD5_BLOCKSIZE;
stream.next_out = out;
res = inflate (&stream, Z_NO_FLUSH);
if (res != Z_OK && res != Z_STREAM_END)
return -1;
accumulate_and_process_md5 (out, MD5_BLOCKSIZE - stream.avail_out, &ctx);
} while (!stream.avail_out);
} while (res != Z_STREAM_END);
final_process_md5 (&ctx);
inflateEnd (&stream);
if (res != Z_STREAM_END)
return -1;
md5_finish_ctx (&ctx, resblock);
return 0;
}
#undef MD5_BLOCKSIZE
static Lisp_Object
comp_hash_source_file (Lisp_Object filename)
{
/* Can't use Finsert_file_contents + Fbuffer_hash as this is called
by Fcomp_el_to_eln_filename too early during bootstrap. */
bool is_gz = suffix_p (filename, ".gz");
FILE *f = emacs_fopen (SSDATA (filename), is_gz ? "rb" : "r");
if (!f)
report_file_error ("Opening source file", filename);
Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2);
int res = is_gz
? md5_gz_stream (f, SSDATA (digest))
: md5_stream (f, SSDATA (digest));
fclose (f);
if (res)
xsignal2 (Qfile_notify_error, build_string ("hashing failed"), filename);
hexbuf_digest (SSDATA (digest), SSDATA (digest), MD5_DIGEST_SIZE);
return digest;
}
@ -3872,21 +3986,36 @@ If BASE-DIR is nil use the first entry in `comp-eln-load-path'. */)
{
CHECK_STRING (filename);
filename = Fexpand_file_name (filename, Qnil);
if (NILP (Ffile_exists_p (filename)))
xsignal1 (Qfile_missing, filename);
Lisp_Object content_hash = comp_hash_source_file (filename);
if (suffix_p (filename, ".gz"))
filename = Fsubstring (filename, Qnil, make_fixnum (-3));
filename = Fexpand_file_name (filename, Qnil);
/* We create eln filenames with an hash in order to look-up these
starting from the source filename, IOW have a relation
/absolute/path/filename.el -> eln-cache/filename-hash.eln.
/absolute/path/filename.el + content ->
eln-cache/filename-path_hash-content_hash.eln.
'dlopen' can return the same handle if two shared with the same
filename are loaded in two different times (even if the first was
deleted!). To prevent this scenario the source file content is
included in the hashing algorithm.
As at any point in time no more then one file can exist with the
same filename, should be possibile to clean up all
filename-path_hash-* except the most recent one (or the new one
being recompiled).
As installing .eln files compiled during the build changes their
absolute path we need an hashing mechanism that is not sensitive
to that. For this we replace if match PATH_DUMPLOADSEARCH or
PATH_LOADSEARCH with '//' before generating the hash.
Another approach would be to hash using the source file content
but this may have a measurable performance impact. */
PATH_LOADSEARCH with '//' before generating the hash. */
if (NILP (loadsearch_re_list))
{
@ -3909,12 +4038,12 @@ If BASE-DIR is nil use the first entry in `comp-eln-load-path'. */)
break;
}
}
Lisp_Object hash = Fsubstring (comp_hash_string (filename), Qnil,
make_fixnum (ELN_FILENAME_HASH_LEN));
Lisp_Object separator = build_string ("-");
Lisp_Object path_hash = comp_hash_string (filename);
filename = concat2 (Ffile_name_nondirectory (Fsubstring (filename, Qnil,
make_fixnum (-3))),
build_string ("-"));
separator);
Lisp_Object hash = concat3 (path_hash, separator, content_hash);
filename = concat3 (filename, hash, build_string (NATIVE_ELISP_SUFFIX));
if (NILP (base_dir))
base_dir = XCAR (Vcomp_eln_load_path);

View file

@ -1623,10 +1623,17 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, struct timespec mtime)
Lisp_Object eln_path_tail = Vcomp_eln_load_path;
FOR_EACH_TAIL_SAFE (eln_path_tail)
{
Lisp_Object el_name =
Lisp_Object src_name =
Fsubstring (*filename, Qnil, make_fixnum (-1));
if (NILP (Ffile_exists_p (src_name)))
{
src_name = concat2 (src_name, build_string (".gz"));
if (NILP (Ffile_exists_p (src_name)))
/* Can't find the corresponding source file. */
return;
}
Lisp_Object eln_name =
Fcomp_el_to_eln_filename (el_name, XCAR (eln_path_tail));
Fcomp_el_to_eln_filename (src_name, XCAR (eln_path_tail));
int eln_fd = emacs_open (SSDATA (ENCODE_FILE (eln_name)), O_RDONLY, 0);
if (eln_fd > 0)
@ -1643,7 +1650,7 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, struct timespec mtime)
*fd = eln_fd;
/* Store the eln -> el relation. */
Fputhash (Ffile_name_nondirectory (eln_name),
el_name, Vcomp_eln_to_el_h);
src_name, Vcomp_eln_to_el_h);
return;
}
else