Change module interface to no longer use GMP objects directly.

As described in the new comment added to emacs-module.c, using GMP
directly in the module interface has significant downsides: it couples
the module interface directly to the implementation and requires
module authors to link their module against the same GMP library as
Emacs itself, which is often difficult and an unnecessary burden.  By
picking a representation for the magnitude that often matches the one
used by GMP, we can avoid overhead when converting from and to GMP in
most cases.

Loading the test module in test/data/emacs-module and evaluating

(dotimes (_ 10000)
  (mod-test-double (* 2 most-negative-fixnum)))

under Callgrind shows that on my (GNU/Linux) machine Emacs only spends
10% of the CPU time of mod-test-double in mpz_import and mpz_export
combined, even though that function does little else.  (By contrast,
30% is spent in allocate_pseudovector.)

* src/emacs-module.h.in: Don't check EMACS_MODULE_GMP.  Don't include
gmp.h.  Remove emacs_mpz structure.  Instead, define type alias
emacs_limb_t and macro EMACS_LIMB_MAX.

* src/module-env-27.h: Change interface of extract_big_integer and
make_big_integer to take a sign-magnitude representation instead of
mpz_t.

* src/emacs-module.c: Don't check EMACS_MODULE_GMP or
EMACS_MODULE_HAVE_MPZ_T.  Add a comment about the chosen
implementation.
(module_extract_big_integer, module_make_big_integer): Reimplement
without using mpz_t in the interface.

* doc/lispref/internals.texi (Module Values): Adapt function
documentation and example.  Stop mentioning GMP and EMACS_MODULE_GMP.

* test/data/emacs-module/mod-test.c: Don't define EMACS_MODULE_GMP or
EMACS_MODULE_HAVE_MPZ_T.
(memory_full, extract_big_integer, make_big_integer): New helper
functions, identical to example in the Info documentation.
(Fmod_test_nanoseconds, Fmod_test_double): Adapt to new interface.
This commit is contained in:
Philipp Stephani 2019-11-02 10:54:57 +01:00
parent 0ca32d1270
commit 096be9c454
5 changed files with 406 additions and 101 deletions

View file

@ -1475,6 +1475,42 @@ the widest integral data type supported by the C compiler, typically
@code{overflow-error}.
@end deftypefn
@deftypefn Function bool extract_big_integer (emacs_env *@var{env}, emacs_value @var{arg}, int *@var{sign}, ptrdiff_t *@var{count}, emacs_limb_t *@var{magnitude})
This function, which is available since Emacs 27, extracts the
integral value of @var{arg}. The value of @var{arg} must be an
integer (fixnum or bignum). If @var{sign} is not @code{NULL}, it
stores the sign of @var{arg} (-1, 0, or +1) into @code{*sign}. The
magnitude is stored into @var{magnitude} as follows. If @var{count}
and @var{magnitude} are both non-@code{NULL}, then @var{magnitude} must
point to an array of at least @code{*count} @code{emacs_limb_t}
elements. If @var{magnitude} is large enough to hold the magnitude of
@var{arg}, then this function writes the magnitude into the
@var{magnitude} array in little-endian form, stores the number of
array elements written into @code{*count}, and returns @code{true}.
If @var{magnitude} is not large enough, it stores the required array
size into @code{*count}, signals an error, and returns @code{false}.
If @var{count} is not @code{NULL} and @var{magnitude} is @code{NULL},
then the function stores the required array size into @code{*count}
and returns @code{true}.
Emacs guarantees that the maximum required value of @code{*count}
never exceeds @code{min (PTRDIFF_MAX, SIZE_MAX) / sizeof
(emacs_limb_t)}. This implies that you can use e.g. @code{malloc
((size_t) (*count * sizeof (emacs_limb_t)))} to allocate the
@code{magnitude} array without integer overflow.
@end deftypefn
@deftp {Type alias} emacs_limb_t
This type is an alias to an otherwise unspecified unsigned integral
type. It is used as element type for the magnitude arrays for the big
integer conversion functions.
@end deftp
@defvr Macro EMACS_LIMB_MAX
This macro expands to an integer constant expression specifying the
maximum possible value for an @code{emacs_limb_t} object.
@end defvr
@deftypefn Function double extract_float (emacs_env *@var{env}, emacs_value @var{arg})
This function returns the value of a Lisp float specified by
@var{arg}, as a C @code{double} value.
@ -1572,6 +1608,128 @@ limits set by @code{most-negative-fixnum} and
@code{most-positive-fixnum} (@pxref{Integer Basics}).
@end deftypefn
@deftypefn Function emacs_value make_big_integer (emacs_env *@var{env}, int @var{sign}, ptrdiff_t @var{count}, const emacs_limb_t *@var{magnitude})
This function, which is available since Emacs 27, takes an
arbitrary-sized integer argument and returns a corresponding
@code{emacs_value} object. The @var{sign} argument gives the sign of
the return value. If @var{sign} is nonzero, then @var{magnitude} must
point to an array of at least @var{count} elements specifying the
little-endian magnitude of the return value.
@end deftypefn
The following example uses the GNU Multiprecision Library (GMP) to
calculate the next probable prime after a given integer.
@xref{Top,,,gmp}, for a general overview of GMP, and @pxref{Integer
Import and Export,,,gmp}, for how to convert the @code{magnitude} array
to and from GMP @code{mpz_t} values.
@example
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <gmp.h>
#include <emacs-module.h>
static void
memory_full (emacs_env *env)
@{
const char *message = "Memory exhausted";
emacs_value data = env->make_string (env, message, strlen (message));
env->non_local_exit_signal (env, env->intern (env, "error"),
env->funcall (env, env->intern (env, "list"), 1,
&data));
@}
enum
@{
max_count = ((SIZE_MAX < PTRDIFF_MAX ? SIZE_MAX : PTRDIFF_MAX)
/ sizeof (emacs_limb_t))
@};
static bool
extract_big_integer (emacs_env *env, emacs_value arg, mpz_t result)
@{
int sign;
ptrdiff_t count;
bool success = env->extract_big_integer (env, arg, &sign, &count, NULL);
if (!success)
return false;
if (sign == 0)
@{
mpz_set_ui (result, 0);
return true;
@}
enum @{ order = -1, size = sizeof (emacs_limb_t), endian = 0, nails = 0 @};
assert (0 < count && count <= max_count);
emacs_limb_t *magnitude = malloc ((size_t) (count * size));
if (magnitude == NULL)
@{
memory_full (env);
return false;
@}
success = env->extract_big_integer (env, arg, NULL, &count, magnitude);
assert (success);
mpz_import (result, count, order, size, endian, nails, magnitude);
free (magnitude);
if (sign < 0)
mpz_neg (result, result);
return true;
@}
static emacs_value
make_big_integer (emacs_env *env, const mpz_t value)
@{
if (mpz_sgn (value) == 0)
return env->make_integer (env, 0);
enum
@{
order = -1,
size = sizeof (emacs_limb_t),
endian = 0,
nails = 0,
numb = 8 * size - nails
@};
size_t count = (mpz_sizeinbase (value, 2) + numb - 1) / numb;
if (max_count < count)
@{
memory_full (env);
return NULL;
@}
emacs_limb_t *magnitude = malloc (count * size);
if (magnitude == NULL)
@{
memory_full (env);
return NULL;
@}
size_t written;
mpz_export (magnitude, &written, order, size, endian, nails, value);
assert (written == count);
assert (count <= PTRDIFF_MAX);
emacs_value result = env->make_big_integer (env, mpz_sgn (value),
(ptrdiff_t) count, magnitude);
free (magnitude);
return result;
@}
static emacs_value
next_prime (emacs_env *env, ptrdiff_t nargs, emacs_value *args,
void *data)
@{
assert (nargs == 1);
mpz_t p;
mpz_init (p);
extract_big_integer (env, args[0], p);
mpz_nextprime (p, p);
emacs_value result = make_big_integer (env, p);
mpz_clear (p);
return result;
@}
@end example
@deftypefn Function emacs_value make_float (emacs_env *@var{env}, double @var{d})
This function takes a @code{double} argument @var{d} and returns the
corresponding Emacs floating-point value.
@ -1601,66 +1759,6 @@ function raises the @code{overflow-error} error condition if
string.
@end deftypefn
If you define the preprocessor macro @code{EMACS_MODULE_GMP} before
including the header @file{emacs-module.h}, you can also convert
between Emacs integers and GMP @code{mpz_t} values. @xref{GMP
Basics,,,gmp}. If @code{EMACS_MODULE_GMP} is defined,
@file{emacs-module.h} wraps @code{mpz_t} in the following structure:
@deftp struct emacs_mpz value
struct emacs_mpz @{ mpz_t value; @};
@end deftp
@noindent
Then you can use the following additional functions:
@deftypefn Function bool extract_big_integer (emacs_env *@var{env}, emacs_value @var{arg}, struct emacs_mpz *@var{result})
This function, which is available since Emacs 27, extracts the
integral value of @var{arg} into @var{result}. @var{result} must not
be @code{NULL}. @code{@var{result}->value} must be an initialized
@code{mpz_t} object. @xref{Initializing Integers,,,gmp}. If
@var{arg} is an integer, Emacs will store its value into
@code{@var{result}->value}. After you have finished using
@code{@var{result}->value}, you should free it using @code{mpz_clear}
or similar.
@end deftypefn
@deftypefn Function emacs_value make_big_integer (emacs_env *@var{env}, const struct emacs_mpz *@var{value})
This function, which is available since Emacs 27, takes an
arbitrary-sized integer argument and returns a corresponding
@code{emacs_value} object. @var{value} must not be @code{NULL}.
@code{@var{value}->value} must be an initialized @code{mpz_t} object.
@xref{Initializing Integers,,,gmp}. Emacs will return a corresponding
integral object. After you have finished using
@code{@var{value}->value}, you should free it using @code{mpz_clear}
or similar.
@end deftypefn
The following example uses GMP to calculate the next probable prime
after a given integer:
@example
#include <assert.h>
#include <gmp.h>
#define EMACS_MODULE_GMP
#include <emacs-module.h>
static emacs_value
next_prime (emacs_env *env, ptrdiff_t nargs, emacs_value *args,
void *data)
@{
assert (nargs == 1);
emacs_mpz p;
mpz_init (p.value);
env->extract_big_integer (env, args[0], &p);
mpz_nextprime (p.value, p.value);
emacs_value result = env->make_big_integer (env, &p);
mpz_clear (p.value);
return result;
@}
@end example
The @acronym{API} does not provide functions to manipulate Lisp data
structures, for example, create lists with @code{cons} and @code{list}
(@pxref{Building Lists}), extract list members with @code{car} and

View file

@ -70,12 +70,6 @@ To add a new module function, proceed as follows:
#include <config.h>
#ifndef HAVE_GMP
#include "mini-gmp.h"
#define EMACS_MODULE_HAVE_MPZ_T
#endif
#define EMACS_MODULE_GMP
#include "emacs-module.h"
#include <stdarg.h>
@ -772,21 +766,143 @@ module_make_time (emacs_env *env, struct timespec time)
return lisp_to_value (env, timespec_to_lisp (time));
}
static void
module_extract_big_integer (emacs_env *env, emacs_value value,
struct emacs_mpz *result)
/*
Big integer support.
There are two possible ways to support big integers in the module API
that have been discussed:
1. Exposing GMP numbers (mpz_t) directly in the API.
2. Isolating the API from GMP by converting to/from a custom
sign-magnitude representation.
Approach (1) has the advantage of being faster (no import/export
required) and requiring less code in Emacs and in modules that would
use GMP anyway. However, (1) also couples big integer support
directly to the current implementation in Emacs (GMP). Also (1)
requires each module author to ensure that their module is linked to
the same GMP library as Emacs itself; in particular, module authors
can't link GMP statically. (1) also requires conditional compilation
and workarounds to ensure the module interface still works if GMP
isn't available while including emacs-module.h. It also means that
modules written in languages such as Go and Java that support big
integers without GMP now have to carry an otherwise unnecessary GMP
dependency. Approach (2), on the other hand, neatly decouples the
module interface from the GMP-based implementation. It's not
significantly more complex than (1) either: the additional code is
mostly straightforward. Overall, the benefits of (2) over (1) are
large enough to prefer it here.
We use a simple sign-magnitude representation for the big integers.
For the magnitude we pick an array of an unsigned integer type similar
to mp_limb_t instead of e.g. unsigned char. This matches in most
cases the representation of a GMP limb. In such cases GMP picks an
optimized algorithm for mpz_import and mpz_export that boils down to a
single memcpy to convert the magnitude. This way we largely avoid the
import/export overhead on most platforms.
*/
enum
{
MODULE_FUNCTION_BEGIN ();
Lisp_Object o = value_to_lisp (value);
/* Documented maximum count of magnitude elements. */
module_bignum_count_max = min (SIZE_MAX, PTRDIFF_MAX) / sizeof (emacs_limb_t)
};
/* Implementation of the module API function extract_big_integer.

   ARG must be a Lisp integer (fixnum or bignum); CHECK_INTEGER rejects
   anything else.  If SIGN is non-null, store the sign of ARG (-1, 0,
   or +1) in *SIGN.  If COUNT is null, nothing else is done.  If COUNT
   is non-null and MAGNITUDE is null, store in *COUNT the number of
   array elements needed for the magnitude.  If both are non-null,
   MAGNITUDE must point to at least *COUNT elements; write the
   magnitude there, least significant element first, and store the
   number of elements written in *COUNT.  If *COUNT is too small,
   store the required number in *COUNT and call args_out_of_range_3.
   Zero has an empty magnitude, so *COUNT becomes 0 for it.

   Return true on success.  The false passed to MODULE_FUNCTION_BEGIN
   is presumably what is returned on failure.  */
static bool
module_extract_big_integer (emacs_env *env, emacs_value arg, int *sign,
                            ptrdiff_t *count, emacs_limb_t *magnitude)
{
  MODULE_FUNCTION_BEGIN (false);
  Lisp_Object o = value_to_lisp (arg);
  CHECK_INTEGER (o);
  /* Let the code below store the sign unconditionally.  */
  int dummy;
  if (sign == NULL)
    sign = &dummy;
  /* See
     https://gmplib.org/manual/Integer-Import-and-Export.html#index-Export.  */
  enum
  {
    order = -1,
    size = sizeof *magnitude,
    bits = size * CHAR_BIT,
    endian = 0,
    nails = 0,
    numb = 8 * size - nails
  };
  if (FIXNUMP (o))
    {
      EMACS_INT x = XFIXNUM (o);
      *sign = (0 < x) - (x < 0);
      if (count == NULL)
        return true;
      if (x == 0)
        {
          /* Zero needs no magnitude elements; report that to the
             caller instead of leaving *COUNT untouched.  */
          *count = 0;
          return true;
        }
      /* As a simplification we don't check how many array elements
         are exactly required, but use a reasonable static upper
         bound.  For most architectures exactly one element should
         suffice.  */
      EMACS_UINT u;
      enum { required = (sizeof u + size - 1) / size };
      verify (0 < required && required <= module_bignum_count_max);
      if (magnitude == NULL)
        {
          *count = required;
          return true;
        }
      if (*count < required)
        {
          ptrdiff_t actual = *count;
          *count = required;
          args_out_of_range_3 (INT_TO_INTEGER (actual),
                               INT_TO_INTEGER (required),
                               INT_TO_INTEGER (module_bignum_count_max));
        }
      /* The caller's array may be larger than needed; report how many
         elements are actually written.  */
      *count = required;
      /* Set u = abs(x).  See https://stackoverflow.com/a/17313717.  */
      if (0 < x)
        u = (EMACS_UINT) x;
      else
        u = -(EMACS_UINT) x;
      verify (required * bits < PTRDIFF_MAX);
      for (ptrdiff_t i = 0; i < required; ++i)
        magnitude[i] = (emacs_limb_t) (u >> (i * bits));
      return true;
    }
  const mpz_t *x = xbignum_val (o);
  *sign = mpz_sgn (*x);
  if (count == NULL)
    return true;
  /* Number of NUMB-bit elements needed to hold the magnitude.  */
  size_t required_size = (mpz_sizeinbase (*x, 2) + numb - 1) / numb;
  eassert (required_size <= PTRDIFF_MAX);
  ptrdiff_t required = (ptrdiff_t) required_size;
  eassert (required <= module_bignum_count_max);
  if (magnitude == NULL)
    {
      *count = required;
      return true;
    }
  if (*count < required)
    {
      ptrdiff_t actual = *count;
      *count = required;
      args_out_of_range_3 (INT_TO_INTEGER (actual), INT_TO_INTEGER (required),
                           INT_TO_INTEGER (module_bignum_count_max));
    }
  /* The caller's array may be larger than needed; report how many
     elements are actually written.  */
  *count = required;
  size_t written;
  mpz_export (magnitude, &written, order, size, endian, nails, *x);
  eassert (written == required_size);
  return true;
}
static emacs_value
module_make_big_integer (emacs_env *env, const struct emacs_mpz *value)
module_make_big_integer (emacs_env *env, int sign,
ptrdiff_t count, const unsigned long *magnitude)
{
MODULE_FUNCTION_BEGIN (NULL);
mpz_set (mpz[0], value->value);
if (sign == 0)
return lisp_to_value (env, make_fixed_natnum (0));
enum { order = -1, size = sizeof *magnitude, endian = 0, nails = 0 };
mpz_import (mpz[0], count, order, size, endian, nails, magnitude);
if (sign < 0)
mpz_neg (mpz[0], mpz[0]);
return lisp_to_value (env, make_integer_mpz ());
}

View file

@ -20,6 +20,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#ifndef EMACS_MODULE_H
#define EMACS_MODULE_H
#include <limits.h>
#include <stdint.h>
#include <stddef.h>
#include <time.h>
@ -28,10 +29,6 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include <stdbool.h>
#endif
#if defined EMACS_MODULE_GMP && !defined EMACS_MODULE_HAVE_MPZ_T
#include <gmp.h>
#endif
#define EMACS_MAJOR_VERSION @emacs_major_version@
#if defined __cplusplus && __cplusplus >= 201103L
@ -100,10 +97,21 @@ enum emacs_process_input_result
emacs_process_input_quit = 1
};
#ifdef EMACS_MODULE_GMP
struct emacs_mpz { mpz_t value; };
/*
Implementation note: We define emacs_limb_t so that it is likely to
match the GMP mp_limb_t type. If the types match, GMP can use an
optimization for mpz_import and mpz_export that boils down to a
memcpy. According to https://gmplib.org/manual/ABI-and-ISA.html GMP
will prefer a 64-bit limb and will default to unsigned long if that is
wide enough. Note that this is an internal micro-optimization. Users
shouldn't rely on the exact size of emacs_limb_t.
*/
#if ULONG_MAX == 0xFFFFFFFF
typedef unsigned long long emacs_limb_t;
# define EMACS_LIMB_MAX ULLONG_MAX
#else
struct emacs_mpz; /* no definition */
typedef unsigned long emacs_limb_t;
# define EMACS_LIMB_MAX ULONG_MAX
#endif
struct emacs_env_25

View file

@ -9,10 +9,10 @@
emacs_value (*make_time) (emacs_env *env, struct timespec time)
EMACS_ATTRIBUTE_NONNULL (1);
void (*extract_big_integer) (emacs_env *env, emacs_value value,
struct emacs_mpz *result)
EMACS_ATTRIBUTE_NONNULL (1, 3);
bool (*extract_big_integer) (emacs_env *env, emacs_value arg, int *sign,
ptrdiff_t *count, unsigned long *magnitude)
EMACS_ATTRIBUTE_NONNULL (1);
emacs_value (*make_big_integer) (emacs_env *env,
const struct emacs_mpz *value)
EMACS_ATTRIBUTE_NONNULL (1, 2);
/* Create an integer from a sign and a magnitude array.  The magnitude
   array uses emacs_limb_t elements, which need not be unsigned long.  */
emacs_value (*make_big_integer) (emacs_env *env, int sign, ptrdiff_t count,
                                 const emacs_limb_t *magnitude)
  EMACS_ATTRIBUTE_NONNULL (1);

View file

@ -33,10 +33,8 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
#include <gmp.h>
#else
#include "mini-gmp.h"
#define EMACS_MODULE_HAVE_MPZ_T
#endif
#define EMACS_MODULE_GMP
#include <emacs-module.h>
#include "timespec.h"
@ -66,6 +64,8 @@ int plugin_is_GPL_compatible;
# error "INTPTR_MAX too large"
#endif
/* Smoke test to verify that EMACS_LIMB_MAX is defined. */
_Static_assert (0 < EMACS_LIMB_MAX, "EMACS_LIMB_MAX missing or incorrect");
/* Always return symbol 't'. */
static emacs_value
@ -372,23 +372,106 @@ Fmod_test_add_nanosecond (emacs_env *env, ptrdiff_t nargs, emacs_value *args,
return env->make_time (env, time);
}
/* Signal the Lisp condition `error' with the list ("Memory exhausted")
   as its data, by way of env->non_local_exit_signal.  */
static void
memory_full (emacs_env *env)
{
  static const char text[] = "Memory exhausted";
  /* sizeof counts the terminating null byte; the string length does not.  */
  emacs_value description = env->make_string (env, text, sizeof text - 1);
  emacs_value error_symbol = env->intern (env, "error");
  emacs_value list_function = env->intern (env, "list");
  emacs_value error_data = env->funcall (env, list_function, 1, &description);
  env->non_local_exit_signal (env, error_symbol, error_data);
}
/* Upper bound on the number of magnitude elements: the smaller of
   SIZE_MAX and PTRDIFF_MAX, divided by the element size.  For any
   COUNT up to this bound, COUNT * sizeof (emacs_limb_t) overflows
   neither size_t nor ptrdiff_t.  */
enum
{
  max_count = ((PTRDIFF_MAX <= SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX)
               / sizeof (emacs_limb_t))
};
/* Store the value of the Lisp integer ARG into RESULT, which the
   caller must already have initialized.  Return true on success.
   Return false if env->extract_big_integer reports failure, or if the
   temporary magnitude buffer cannot be allocated; in the latter case
   an error is signaled through memory_full.  */
static bool
extract_big_integer (emacs_env *env, emacs_value arg, mpz_t result)
{
  int sign;
  ptrdiff_t count;
  /* First call: ask only for the sign and the required element count.  */
  bool success = env->extract_big_integer (env, arg, &sign, &count, NULL);
  if (!success)
    return false;
  if (sign == 0)
    {
      mpz_set_ui (result, 0);
      return true;
    }
  /* SIZE must be the size of the array element type emacs_limb_t, which
     is not necessarily unsigned long.  With a smaller size the buffer
     below would be too short and mpz_import would misread it.  */
  enum { order = -1, size = sizeof (emacs_limb_t), endian = 0, nails = 0 };
  assert (0 < count && count <= max_count);
  emacs_limb_t *magnitude = malloc ((size_t) (count * size));
  if (magnitude == NULL)
    {
      memory_full (env);
      return false;
    }
  /* Second call: fetch the magnitude itself.  */
  success = env->extract_big_integer (env, arg, NULL, &count, magnitude);
  assert (success);
  mpz_import (result, count, order, size, endian, nails, magnitude);
  free (magnitude);
  /* mpz_import yields a nonnegative value; apply the sign separately.  */
  if (sign < 0)
    mpz_neg (result, result);
  return true;
}
/* Return a Lisp integer equal to VALUE.  Zero is created with
   env->make_integer; any other value is exported into a temporary
   emacs_limb_t array and handed to env->make_big_integer.  If the
   array would exceed max_count elements or cannot be allocated,
   signal an error through memory_full and return NULL.  */
static emacs_value
make_big_integer (emacs_env *env, const mpz_t value)
{
  int sign = mpz_sgn (value);
  if (sign == 0)
    return env->make_integer (env, 0);

  /* See
     https://gmplib.org/manual/Integer-Import-and-Export.html#index-Export.  */
  enum
  {
    limb_order = -1,
    limb_size = sizeof (emacs_limb_t),
    limb_endian = 0,
    limb_nails = 0,
    limb_bits = 8 * limb_size - limb_nails
  };

  /* Round the bit length up to a whole number of limbs.  */
  size_t bit_length = mpz_sizeinbase (value, 2);
  size_t limbs = (bit_length + limb_bits - 1) / limb_bits;

  /* A too-large request and a failed allocation are reported alike.  */
  emacs_limb_t *buffer = NULL;
  if (limbs <= max_count)
    buffer = malloc (limbs * limb_size);
  if (buffer == NULL)
    {
      memory_full (env);
      return NULL;
    }

  size_t exported;
  mpz_export (buffer, &exported, limb_order, limb_size, limb_endian,
              limb_nails, value);
  assert (exported == limbs);
  assert (limbs <= PTRDIFF_MAX);

  emacs_value lisp_integer
    = env->make_big_integer (env, sign, (ptrdiff_t) limbs, buffer);
  free (buffer);
  return lisp_integer;
}
static emacs_value
Fmod_test_nanoseconds (emacs_env *env, ptrdiff_t nargs, emacs_value *args, void *data) {
assert (nargs == 1);
struct timespec time = env->extract_time (env, args[0]);
struct emacs_mpz nanoseconds;
mpz_t nanoseconds;
assert (LONG_MIN <= time.tv_sec && time.tv_sec <= LONG_MAX);
mpz_init_set_si (nanoseconds.value, time.tv_sec);
mpz_init_set_si (nanoseconds, time.tv_sec);
#ifdef __MINGW32__
_Static_assert (1000000000 <= ULONG_MAX, "unsupported architecture");
#else
static_assert (1000000000 <= ULONG_MAX, "unsupported architecture");
#endif
mpz_mul_ui (nanoseconds.value, nanoseconds.value, 1000000000);
mpz_mul_ui (nanoseconds, nanoseconds, 1000000000);
assert (0 <= time.tv_nsec && time.tv_nsec <= ULONG_MAX);
mpz_add_ui (nanoseconds.value, nanoseconds.value, time.tv_nsec);
emacs_value result = env->make_big_integer (env, &nanoseconds);
mpz_clear (nanoseconds.value);
mpz_add_ui (nanoseconds, nanoseconds, time.tv_nsec);
emacs_value result = make_big_integer (env, nanoseconds);
mpz_clear (nanoseconds);
return result;
}
@ -398,12 +481,12 @@ Fmod_test_double (emacs_env *env, ptrdiff_t nargs, emacs_value *args,
{
assert (nargs == 1);
emacs_value arg = args[0];
struct emacs_mpz value;
mpz_init (value.value);
env->extract_big_integer (env, arg, &value);
mpz_mul_ui (value.value, value.value, 2);
emacs_value result = env->make_big_integer (env, &value);
mpz_clear (value.value);
mpz_t value;
mpz_init (value);
extract_big_integer (env, arg, value);
mpz_mul_ui (value, value, 2);
emacs_value result = make_big_integer (env, value);
mpz_clear (value);
return result;
}