libgomp: Use libnuma for OpenMP's partition=nearest allocation trait

As with the memkind library, it is only used when found at runtime;
it does not need to be present when building GCC.

The included testcase does not check whether the memory has been placed
on the nearest node as the Linux kernel memory handling too often ignores
that hint, using a different node for the allocation.  However, when
running with 'numactl --preferred=<node> ./executable', it is clearly
visible that the feature works by comparing malloc/default vs. nearest
placement (using get_mempolicy to obtain the node for a mem addr).

libgomp/ChangeLog:

	* allocator.c: Add ifdef for LIBGOMP_USE_LIBNUMA.
	(enum gomp_numa_memkind_kind): Renamed from gomp_memkind_kind;
	add GOMP_MEMKIND_LIBNUMA.
	(struct gomp_libnuma_data, gomp_init_libnuma, gomp_get_libnuma): New.
	(omp_init_allocator): Handle partition=nearest with libnuma if avail.
	(omp_aligned_alloc, omp_free, omp_aligned_calloc, omp_realloc): Add
	numa_alloc_local (+ memset), numa_free, and numa_realloc calls as
	needed.
	* config/linux/allocator.c (LIBGOMP_USE_LIBNUMA): Define.
	* libgomp.texi: Fix a typo; use 'fi' instead of its ligature char.
	(Memory allocation): Renamed from 'Memory allocation with libmemkind';
	updated for libnuma usage.
	* testsuite/libgomp.c-c++-common/alloc-11.c: New test.
	* testsuite/libgomp.c-c++-common/alloc-12.c: New test.
This commit is contained in:
Tobias Burnus 2023-07-12 13:50:21 +02:00
parent f9182da321
commit 450b05ce54
5 changed files with 708 additions and 39 deletions

View file

@ -31,13 +31,13 @@
#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
#include <dlfcn.h>
#endif
#define omp_max_predefined_alloc omp_thread_mem_alloc
enum gomp_memkind_kind
enum gomp_numa_memkind_kind
{
GOMP_MEMKIND_NONE = 0,
#define GOMP_MEMKIND_KINDS \
@ -50,7 +50,8 @@ enum gomp_memkind_kind
#define GOMP_MEMKIND_KIND(kind) GOMP_MEMKIND_##kind
GOMP_MEMKIND_KINDS,
#undef GOMP_MEMKIND_KIND
GOMP_MEMKIND_COUNT
GOMP_MEMKIND_COUNT,
GOMP_MEMKIND_LIBNUMA = GOMP_MEMKIND_COUNT
};
struct omp_allocator_data
@ -65,7 +66,7 @@ struct omp_allocator_data
unsigned int fallback : 8;
unsigned int pinned : 1;
unsigned int partition : 7;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
unsigned int memkind : 8;
#endif
#ifndef HAVE_SYNC_BUILTINS
@ -81,6 +82,14 @@ struct omp_mem_header
void *pad;
};
/* Table of libnuma entry points that are looked up at run time by
   gomp_init_libnuma.  The object is zero-allocated there, so every member
   is NULL when "libnuma.so.1" could not be opened.  */
struct gomp_libnuma_data
{
/* Handle returned by dlopen ("libnuma.so.1"); NULL if loading failed.  */
void *numa_handle;
/* Result of dlsym for "numa_alloc_local"; called with the byte count.  */
void *(*numa_alloc_local) (size_t);
/* Result of dlsym for "numa_realloc"; callers in this file pass the
   pointer, its current size and the requested new size.  */
void *(*numa_realloc) (void *, size_t, size_t);
/* Result of dlsym for "numa_free"; callers in this file pass the pointer
   and the size it was allocated with.  */
void (*numa_free) (void *, size_t);
};
struct gomp_memkind_data
{
void *memkind_handle;
@ -92,6 +101,50 @@ struct gomp_memkind_data
void **kinds[GOMP_MEMKIND_COUNT];
};
#ifdef LIBGOMP_USE_LIBNUMA
static struct gomp_libnuma_data *libnuma_data;
static pthread_once_t libnuma_data_once = PTHREAD_ONCE_INIT;
/* One-time initializer (run through pthread_once by gomp_get_libnuma):
   try to load libnuma and publish a table of its entry points in
   libnuma_data.

   The published table is either completely filled in or completely zero:
   it is left zeroed when the library cannot be opened, when
   numa_available () reports that the NUMA policy API is unusable on this
   system, or when any of the required symbols is missing.  Callers can
   therefore test a single function pointer to decide whether libnuma may
   be used.

   If the table itself cannot be allocated, nothing is published and
   libnuma_data stays NULL.  */
static void
gomp_init_libnuma (void)
{
  void *handle = dlopen ("libnuma.so.1", RTLD_LAZY);
  struct gomp_libnuma_data *data;

  data = calloc (1, sizeof (struct gomp_libnuma_data));
  if (data == NULL)
    {
      if (handle)
        dlclose (handle);
      return;
    }

  if (handle)
    {
      /* libnuma requires numa_available () to be called first; if it
         returns a negative value all other libnuma functions are
         undefined and must not be used.  */
      int (*numa_available) (void);
      numa_available
        = (__typeof (numa_available)) dlsym (handle, "numa_available");
      if (!numa_available || numa_available () < 0)
        {
          dlclose (handle);
          handle = NULL;
        }
    }

  if (handle)
    {
      data->numa_alloc_local
        = (__typeof (data->numa_alloc_local)) dlsym (handle,
                                                     "numa_alloc_local");
      data->numa_realloc
        = (__typeof (data->numa_realloc)) dlsym (handle, "numa_realloc");
      data->numa_free
        = (__typeof (data->numa_free)) dlsym (handle, "numa_free");

      /* Never publish a partially usable table: a missing symbol would
         otherwise surface later as a call through a NULL pointer.  */
      if (!data->numa_alloc_local || !data->numa_realloc || !data->numa_free)
        {
          data->numa_alloc_local = NULL;
          data->numa_realloc = NULL;
          data->numa_free = NULL;
          dlclose (handle);
          handle = NULL;
        }
    }

  data->numa_handle = handle;
  __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE);
}
/* Return the libnuma entry-point table, running gomp_init_libnuma through
   pthread_once on first use.  The result is whatever the initializer
   published in libnuma_data; it is NULL if the initializer returned
   without publishing a table.  */
static struct gomp_libnuma_data *
gomp_get_libnuma (void)
{
  struct gomp_libnuma_data *table
    = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE);

  if (table == NULL)
    {
      /* Not published yet: make sure the initializer has run, then
         re-read the pointer it stored.  */
      pthread_once (&libnuma_data_once, gomp_init_libnuma);
      table = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE);
    }
  return table;
}
#endif
#ifdef LIBGOMP_USE_MEMKIND
static struct gomp_memkind_data *memkind_data;
static pthread_once_t memkind_data_once = PTHREAD_ONCE_INIT;
@ -166,7 +219,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
struct omp_allocator_data data
= { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all,
omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment,
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
GOMP_MEMKIND_NONE
#endif
};
@ -285,8 +338,8 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
switch (memspace)
{
case omp_high_bw_mem_space:
#ifdef LIBGOMP_USE_MEMKIND
case omp_high_bw_mem_space:
struct gomp_memkind_data *memkind_data;
memkind_data = gomp_get_memkind ();
if (data.partition == omp_atv_interleaved
@ -300,17 +353,15 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
data.memkind = GOMP_MEMKIND_HBW_PREFERRED;
break;
}
#endif
break;
case omp_large_cap_mem_space:
#ifdef LIBGOMP_USE_MEMKIND
memkind_data = gomp_get_memkind ();
if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL])
data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL;
else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM])
data.memkind = GOMP_MEMKIND_DAX_KMEM;
#endif
break;
#endif
default:
#ifdef LIBGOMP_USE_MEMKIND
if (data.partition == omp_atv_interleaved)
@ -323,6 +374,14 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits,
break;
}
#ifdef LIBGOMP_USE_LIBNUMA
  /* Use libnuma for partition=nearest only if it could actually be loaded.
     Otherwise leave memkind at GOMP_MEMKIND_NONE so that the regular
     allocation path is used instead of calling through NULL function
     pointers in the allocation, reallocation and free routines.  */
  if (data.memkind == GOMP_MEMKIND_NONE && data.partition == omp_atv_nearest)
    {
      libnuma_data = gomp_get_libnuma ();
      if (libnuma_data != NULL
          && libnuma_data->numa_alloc_local != NULL
          && libnuma_data->numa_realloc != NULL
          && libnuma_data->numa_free != NULL)
        data.memkind = GOMP_MEMKIND_LIBNUMA;
    }
#endif
/* No support for this so far. */
if (data.pinned)
return omp_null_allocator;
@ -357,8 +416,8 @@ omp_aligned_alloc (size_t alignment, size_t size,
struct omp_allocator_data *allocator_data;
size_t new_size, new_alignment;
void *ptr, *ret;
#ifdef LIBGOMP_USE_MEMKIND
enum gomp_memkind_kind memkind;
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
enum gomp_numa_memkind_kind memkind;
#endif
if (__builtin_expect (size == 0, 0))
@ -379,7 +438,7 @@ retry:
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
@ -388,8 +447,10 @@ retry:
allocator_data = NULL;
if (new_alignment < sizeof (void *))
new_alignment = sizeof (void *);
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
@ -444,6 +505,13 @@ retry:
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
ptr = libnuma_data->numa_alloc_local (new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -469,6 +537,13 @@ retry:
}
else
{
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
ptr = libnuma_data->numa_alloc_local (new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -502,7 +577,7 @@ fail:
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data
@ -577,6 +652,16 @@ omp_free (void *ptr, omp_allocator_handle_t allocator)
gomp_mutex_unlock (&allocator_data->lock);
#endif
}
#ifdef LIBGOMP_USE_LIBNUMA
if (allocator_data->memkind == GOMP_MEMKIND_LIBNUMA)
{
libnuma_data->numa_free (data->ptr, data->size);
return;
}
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (allocator_data->memkind)
{
@ -590,7 +675,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator)
#ifdef LIBGOMP_USE_MEMKIND
else
{
enum gomp_memkind_kind memkind = GOMP_MEMKIND_NONE;
enum gomp_numa_memkind_kind memkind = GOMP_MEMKIND_NONE;
if (data->allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (data->allocator == omp_large_cap_mem_alloc)
@ -625,8 +710,8 @@ omp_aligned_calloc (size_t alignment, size_t nmemb, size_t size,
struct omp_allocator_data *allocator_data;
size_t new_size, size_temp, new_alignment;
void *ptr, *ret;
#ifdef LIBGOMP_USE_MEMKIND
enum gomp_memkind_kind memkind;
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
enum gomp_numa_memkind_kind memkind;
#endif
if (__builtin_expect (size == 0 || nmemb == 0, 0))
@ -647,7 +732,7 @@ retry:
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
@ -656,8 +741,10 @@ retry:
allocator_data = NULL;
if (new_alignment < sizeof (void *))
new_alignment = sizeof (void *);
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
@ -714,6 +801,15 @@ retry:
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
/* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning
memory that is initialized to zero. */
ptr = libnuma_data->numa_alloc_local (new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -739,6 +835,15 @@ retry:
}
else
{
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
/* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning
memory that is initialized to zero. */
ptr = libnuma_data->numa_alloc_local (new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -772,7 +877,7 @@ fail:
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data
@ -815,8 +920,8 @@ omp_realloc (void *ptr, size_t size, omp_allocator_handle_t allocator,
size_t new_size, old_size, new_alignment, old_alignment;
void *new_ptr, *ret;
struct omp_mem_header *data;
#ifdef LIBGOMP_USE_MEMKIND
enum gomp_memkind_kind memkind, free_memkind;
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
enum gomp_numa_memkind_kind memkind, free_memkind;
#endif
if (__builtin_expect (ptr == NULL, 0))
@ -841,15 +946,17 @@ retry:
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
else
{
allocator_data = NULL;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
@ -865,15 +972,17 @@ retry:
if (free_allocator > omp_max_predefined_alloc)
{
free_allocator_data = (struct omp_allocator_data *) free_allocator;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
free_memkind = free_allocator_data->memkind;
#endif
}
else
{
free_allocator_data = NULL;
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
free_memkind = GOMP_MEMKIND_NONE;
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (free_allocator == omp_high_bw_mem_alloc)
free_memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (free_allocator == omp_large_cap_mem_alloc)
@ -953,6 +1062,19 @@ retry:
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
{
if (prev_size)
new_ptr = libnuma_data->numa_realloc (data->ptr, data->size,
new_size);
else
new_ptr = libnuma_data->numa_alloc_local (new_size);
}
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -994,12 +1116,19 @@ retry:
}
else if (new_alignment == sizeof (void *)
&& old_alignment == sizeof (struct omp_mem_header)
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
&& memkind == free_memkind
#endif
&& (free_allocator_data == NULL
|| free_allocator_data->pool_size == ~(uintptr_t) 0))
{
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -1021,6 +1150,13 @@ retry:
}
else
{
#ifdef LIBGOMP_USE_LIBNUMA
if (memkind == GOMP_MEMKIND_LIBNUMA)
new_ptr = libnuma_data->numa_alloc_local (new_size);
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
@ -1060,6 +1196,16 @@ retry:
gomp_mutex_unlock (&free_allocator_data->lock);
#endif
}
#ifdef LIBGOMP_USE_LIBNUMA
if (free_memkind == GOMP_MEMKIND_LIBNUMA)
{
libnuma_data->numa_free (data->ptr, data->size);
return ret;
}
# ifdef LIBGOMP_USE_MEMKIND
else
# endif
#endif
#ifdef LIBGOMP_USE_MEMKIND
if (free_memkind)
{
@ -1079,7 +1225,7 @@ fail:
{
case omp_atv_default_mem_fb:
if (new_alignment > sizeof (void *)
#ifdef LIBGOMP_USE_MEMKIND
#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data

View file

@ -31,6 +31,7 @@
#include "libgomp.h"
#if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS)
#define LIBGOMP_USE_MEMKIND
#define LIBGOMP_USE_LIBNUMA
#endif
#include "../../allocator.c"

View file

@ -2061,7 +2061,7 @@ Special values are output using @code{%} followed by an optional size
specification and then either the single-character field type or its long
name enclosed in curly braces; using @code{%%} will display a literal percent.
The size specification consists of an optional @code{0.} or @code{.} followed
by a positive integer, specifing the minimal width of the output. With
by a positive integer, specifying the minimal width of the output. With
@code{0.} and numerical values, the output is padded with zeros on the left;
with @code{.}, the output is padded by spaces on the left; otherwise, the
output is padded by spaces on the right. If unset, the value is
@ -2079,8 +2079,8 @@ Supported field types are:
@tab value returned by
@code{omp_get_ancestor_thread_num(omp_get_level()-1)}
@item H @tab host @tab name of the host that executes the thread
@item P @tab process_id @tab process identier
@item i @tab native_thread_id @tab native thread identier
@item P @tab process_id @tab process identifier
@item i @tab native_thread_id @tab native thread identifier
@item A @tab thread_affinity
@tab comma separated list of integer values or ranges, representing the
processors on which a process might execute, subject to affinity
@ -4584,7 +4584,7 @@ offloading devices (it's not clear if they should be):
@menu
* Implementation-defined ICV Initialization::
* OpenMP Context Selectors::
* Memory allocation with libmemkind::
* Memory allocation::
@end menu
@node Implementation-defined ICV Initialization
@ -4631,8 +4631,8 @@ smaller number. On non-host devices, the value of the
@tab See @code{-march=} in ``Nvidia PTX Options''
@end multitable
@node Memory allocation with libmemkind
@section Memory allocation with libmemkind
@node Memory allocation
@section Memory allocation
For the memory spaces, the following applies:
@itemize
@ -4652,20 +4652,40 @@ creating memory allocators requesting
@itemize
@item the memory space @code{omp_high_bw_mem_space}
@item the memory space @code{omp_large_cap_mem_space}
@item the partition trait @code{omp_atv_interleaved}; note that for
@item the @code{partition} trait @code{interleaved}; note that for
@code{omp_large_cap_mem_space} the allocation will not be interleaved
@end itemize
On Linux systems, where the @uref{https://github.com/numactl/numactl, numa
library} (@code{libnuma.so.1}) is available at runtime, it is used when creating
memory allocators requesting
@itemize
@item the @code{partition} trait @code{nearest}, except when both the
libmemkind library is available and the memory space is either
@code{omp_large_cap_mem_space} or @code{omp_high_bw_mem_space}
@end itemize
Note that the numa library will round up the allocation size to a multiple of
the system page size; therefore, consider using it only with large data or
by sharing allocations via the @code{pool_size} trait. Furthermore, the Linux
kernel does not guarantee that an allocation will always be on the nearest NUMA
node nor that after reallocation the same node will be used. Note additionally
that, on Linux, the default setting of the memory placement policy is to use the
current node; therefore, unless the memory placement policy has been overridden,
the @code{partition} trait @code{environment} (the default) will be effectively
a @code{nearest} allocation.
Additional notes:
@itemize
@item The @code{pinned} trait is unsupported.
@item For the @code{partition} trait, the partition part size will be the same
as the requested size (i.e. @code{interleaved} or @code{blocked} has no
effect), except for @code{interleaved} when the memkind library is
available. Furthermore, for @code{nearest} the memory might not be
on the same NUMA node as thread that allocated the memory; on Linux,
this is in particular the case when the memory placement policy is
set to preferred.
available. Furthermore, for @code{nearest} and unless the numa library
is available, the memory might not be on the same NUMA node as the thread
that allocated the memory; on Linux, this is in particular the case when
the memory placement policy is set to preferred.
@item The @code{access} trait has no effect such that memory is always
accessible by all threads.
@item The @code{sync_hint} trait has no effect.

View file

@ -0,0 +1,285 @@
/* This testcase is mostly the same as alloc-9.c.
However, on systems where the numa and/or memkind libraries are
installed, libgomp uses those. This test ensures that the minimal
features work. Note: No attempt has been made to verify the partition
hints interleaved and nearest as the kernel purposely ignores them once
in a while and it would also require a 'dlopen' dance.
memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space
and partition = interleaved, albeit it won't be interleaved for
omp_large_cap_mem_space.
numa is used for partition = nearest, unless memkind is used. */
#include <omp.h>
#include <stdint.h>
#include <stdlib.h>
/* Traits for an allocator with 16-byte alignment, a 1024-byte pool,
   fallback to the default memory allocator and partition = nearest.  */
const omp_alloctrait_t traits2[]
= { { omp_atk_alignment, 16 },
{ omp_atk_sync_hint, omp_atv_default },
{ omp_atk_access, omp_atv_default },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_default_mem_fb },
{ omp_atk_partition, omp_atv_nearest } };
/* Traits for an allocator with 32-byte alignment, a 512-byte pool,
   fallback to another allocator and partition = interleaved.  Not const:
   main stores the fallback allocator handle into entry 5
   (omp_atk_fb_data) before each use.  */
omp_alloctrait_t traits3[]
= { { omp_atk_sync_hint, omp_atv_uncontended },
{ omp_atk_alignment, 32 },
{ omp_atk_access, omp_atv_all },
{ omp_atk_pool_size, 512 },
{ omp_atk_fallback, omp_atv_allocator_fb },
{ omp_atk_fb_data, 0 },
{ omp_atk_partition, omp_atv_interleaved } };
/* Traits for an allocator with 128-byte alignment, a 1024-byte pool and
   the null fallback.  */
const omp_alloctrait_t traits4[]
= { { omp_atk_alignment, 128 },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_null_fb } };
/* Exercise omp_alloc, omp_realloc and omp_free with the predefined
   high-bandwidth and large-capacity allocators and with user-defined
   allocators that request partition = nearest or interleaved.  Every
   check calls abort () on failure; returns 0 when all checks pass.  */
int
main ()
{
int *volatile p = (int *) omp_alloc (3 * sizeof (int), omp_default_mem_alloc);
int *volatile q;
int *volatile r;
/* 64-byte alignment, null fallback, 4096-byte pool, partition = nearest.  */
omp_alloctrait_t traits[4]
= { { omp_atk_alignment, 64 },
{ omp_atk_fallback, omp_atv_null_fb },
{ omp_atk_pool_size, 4096 },
{ omp_atk_partition, omp_atv_nearest } };
/* Null fallback and 4096-byte pool only.  */
omp_alloctrait_t traits5[2]
= { { omp_atk_fallback, omp_atv_null_fb },
{ omp_atk_pool_size, 4096 } };
omp_allocator_handle_t a, a2;
if ((((uintptr_t) p) % __alignof (int)) != 0)
abort ();
p[0] = 1;
p[1] = 2;
p[2] = 3;
/* Grow to four ints with omp_high_bw_mem_alloc; the old contents must be
   preserved.
   NOTE(review): p was obtained from omp_default_mem_alloc above, yet
   omp_high_bw_mem_alloc is passed as free_allocator here -- confirm that
   this mismatch is intended.  */
p = (int *) omp_realloc (p, 4 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc);
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 1 || p[1] != 2 || p[2] != 3)
abort ();
p[0] = 4;
p[1] = 5;
p[2] = 6;
p[3] = 7;
/* Shrink to two ints; the leading elements must be preserved.  */
p = (int *) omp_realloc (p, 2 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc);
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 4 || p[1] != 5)
abort ();
p[0] = 8;
p[1] = 9;
/* A reallocation to size 0 is expected to return NULL.  */
if (omp_realloc (p, 0, omp_null_allocator, omp_high_bw_mem_alloc) != NULL)
abort ();
/* omp_realloc with a NULL pointer is used as a plain allocation, here
   from omp_large_cap_mem_alloc.  */
p = (int *) omp_realloc (NULL, 2 * sizeof (int), omp_large_cap_mem_alloc, omp_null_allocator);
if ((((uintptr_t) p) % __alignof (int)) != 0)
abort ();
p[0] = 1;
p[1] = 2;
p = (int *) omp_realloc (p, 5 * sizeof (int), omp_large_cap_mem_alloc, omp_large_cap_mem_alloc);
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 1 || p[1] != 2)
abort ();
p[0] = 3;
p[1] = 4;
p[2] = 5;
p[3] = 6;
p[4] = 7;
omp_free (p, omp_null_allocator);
/* Repeat with omp_large_cap_mem_alloc installed as default allocator and
   omp_null_allocator passed to the routines (presumably resolved to the
   default allocator by the runtime).  */
omp_set_default_allocator (omp_large_cap_mem_alloc);
if (omp_realloc (NULL, 0, omp_null_allocator, omp_null_allocator) != NULL)
abort ();
p = (int *) omp_alloc (sizeof (int), omp_null_allocator);
if ((((uintptr_t) p) % __alignof (int)) != 0)
abort ();
p[0] = 3;
p = (int *) omp_realloc (p, 3 * sizeof (int), omp_null_allocator, omp_null_allocator);
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 3)
abort ();
p[0] = 4;
p[1] = 5;
p[2] = 6;
if (omp_realloc (p, 0, omp_null_allocator, omp_get_default_allocator ()) != NULL)
abort ();
/* User-defined allocator in the default memory space using 'traits'
   (partition = nearest); results must honor the 64-byte alignment.  */
a = omp_init_allocator (omp_default_mem_space, 4, traits);
if (a == omp_null_allocator)
abort ();
p = (int *) omp_alloc (sizeof (int), a);
if ((((uintptr_t) p) % 64) != 0)
abort ();
p[0] = 7;
p = (int *) omp_realloc (p, 3072, a, a);
if ((((uintptr_t) p) % 64) != 0 || p[0] != 7)
abort ();
p[0] = 1;
p[3071 / sizeof (int)] = 2;
q = (int *) omp_alloc (sizeof (int), a);
if ((((uintptr_t) q) % 64) != 0)
abort ();
q[0] = 8;
/* With 3072 bytes already held by p, growing q to 3072 bytes is expected
   to fail and return NULL (presumably because the 4096-byte pool is
   exceeded and the fallback is omp_atv_null_fb).  q itself is still
   released below.  */
if (omp_realloc (q, 3072, a, a) != NULL)
abort ();
omp_free (p, a);
omp_free (q, a);
p = (int *) omp_alloc (sizeof (int), a);
p[0] = 42;
p = (int *) omp_realloc (p, 3072, a, a);
if (p[0] != 42)
abort ();
p[0] = 3;
p[3071 / sizeof (int)] = 4;
/* Size 0 with null allocators: releases p.  */
omp_realloc (p, 0, omp_null_allocator, omp_null_allocator);
/* Same pool-limit checks, now reaching 'a' through the default
   allocator.  */
omp_set_default_allocator (a);
if (omp_get_default_allocator () != a)
abort ();
p = (int *) omp_alloc (31, omp_null_allocator);
if (p == NULL)
abort ();
p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator);
if (p == NULL)
abort ();
q = (int *) omp_alloc (sizeof (int), omp_null_allocator);
if (q == NULL)
abort ();
if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL)
abort ();
omp_free (p, a);
omp_free (q, a);
omp_destroy_allocator (a);
/* Allocator in omp_large_cap_mem_space using 'traits5'; again the second
   3072-byte request is expected to fail.  */
a = omp_init_allocator (omp_large_cap_mem_space, 2, traits5);
if (a == omp_null_allocator)
abort ();
omp_set_default_allocator (a);
if (omp_get_default_allocator () != a)
abort ();
p = (int *) omp_alloc (3071, omp_null_allocator);
if (p == NULL)
abort ();
p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator);
if (p == NULL)
abort ();
q = (int *) omp_alloc (sizeof (int), omp_null_allocator);
if (q == NULL)
abort ();
if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL)
abort ();
omp_free (p, a);
omp_free (q, a);
omp_destroy_allocator (a);
/* 'a' uses traits2 (alignment 16, partition = nearest); 'a2' uses traits3
   (alignment 32, partition = interleaved) with 'a' stored as its
   fallback allocator.  */
a = omp_init_allocator (omp_default_mem_space,
sizeof (traits2) / sizeof (traits2[0]),
traits2);
if (a == omp_null_allocator)
abort ();
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
a2 = omp_init_allocator (omp_default_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
p = (int *) omp_alloc (sizeof (int), a2);
if ((((uintptr_t) p) % 32) != 0)
abort ();
p[0] = 84;
p = (int *) omp_realloc (p, 380, a2, a2);
if ((((uintptr_t) p) % 32) != 0 || p[0] != 84)
abort ();
p[0] = 5;
p[379 / sizeof (int)] = 6;
q = (int *) omp_alloc (sizeof (int), a2);
if ((((uintptr_t) q) % 32) != 0)
abort ();
q[0] = 42;
/* 768 bytes is more than the 512-byte pool of a2; only 16-byte alignment
   is required of the result (presumably because the request is served by
   the fallback allocator 'a').  */
q = (int *) omp_realloc (q, 768, a2, a2);
if ((((uintptr_t) q) % 16) != 0 || q[0] != 42)
abort ();
q[0] = 7;
q[767 / sizeof (int)] = 8;
r = (int *) omp_realloc (NULL, 512, a2, omp_null_allocator);
if ((((uintptr_t) r) % __alignof (int)) != 0)
abort ();
r[0] = 9;
r[511 / sizeof (int)] = 10;
omp_free (p, omp_null_allocator);
omp_free (q, a2);
omp_free (r, omp_null_allocator);
/* Reallocate across allocators: from a2 to a, then from a to a2; the
   contents must be carried over each time.  */
p = (int *) omp_alloc (sizeof (int), a2);
if ((((uintptr_t) p) % 32) != 0)
abort ();
p[0] = 85;
p = (int *) omp_realloc (p, 320, a, a2);
if ((((uintptr_t) p) % 16) != 0 || p[0] != 85)
abort ();
p[0] = 5;
p[319 / sizeof (int)] = 6;
q = (int *) omp_alloc (sizeof (int), a);
if ((((uintptr_t) q) % 16) != 0)
abort ();
q[0] = 43;
q = (int *) omp_realloc (q, 320, a2, a);
if ((((uintptr_t) q) % 32) != 0 || q[0] != 43)
abort ();
q[0] = 44;
q[319 / sizeof (int)] = 8;
q = (int *) omp_realloc (q, 568, a2, a2);
if ((((uintptr_t) q) % 16) != 0 || q[0] != 44)
abort ();
q[0] = 7;
q[567 / sizeof (int)] = 8;
omp_free (p, omp_null_allocator);
omp_free (q, a2);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
/* 'a' in omp_large_cap_mem_space using traits4 (alignment 128, null
   fallback); 'a2' uses traits3 with this 'a' as fallback and becomes the
   default allocator.  */
a = omp_init_allocator (omp_large_cap_mem_space,
sizeof (traits4) / sizeof (traits4[0]),
traits4);
if (a == omp_null_allocator)
abort ();
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
a2 = omp_init_allocator (omp_default_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
omp_set_default_allocator (a2);
/* In C++ the allocator arguments are omitted, relying on the defaults
   declared in omp.h.  */
#ifdef __cplusplus
p = static_cast <int *> (omp_realloc (NULL, 420));
#else
p = (int *) omp_realloc (NULL, 420, omp_null_allocator, omp_null_allocator);
#endif
if ((((uintptr_t) p) % 32) != 0)
abort ();
p[0] = 5;
p[419 / sizeof (int)] = 6;
q = (int *) omp_realloc (NULL, sizeof (int), omp_null_allocator, omp_null_allocator);
if ((((uintptr_t) q) % 32) != 0)
abort ();
q[0] = 99;
/* The grown block is required to be 128-byte aligned (presumably it comes
   from the fallback allocator 'a').  */
q = (int *) omp_realloc (q, 700, omp_null_allocator, omp_null_allocator);
if ((((uintptr_t) q) % 128) != 0 || q[0] != 99)
abort ();
q[0] = 7;
q[699 / sizeof (int)] = 8;
/* A further 768-byte request is expected to fail with NULL.  */
if (omp_realloc (NULL, 768, omp_null_allocator, omp_null_allocator) != NULL)
abort ();
#ifdef __cplusplus
omp_free (p);
if (omp_realloc (q, 0) != NULL)
abort ();
omp_free (NULL);
#else
omp_free (p, omp_null_allocator);
if (omp_realloc (q, 0, omp_null_allocator, omp_null_allocator) != NULL)
abort ();
omp_free (NULL, omp_null_allocator);
#endif
omp_free (NULL, omp_null_allocator);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
return 0;
}

View file

@ -0,0 +1,217 @@
/* This testcase is mostly the same as alloc-8.c.
However, on systems where the numa and/or memkind libraries are
installed, libgomp uses those. This test ensures that the minimal
features work. Note: No attempt has been made to verify the partition
hints interleaved and nearest as the kernel purposely ignores them once
in a while and it would also require a 'dlopen' dance.
memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space
and partition = interleaved, albeit it won't be interleaved for
omp_large_cap_mem_space.
numa is used for partition = nearest, unless memkind is used. */
#include <omp.h>
#include <stdint.h>
#include <stdlib.h>
/* Traits for an allocator with 16-byte alignment, a 1024-byte pool,
   fallback to the default memory allocator and partition = nearest.  */
const omp_alloctrait_t traits2[]
= { { omp_atk_alignment, 16 },
{ omp_atk_sync_hint, omp_atv_default },
{ omp_atk_access, omp_atv_default },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_default_mem_fb },
{ omp_atk_partition, omp_atv_nearest } };
/* Traits for an allocator with 32-byte alignment, a 512-byte pool,
   fallback to another allocator and partition = interleaved.  Not const:
   main stores the fallback allocator handle into entry 5
   (omp_atk_fb_data) before each use.  */
omp_alloctrait_t traits3[]
= { { omp_atk_sync_hint, omp_atv_uncontended },
{ omp_atk_alignment, 32 },
{ omp_atk_access, omp_atv_all },
{ omp_atk_pool_size, 512 },
{ omp_atk_fallback, omp_atv_allocator_fb },
{ omp_atk_fb_data, 0 },
{ omp_atk_partition, omp_atv_interleaved } };
/* Traits for an allocator with 128-byte alignment, a 1024-byte pool and
   the null fallback.  */
const omp_alloctrait_t traits4[]
= { { omp_atk_alignment, 128 },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_null_fb } };
/* Abort the program unless each of the LEN bytes starting at PTR is zero.
   Nothing is read when LEN is 0.  */
static void
check_all_zero (void *ptr, size_t len)
{
  const char *byte = (const char *) ptr;
  size_t remaining = len;

  while (remaining > 0)
    {
      if (*byte != '\0')
        abort ();
      byte++;
      remaining--;
    }
}
/* Exercise omp_aligned_calloc and omp_free with the predefined
   high-bandwidth and large-capacity allocators and with user-defined
   allocators, verifying alignment and that the returned memory is
   zero-filled.  Every check calls abort () on failure; returns 0 when all
   checks pass.  */
int
main ()
{
int *volatile p = (int *) omp_aligned_calloc (sizeof (int), 3, sizeof (int), omp_high_bw_mem_alloc);
check_all_zero (p, 3*sizeof (int));
int *volatile q;
int *volatile r;
int i;
/* 64-byte alignment, null fallback, 4096-byte pool.  */
omp_alloctrait_t traits[3]
= { { omp_atk_alignment, 64 },
{ omp_atk_fallback, omp_atv_null_fb },
{ omp_atk_pool_size, 4096 } };
omp_allocator_handle_t a, a2;
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] || p[1] || p[2])
abort ();
p[0] = 1;
p[1] = 2;
p[2] = 3;
omp_free (p, omp_high_bw_mem_alloc);
/* One element of 2 * sizeof (int) bytes, aligned to its own size, from
   omp_large_cap_mem_alloc.  */
p = (int *) omp_aligned_calloc (2 * sizeof (int), 1, 2 * sizeof (int), omp_large_cap_mem_alloc);
check_all_zero (p, 2*sizeof (int));
if ((((uintptr_t) p) % (2 * sizeof (int))) != 0 || p[0] || p[1])
abort ();
p[0] = 1;
p[1] = 2;
omp_free (p, omp_null_allocator);
/* Same through the default allocator (omp_null_allocator is presumably
   resolved to it by the runtime).  */
omp_set_default_allocator (omp_large_cap_mem_alloc);
p = (int *) omp_aligned_calloc (1, 1, sizeof (int), omp_null_allocator);
check_all_zero (p, sizeof (int));
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0])
abort ();
p[0] = 3;
omp_free (p, omp_get_default_allocator ());
/* User-defined allocator in omp_large_cap_mem_space using 'traits'.  */
a = omp_init_allocator (omp_large_cap_mem_space, 3, traits);
if (a == omp_null_allocator)
abort ();
/* Alignment 32 is requested, but the allocator's 64-byte alignment trait
   is what the result is checked against.  */
p = (int *) omp_aligned_calloc (32, 3, 1024, a);
check_all_zero (p, 3*1024);
if ((((uintptr_t) p) % 64) != 0)
abort ();
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
p[0] = 1;
p[3071 / sizeof (int)] = 2;
/* A second 192 * 16 = 3072-byte request while p is live is expected to
   fail with NULL (presumably the 4096-byte pool is exceeded and the
   fallback is omp_atv_null_fb).  */
if (omp_aligned_calloc (8, 192, 16, a) != NULL)
abort ();
omp_free (p, a);
/* A requested alignment (128) larger than the allocator's must be
   honored.  */
p = (int *) omp_aligned_calloc (128, 6, 512, a);
check_all_zero (p, 6*512);
if ((((uintptr_t) p) % 128) != 0)
abort ();
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
p[0] = 3;
p[3071 / sizeof (int)] = 4;
omp_free (p, omp_null_allocator);
/* Same pool-limit check, reaching 'a' through the default allocator.  */
omp_set_default_allocator (a);
if (omp_get_default_allocator () != a)
abort ();
p = (int *) omp_aligned_calloc (64, 12, 256, omp_null_allocator);
check_all_zero (p, 12*256);
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
if (omp_aligned_calloc (8, 128, 24, omp_null_allocator) != NULL)
abort ();
omp_free (p, a);
omp_destroy_allocator (a);
/* 'a' uses traits2 (alignment 16, partition = nearest); 'a2' uses traits3
   (alignment 32, partition = interleaved) with 'a' stored as its
   fallback allocator.  */
a = omp_init_allocator (omp_default_mem_space,
sizeof (traits2) / sizeof (traits2[0]),
traits2);
if (a == omp_null_allocator)
abort ();
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
a2 = omp_init_allocator (omp_default_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
p = (int *) omp_aligned_calloc (4, 5, 84, a2);
check_all_zero (p, 5*84);
for (i = 0; i < 420 / sizeof (int); i++)
if (p[i])
abort ();
if ((((uintptr_t) p) % 32) != 0)
abort ();
p[0] = 5;
p[419 / sizeof (int)] = 6;
/* 24 * 32 = 768 bytes is more than the 512-byte pool of a2; only 16-byte
   alignment is required of the result (presumably because the request is
   served by the fallback allocator 'a').  */
q = (int *) omp_aligned_calloc (8, 24, 32, a2);
check_all_zero (q, 24*32);
if ((((uintptr_t) q) % 16) != 0)
abort ();
for (i = 0; i < 768 / sizeof (int); i++)
if (q[i])
abort ();
q[0] = 7;
q[767 / sizeof (int)] = 8;
r = (int *) omp_aligned_calloc (8, 64, 8, a2);
check_all_zero (r, 64*8);
if ((((uintptr_t) r) % 8) != 0)
abort ();
for (i = 0; i < 512 / sizeof (int); i++)
if (r[i])
abort ();
r[0] = 9;
r[511 / sizeof (int)] = 10;
omp_free (p, omp_null_allocator);
omp_free (q, a2);
omp_free (r, omp_null_allocator);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
/* Both allocators now live in omp_high_bw_mem_space: 'a' uses traits4
   (alignment 128, null fallback), 'a2' uses traits3 with this 'a' as
   fallback and becomes the default allocator.  */
a = omp_init_allocator (omp_high_bw_mem_space,
sizeof (traits4) / sizeof (traits4[0]),
traits4);
if (a == omp_null_allocator)
abort ();
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
a2 = omp_init_allocator (omp_high_bw_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
omp_set_default_allocator (a2);
/* In C++ the allocator argument is omitted, relying on the default
   declared in omp.h.  */
#ifdef __cplusplus
p = static_cast <int *> (omp_aligned_calloc (4, 21, 20));
#else
p = (int *) omp_aligned_calloc (4, 21, 20, omp_null_allocator);
#endif
check_all_zero (p, 21*20);
if ((((uintptr_t) p) % 32) != 0)
abort ();
for (i = 0; i < 420 / sizeof (int); i++)
if (p[i])
abort ();
p[0] = 5;
p[419 / sizeof (int)] = 6;
/* The 768-byte block is required to be 128-byte aligned (presumably it
   comes from the fallback allocator 'a').  */
q = (int *) omp_aligned_calloc (64, 12, 64, omp_null_allocator);
check_all_zero (q, 12*64);
if ((((uintptr_t) q) % 128) != 0)
abort ();
for (i = 0; i < 768 / sizeof (int); i++)
if (q[i])
abort ();
q[0] = 7;
q[767 / sizeof (int)] = 8;
/* A further 768-byte request is expected to fail with NULL.  */
if (omp_aligned_calloc (8, 24, 32, omp_null_allocator) != NULL)
abort ();
#ifdef __cplusplus
omp_free (p);
omp_free (q);
omp_free (NULL);
#else
omp_free (p, omp_null_allocator);
omp_free (q, omp_null_allocator);
omp_free (NULL, omp_null_allocator);
#endif
omp_free (NULL, omp_null_allocator);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
return 0;
}