diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 25c0f150302..b3187ab2911 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -31,13 +31,13 @@ #include "libgomp.h" #include #include -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) #include #endif #define omp_max_predefined_alloc omp_thread_mem_alloc -enum gomp_memkind_kind +enum gomp_numa_memkind_kind { GOMP_MEMKIND_NONE = 0, #define GOMP_MEMKIND_KINDS \ @@ -50,7 +50,8 @@ enum gomp_memkind_kind #define GOMP_MEMKIND_KIND(kind) GOMP_MEMKIND_##kind GOMP_MEMKIND_KINDS, #undef GOMP_MEMKIND_KIND - GOMP_MEMKIND_COUNT + GOMP_MEMKIND_COUNT, + GOMP_MEMKIND_LIBNUMA = GOMP_MEMKIND_COUNT }; struct omp_allocator_data @@ -65,7 +66,7 @@ struct omp_allocator_data unsigned int fallback : 8; unsigned int pinned : 1; unsigned int partition : 7; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) unsigned int memkind : 8; #endif #ifndef HAVE_SYNC_BUILTINS @@ -81,6 +82,14 @@ struct omp_mem_header void *pad; }; +struct gomp_libnuma_data +{ + void *numa_handle; + void *(*numa_alloc_local) (size_t); + void *(*numa_realloc) (void *, size_t, size_t); + void (*numa_free) (void *, size_t); +}; + struct gomp_memkind_data { void *memkind_handle; @@ -92,6 +101,50 @@ struct gomp_memkind_data void **kinds[GOMP_MEMKIND_COUNT]; }; +#ifdef LIBGOMP_USE_LIBNUMA +static struct gomp_libnuma_data *libnuma_data; +static pthread_once_t libnuma_data_once = PTHREAD_ONCE_INIT; + +static void +gomp_init_libnuma (void) +{ + void *handle = dlopen ("libnuma.so.1", RTLD_LAZY); + struct gomp_libnuma_data *data; + + data = calloc (1, sizeof (struct gomp_libnuma_data)); + if (data == NULL) + { + if (handle) + dlclose (handle); + return; + } + if (!handle) + { + __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE); + return; + } + data->numa_handle = handle; + data->numa_alloc_local + = (__typeof (data->numa_alloc_local)) dlsym (handle, "numa_alloc_local"); + 
data->numa_realloc + = (__typeof (data->numa_realloc)) dlsym (handle, "numa_realloc"); + data->numa_free + = (__typeof (data->numa_free)) dlsym (handle, "numa_free"); + __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE); +} + +static struct gomp_libnuma_data * +gomp_get_libnuma (void) +{ + struct gomp_libnuma_data *data + = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE); + if (data) + return data; + pthread_once (&libnuma_data_once, gomp_init_libnuma); + return __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE); +} +#endif + #ifdef LIBGOMP_USE_MEMKIND static struct gomp_memkind_data *memkind_data; static pthread_once_t memkind_data_once = PTHREAD_ONCE_INIT; @@ -166,7 +219,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, struct omp_allocator_data data = { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all, omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment, -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) GOMP_MEMKIND_NONE #endif }; @@ -285,8 +338,8 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, switch (memspace) { - case omp_high_bw_mem_space: #ifdef LIBGOMP_USE_MEMKIND + case omp_high_bw_mem_space: struct gomp_memkind_data *memkind_data; memkind_data = gomp_get_memkind (); if (data.partition == omp_atv_interleaved @@ -300,17 +353,15 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, data.memkind = GOMP_MEMKIND_HBW_PREFERRED; break; } -#endif break; case omp_large_cap_mem_space: -#ifdef LIBGOMP_USE_MEMKIND memkind_data = gomp_get_memkind (); if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL]) data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL; else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM]) data.memkind = GOMP_MEMKIND_DAX_KMEM; -#endif break; +#endif default: #ifdef LIBGOMP_USE_MEMKIND if (data.partition == omp_atv_interleaved) @@ -323,6 +374,14 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, break; } +#ifdef 
LIBGOMP_USE_LIBNUMA + if (data.memkind == GOMP_MEMKIND_NONE && data.partition == omp_atv_nearest) + { + data.memkind = GOMP_MEMKIND_LIBNUMA; + libnuma_data = gomp_get_libnuma (); + } +#endif + /* No support for this so far. */ if (data.pinned) return omp_null_allocator; @@ -357,8 +416,8 @@ omp_aligned_alloc (size_t alignment, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, new_alignment; void *ptr, *ret; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind; #endif if (__builtin_expect (size == 0, 0)) @@ -379,7 +438,7 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } @@ -388,8 +447,10 @@ retry: allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -444,6 +505,13 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -469,6 +537,13 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -502,7 +577,7 @@ fail: { case 
omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data @@ -577,6 +652,16 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) gomp_mutex_unlock (&allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_LIBNUMA + if (allocator_data->memkind == GOMP_MEMKIND_LIBNUMA) + { + libnuma_data->numa_free (data->ptr, data->size); + return; + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (allocator_data->memkind) { @@ -590,7 +675,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) #ifdef LIBGOMP_USE_MEMKIND else { - enum gomp_memkind_kind memkind = GOMP_MEMKIND_NONE; + enum gomp_numa_memkind_kind memkind = GOMP_MEMKIND_NONE; if (data->allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (data->allocator == omp_large_cap_mem_alloc) @@ -625,8 +710,8 @@ omp_aligned_calloc (size_t alignment, size_t nmemb, size_t size, struct omp_allocator_data *allocator_data; size_t new_size, size_temp, new_alignment; void *ptr, *ret; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind; #endif if (__builtin_expect (size == 0 || nmemb == 0, 0)) @@ -647,7 +732,7 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } @@ -656,8 +741,10 @@ retry: allocator_data = NULL; if (new_alignment < sizeof (void *)) new_alignment = sizeof (void *); -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (allocator == 
omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -714,6 +801,15 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning + memory that is initialized to zero. */ + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -739,6 +835,15 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning + memory that is initialized to zero. */ + ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -772,7 +877,7 @@ fail: { case omp_atv_default_mem_fb: if ((new_alignment > sizeof (void *) && new_alignment > alignment) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data @@ -815,8 +920,8 @@ omp_realloc (void *ptr, size_t size, omp_allocator_handle_t allocator, size_t new_size, old_size, new_alignment, old_alignment; void *new_ptr, *ret; struct omp_mem_header *data; -#ifdef LIBGOMP_USE_MEMKIND - enum gomp_memkind_kind memkind, free_memkind; +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) + enum gomp_numa_memkind_kind memkind, free_memkind; #endif if (__builtin_expect (ptr == NULL, 0)) @@ -841,15 +946,17 @@ retry: allocator_data = (struct omp_allocator_data *) allocator; if (new_alignment < allocator_data->alignment) new_alignment = allocator_data->alignment; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = allocator_data->memkind; #endif } else { allocator_data = NULL; -#ifdef LIBGOMP_USE_MEMKIND +#if 
defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (allocator == omp_high_bw_mem_alloc) memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (allocator == omp_large_cap_mem_alloc) @@ -865,15 +972,17 @@ retry: if (free_allocator > omp_max_predefined_alloc) { free_allocator_data = (struct omp_allocator_data *) free_allocator; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) free_memkind = free_allocator_data->memkind; #endif } else { free_allocator_data = NULL; -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) free_memkind = GOMP_MEMKIND_NONE; +#endif +#ifdef LIBGOMP_USE_MEMKIND if (free_allocator == omp_high_bw_mem_alloc) free_memkind = GOMP_MEMKIND_HBW_PREFERRED; else if (free_allocator == omp_large_cap_mem_alloc) @@ -953,6 +1062,19 @@ retry: allocator_data->used_pool_size = used_pool_size; gomp_mutex_unlock (&allocator_data->lock); #endif +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + { + if (prev_size) + new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, + new_size); + else + new_ptr = libnuma_data->numa_alloc_local (new_size); + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -994,12 +1116,19 @@ retry: } else if (new_alignment == sizeof (void *) && old_alignment == sizeof (struct omp_mem_header) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) && memkind == free_memkind #endif && (free_allocator_data == NULL || free_allocator_data->pool_size == ~(uintptr_t) 0)) { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == GOMP_MEMKIND_LIBNUMA) + new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -1021,6 +1150,13 @@ retry: } else { +#ifdef LIBGOMP_USE_LIBNUMA + if (memkind == 
GOMP_MEMKIND_LIBNUMA) + new_ptr = libnuma_data->numa_alloc_local (new_size); +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (memkind) { @@ -1060,6 +1196,16 @@ retry: gomp_mutex_unlock (&free_allocator_data->lock); #endif } +#ifdef LIBGOMP_USE_LIBNUMA + if (free_memkind == GOMP_MEMKIND_LIBNUMA) + { + libnuma_data->numa_free (data->ptr, data->size); + return ret; + } +# ifdef LIBGOMP_USE_MEMKIND + else +# endif +#endif #ifdef LIBGOMP_USE_MEMKIND if (free_memkind) { @@ -1079,7 +1225,7 @@ fail: { case omp_atv_default_mem_fb: if (new_alignment > sizeof (void *) -#ifdef LIBGOMP_USE_MEMKIND +#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA) || memkind #endif || (allocator_data diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index 15babcd1ada..64b1b4b9623 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -31,6 +31,7 @@ #include "libgomp.h" #if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS) #define LIBGOMP_USE_MEMKIND +#define LIBGOMP_USE_LIBNUMA #endif #include "../../allocator.c" diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index d1a5e67329a..9d910e6883c 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -2061,7 +2061,7 @@ Special values are output using @code{%} followed by an optional size specification and then either the single-character field type or its long name enclosed in curly braces; using @code{%%} will display a literal percent. The size specification consists of an optional @code{0.} or @code{.} followed -by a positive integer, specifing the minimal width of the output. With +by a positive integer, specifying the minimal width of the output. With @code{0.} and numerical values, the output is padded with zeros on the left; with @code{.}, the output is padded by spaces on the left; otherwise, the output is padded by spaces on the right. 
If unset, the value is @@ -2079,8 +2079,8 @@ Supported field types are: @tab value returned by @code{omp_get_ancestor_thread_num(omp_get_level()-1)} @item H @tab host @tab name of the host that executes the thread -@item P @tab process_id @tab process identifier -@item i @tab native_thread_id @tab native thread identifier +@item P @tab process_id @tab process identifier +@item i @tab native_thread_id @tab native thread identifier @item A @tab thread_affinity @tab comma separated list of integer values or ranges, representing the processors on which a process might execute, subject to affinity @@ -4584,7 +4584,7 @@ offloading devices (it's not clear if they should be): @menu * Implementation-defined ICV Initialization:: * OpenMP Context Selectors:: -* Memory allocation with libmemkind:: +* Memory allocation:: @end menu @node Implementation-defined ICV Initialization @@ -4631,8 +4631,8 @@ smaller number. On non-host devices, the value of the @tab See @code{-march=} in ``Nvidia PTX Options'' @end multitable -@node Memory allocation with libmemkind -@section Memory allocation with libmemkind +@node Memory allocation +@section Memory allocation For the memory spaces, the following applies: @itemize @@ -4652,20 +4652,40 @@ creating memory allocators requesting @itemize @item the memory space @code{omp_high_bw_mem_space} @item the memory space @code{omp_large_cap_mem_space} -@item the partition trait @code{omp_atv_interleaved}; note that for +@item the @code{partition} trait @code{interleaved}; note that for @code{omp_large_cap_mem_space} the allocation will not be interleaved @end itemize +On Linux systems, where the @uref{https://github.com/numactl/numactl, numa +library} (@code{libnuma.so.1}) is available at runtime, it is used when creating +memory allocators requesting + +@itemize +@item the @code{partition} trait @code{nearest}, except when both the +libmemkind library is available and the memory space is either +@code{omp_large_cap_mem_space} or 
@code{omp_high_bw_mem_space} +@end itemize + +Note that the numa library will round up the allocation size to a multiple of +the system page size; therefore, consider using it only with large data or +by sharing allocations via the @code{pool_size} trait. Furthermore, the Linux +kernel does not guarantee that an allocation will always be on the nearest NUMA +node nor that after reallocation the same node will be used. Note additionally +that, on Linux, the default setting of the memory placement policy is to use the +current node; therefore, unless the memory placement policy has been overridden, +the @code{partition} trait @code{environment} (the default) will be effectively +a @code{nearest} allocation. + Additional notes: @itemize @item The @code{pinned} trait is unsupported. @item For the @code{partition} trait, the partition part size will be the same as the requested size (i.e. @code{interleaved} or @code{blocked} has no effect), except for @code{interleaved} when the memkind library is - available. Furthermore, for @code{nearest} the memory might not be - on the same NUMA node as thread that allocated the memory; on Linux, - this is in particular the case when the memory placement policy is - set to preferred. + available. Furthermore, for @code{nearest} and unless the numa library + is available, the memory might not be on the same NUMA node as the thread + that allocated the memory; on Linux, this is in particular the case when + the memory placement policy is set to preferred. @item The @code{access} trait has no effect such that memory is always accessible by all threads. @item The @code{sync_hint} trait has no effect. diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-11.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-11.c new file mode 100644 index 00000000000..5fbadf4406a --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-11.c @@ -0,0 +1,285 @@ +/* This testcase is mostly the same as alloc-9.c. 
+ However, on systems where the numa and/or memkind libraries are + installed, libgomp uses those. This test ensures that the minimal + features work. Note: No attempt has been made to verify the partition + hints interleaved and nearest as the kernel purposely ignores them once + in a while and it would also require a 'dlopen' dance. + + memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space + and partition = interleaved, albeit it won't be interleaved for + omp_large_cap_mem_space. + + numa is used for partition = nearest, unless memkind is used. */ + +#include +#include +#include + +const omp_alloctrait_t traits2[] += { { omp_atk_alignment, 16 }, + { omp_atk_sync_hint, omp_atv_default }, + { omp_atk_access, omp_atv_default }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_default_mem_fb }, + { omp_atk_partition, omp_atv_nearest } }; +omp_alloctrait_t traits3[] += { { omp_atk_sync_hint, omp_atv_uncontended }, + { omp_atk_alignment, 32 }, + { omp_atk_access, omp_atv_all }, + { omp_atk_pool_size, 512 }, + { omp_atk_fallback, omp_atv_allocator_fb }, + { omp_atk_fb_data, 0 }, + { omp_atk_partition, omp_atv_interleaved } }; +const omp_alloctrait_t traits4[] += { { omp_atk_alignment, 128 }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_null_fb } }; + +int +main () +{ + int *volatile p = (int *) omp_alloc (3 * sizeof (int), omp_default_mem_alloc); + int *volatile q; + int *volatile r; + omp_alloctrait_t traits[4] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, 4096 }, + { omp_atk_partition, omp_atv_nearest } }; + omp_alloctrait_t traits5[2] + = { { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, 4096 } }; + omp_allocator_handle_t a, a2; + + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 1; + p[1] = 2; + p[2] = 3; + p = (int *) omp_realloc (p, 4 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc); + if ((((uintptr_t) p) % __alignof (int)) != 0 
|| p[0] != 1 || p[1] != 2 || p[2] != 3) + abort (); + p[0] = 4; + p[1] = 5; + p[2] = 6; + p[3] = 7; + p = (int *) omp_realloc (p, 2 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc); + if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 4 || p[1] != 5) + abort (); + p[0] = 8; + p[1] = 9; + if (omp_realloc (p, 0, omp_null_allocator, omp_high_bw_mem_alloc) != NULL) + abort (); + p = (int *) omp_realloc (NULL, 2 * sizeof (int), omp_large_cap_mem_alloc, omp_null_allocator); + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 1; + p[1] = 2; + p = (int *) omp_realloc (p, 5 * sizeof (int), omp_large_cap_mem_alloc, omp_large_cap_mem_alloc); + if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 1 || p[1] != 2) + abort (); + p[0] = 3; + p[1] = 4; + p[2] = 5; + p[3] = 6; + p[4] = 7; + omp_free (p, omp_null_allocator); + omp_set_default_allocator (omp_large_cap_mem_alloc); + if (omp_realloc (NULL, 0, omp_null_allocator, omp_null_allocator) != NULL) + abort (); + p = (int *) omp_alloc (sizeof (int), omp_null_allocator); + if ((((uintptr_t) p) % __alignof (int)) != 0) + abort (); + p[0] = 3; + p = (int *) omp_realloc (p, 3 * sizeof (int), omp_null_allocator, omp_null_allocator); + if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 3) + abort (); + p[0] = 4; + p[1] = 5; + p[2] = 6; + if (omp_realloc (p, 0, omp_null_allocator, omp_get_default_allocator ()) != NULL) + abort (); + a = omp_init_allocator (omp_default_mem_space, 4, traits); + if (a == omp_null_allocator) + abort (); + p = (int *) omp_alloc (sizeof (int), a); + if ((((uintptr_t) p) % 64) != 0) + abort (); + p[0] = 7; + p = (int *) omp_realloc (p, 3072, a, a); + if ((((uintptr_t) p) % 64) != 0 || p[0] != 7) + abort (); + p[0] = 1; + p[3071 / sizeof (int)] = 2; + q = (int *) omp_alloc (sizeof (int), a); + if ((((uintptr_t) q) % 64) != 0) + abort (); + q[0] = 8; + if (omp_realloc (q, 3072, a, a) != NULL) + abort (); + omp_free (p, a); + omp_free (q, a); + p = (int *) omp_alloc 
(sizeof (int), a); + p[0] = 42; + p = (int *) omp_realloc (p, 3072, a, a); + if (p[0] != 42) + abort (); + p[0] = 3; + p[3071 / sizeof (int)] = 4; + omp_realloc (p, 0, omp_null_allocator, omp_null_allocator); + omp_set_default_allocator (a); + if (omp_get_default_allocator () != a) + abort (); + p = (int *) omp_alloc (31, omp_null_allocator); + if (p == NULL) + abort (); + p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator); + if (p == NULL) + abort (); + q = (int *) omp_alloc (sizeof (int), omp_null_allocator); + if (q == NULL) + abort (); + if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL) + abort (); + omp_free (p, a); + omp_free (q, a); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_large_cap_mem_space, 2, traits5); + if (a == omp_null_allocator) + abort (); + omp_set_default_allocator (a); + if (omp_get_default_allocator () != a) + abort (); + p = (int *) omp_alloc (3071, omp_null_allocator); + if (p == NULL) + abort (); + p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator); + if (p == NULL) + abort (); + q = (int *) omp_alloc (sizeof (int), omp_null_allocator); + if (q == NULL) + abort (); + if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL) + abort (); + omp_free (p, a); + omp_free (q, a); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_default_mem_space, + sizeof (traits2) / sizeof (traits2[0]), + traits2); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_default_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + p = (int *) omp_alloc (sizeof (int), a2); + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 84; + p = (int *) omp_realloc (p, 380, a2, a2); + if ((((uintptr_t) p) % 32) != 0 || p[0] != 84) + abort (); + p[0] = 5; + p[379 / sizeof (int)] = 6; + q = (int 
*) omp_alloc (sizeof (int), a2); + if ((((uintptr_t) q) % 32) != 0) + abort (); + q[0] = 42; + q = (int *) omp_realloc (q, 768, a2, a2); + if ((((uintptr_t) q) % 16) != 0 || q[0] != 42) + abort (); + q[0] = 7; + q[767 / sizeof (int)] = 8; + r = (int *) omp_realloc (NULL, 512, a2, omp_null_allocator); + if ((((uintptr_t) r) % __alignof (int)) != 0) + abort (); + r[0] = 9; + r[511 / sizeof (int)] = 10; + omp_free (p, omp_null_allocator); + omp_free (q, a2); + omp_free (r, omp_null_allocator); + p = (int *) omp_alloc (sizeof (int), a2); + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 85; + p = (int *) omp_realloc (p, 320, a, a2); + if ((((uintptr_t) p) % 16) != 0 || p[0] != 85) + abort (); + p[0] = 5; + p[319 / sizeof (int)] = 6; + q = (int *) omp_alloc (sizeof (int), a); + if ((((uintptr_t) q) % 16) != 0) + abort (); + q[0] = 43; + q = (int *) omp_realloc (q, 320, a2, a); + if ((((uintptr_t) q) % 32) != 0 || q[0] != 43) + abort (); + q[0] = 44; + q[319 / sizeof (int)] = 8; + q = (int *) omp_realloc (q, 568, a2, a2); + if ((((uintptr_t) q) % 16) != 0 || q[0] != 44) + abort (); + q[0] = 7; + q[567 / sizeof (int)] = 8; + omp_free (p, omp_null_allocator); + omp_free (q, a2); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_large_cap_mem_space, + sizeof (traits4) / sizeof (traits4[0]), + traits4); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_default_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + omp_set_default_allocator (a2); +#ifdef __cplusplus + p = static_cast (omp_realloc (NULL, 420)); +#else + p = (int *) omp_realloc (NULL, 420, omp_null_allocator, omp_null_allocator); +#endif + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 5; + p[419 / sizeof (int)] = 6; + q = (int *) omp_realloc (NULL, sizeof (int), omp_null_allocator, 
omp_null_allocator); + if ((((uintptr_t) q) % 32) != 0) + abort (); + q[0] = 99; + q = (int *) omp_realloc (q, 700, omp_null_allocator, omp_null_allocator); + if ((((uintptr_t) q) % 128) != 0 || q[0] != 99) + abort (); + q[0] = 7; + q[699 / sizeof (int)] = 8; + if (omp_realloc (NULL, 768, omp_null_allocator, omp_null_allocator) != NULL) + abort (); +#ifdef __cplusplus + omp_free (p); + if (omp_realloc (q, 0) != NULL) + abort (); + omp_free (NULL); +#else + omp_free (p, omp_null_allocator); + if (omp_realloc (q, 0, omp_null_allocator, omp_null_allocator) != NULL) + abort (); + omp_free (NULL, omp_null_allocator); +#endif + omp_free (NULL, omp_null_allocator); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-12.c b/libgomp/testsuite/libgomp.c-c++-common/alloc-12.c new file mode 100644 index 00000000000..e07de3be6a7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-12.c @@ -0,0 +1,217 @@ +/* This testcase is mostly the same as alloc-8.c. + However, on systems where the numa and/or memkind libraries are + installed, libgomp uses those. This test ensures that the minimal + features work. Note: No attempt has been made to verify the partition + hints interleaved and nearest as the kernel purposely ignores them once + in a while and it would also require a 'dlopen' dance. + + memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space + and partition = interleaved, albeit it won't be interleaved for + omp_large_cap_mem_space. + + numa is used for partition = nearest, unless memkind is used. 
*/ + +#include +#include +#include + +const omp_alloctrait_t traits2[] += { { omp_atk_alignment, 16 }, + { omp_atk_sync_hint, omp_atv_default }, + { omp_atk_access, omp_atv_default }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_default_mem_fb }, + { omp_atk_partition, omp_atv_nearest } }; +omp_alloctrait_t traits3[] += { { omp_atk_sync_hint, omp_atv_uncontended }, + { omp_atk_alignment, 32 }, + { omp_atk_access, omp_atv_all }, + { omp_atk_pool_size, 512 }, + { omp_atk_fallback, omp_atv_allocator_fb }, + { omp_atk_fb_data, 0 }, + { omp_atk_partition, omp_atv_interleaved } }; +const omp_alloctrait_t traits4[] += { { omp_atk_alignment, 128 }, + { omp_atk_pool_size, 1024 }, + { omp_atk_fallback, omp_atv_null_fb } }; + +static void +check_all_zero (void *ptr, size_t len) +{ + char *p = (char *) ptr; + for (size_t i = 0; i < len; i++) + if (p[i] != '\0') + abort (); +} + +int +main () +{ + int *volatile p = (int *) omp_aligned_calloc (sizeof (int), 3, sizeof (int), omp_high_bw_mem_alloc); + check_all_zero (p, 3*sizeof (int)); + int *volatile q; + int *volatile r; + int i; + omp_alloctrait_t traits[3] + = { { omp_atk_alignment, 64 }, + { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, 4096 } }; + omp_allocator_handle_t a, a2; + + if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] || p[1] || p[2]) + abort (); + p[0] = 1; + p[1] = 2; + p[2] = 3; + omp_free (p, omp_high_bw_mem_alloc); + p = (int *) omp_aligned_calloc (2 * sizeof (int), 1, 2 * sizeof (int), omp_large_cap_mem_alloc); + check_all_zero (p, 2*sizeof (int)); + if ((((uintptr_t) p) % (2 * sizeof (int))) != 0 || p[0] || p[1]) + abort (); + p[0] = 1; + p[1] = 2; + omp_free (p, omp_null_allocator); + omp_set_default_allocator (omp_large_cap_mem_alloc); + p = (int *) omp_aligned_calloc (1, 1, sizeof (int), omp_null_allocator); + check_all_zero (p, sizeof (int)); + if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0]) + abort (); + p[0] = 3; + omp_free (p, omp_get_default_allocator ()); + 
+ a = omp_init_allocator (omp_large_cap_mem_space, 3, traits); + if (a == omp_null_allocator) + abort (); + p = (int *) omp_aligned_calloc (32, 3, 1024, a); + check_all_zero (p, 3*1024); + if ((((uintptr_t) p) % 64) != 0) + abort (); + for (i = 0; i < 3072 / sizeof (int); i++) + if (p[i]) + abort (); + p[0] = 1; + p[3071 / sizeof (int)] = 2; + if (omp_aligned_calloc (8, 192, 16, a) != NULL) + abort (); + omp_free (p, a); + p = (int *) omp_aligned_calloc (128, 6, 512, a); + check_all_zero (p, 6*512); + if ((((uintptr_t) p) % 128) != 0) + abort (); + for (i = 0; i < 3072 / sizeof (int); i++) + if (p[i]) + abort (); + p[0] = 3; + p[3071 / sizeof (int)] = 4; + omp_free (p, omp_null_allocator); + omp_set_default_allocator (a); + if (omp_get_default_allocator () != a) + abort (); + p = (int *) omp_aligned_calloc (64, 12, 256, omp_null_allocator); + check_all_zero (p, 12*256); + for (i = 0; i < 3072 / sizeof (int); i++) + if (p[i]) + abort (); + if (omp_aligned_calloc (8, 128, 24, omp_null_allocator) != NULL) + abort (); + omp_free (p, a); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_default_mem_space, + sizeof (traits2) / sizeof (traits2[0]), + traits2); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_default_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + p = (int *) omp_aligned_calloc (4, 5, 84, a2); + check_all_zero (p, 5*84); + for (i = 0; i < 420 / sizeof (int); i++) + if (p[i]) + abort (); + if ((((uintptr_t) p) % 32) != 0) + abort (); + p[0] = 5; + p[419 / sizeof (int)] = 6; + q = (int *) omp_aligned_calloc (8, 24, 32, a2); + check_all_zero (q, 24*32); + if ((((uintptr_t) q) % 16) != 0) + abort (); + for (i = 0; i < 768 / sizeof (int); i++) + if (q[i]) + abort (); + q[0] = 7; + q[767 / sizeof (int)] = 8; + r = (int *) omp_aligned_calloc (8, 64, 8, a2); + check_all_zero (r, 
64*8); + if ((((uintptr_t) r) % 8) != 0) + abort (); + for (i = 0; i < 512 / sizeof (int); i++) + if (r[i]) + abort (); + r[0] = 9; + r[511 / sizeof (int)] = 10; + omp_free (p, omp_null_allocator); + omp_free (q, a2); + omp_free (r, omp_null_allocator); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + + a = omp_init_allocator (omp_high_bw_mem_space, + sizeof (traits4) / sizeof (traits4[0]), + traits4); + if (a == omp_null_allocator) + abort (); + if (traits3[5].key != omp_atk_fb_data) + abort (); + traits3[5].value = (uintptr_t) a; + a2 = omp_init_allocator (omp_high_bw_mem_space, + sizeof (traits3) / sizeof (traits3[0]), + traits3); + if (a2 == omp_null_allocator) + abort (); + omp_set_default_allocator (a2); +#ifdef __cplusplus + p = static_cast (omp_aligned_calloc (4, 21, 20)); +#else + p = (int *) omp_aligned_calloc (4, 21, 20, omp_null_allocator); +#endif + check_all_zero (p, 21*20); + if ((((uintptr_t) p) % 32) != 0) + abort (); + for (i = 0; i < 420 / sizeof (int); i++) + if (p[i]) + abort (); + p[0] = 5; + p[419 / sizeof (int)] = 6; + q = (int *) omp_aligned_calloc (64, 12, 64, omp_null_allocator); + check_all_zero (q, 12*64); + if ((((uintptr_t) q) % 128) != 0) + abort (); + for (i = 0; i < 768 / sizeof (int); i++) + if (q[i]) + abort (); + q[0] = 7; + q[767 / sizeof (int)] = 8; + if (omp_aligned_calloc (8, 24, 32, omp_null_allocator) != NULL) + abort (); +#ifdef __cplusplus + omp_free (p); + omp_free (q); + omp_free (NULL); +#else + omp_free (p, omp_null_allocator); + omp_free (q, omp_null_allocator); + omp_free (NULL, omp_null_allocator); +#endif + omp_free (NULL, omp_null_allocator); + omp_destroy_allocator (a2); + omp_destroy_allocator (a); + return 0; +}