gcc/libgomp/testsuite/libgomp.c-c++-common/alloc-12.c
Tobias Burnus 450b05ce54 libgomp: Use libnuma for OpenMP's partition=nearest allocation trait
As with the memkind library, it is only used when found at runtime;
it does not need to be present when building GCC.

The included testcase does not check whether the memory has been placed
on the nearest node as the Linux kernel memory handling too often ignores
that hint, using a different node for the allocation.  However, when
running with 'numactl --preferred=<node> ./executable', it is clearly
visible that the feature works by comparing malloc/default vs. nearest
placement (using get_mempolicy to obtain the node for a mem addr).

libgomp/ChangeLog:

	* allocator.c: Add ifdef for LIBGOMP_USE_LIBNUMA.
	(enum gomp_numa_memkind_kind): Renamed from gomp_memkind_kind;
	add GOMP_MEMKIND_LIBNUMA.
	(struct gomp_libnuma_data, gomp_init_libnuma, gomp_get_libnuma): New.
	(omp_init_allocator): Handle partition=nearest with libnuma if avail.
	(omp_aligned_alloc, omp_free, omp_aligned_calloc, omp_realloc): Add
	numa_alloc_local (+ memset), numa_free, and numa_realloc calls as
	needed.
	* config/linux/allocator.c (LIBGOMP_USE_LIBNUMA): Define.
	* libgomp.texi: Fix a typo; use 'fi' instead of its ligature char.
	(Memory allocation): Renamed from 'Memory allocation with libmemkind';
	updated for libnuma usage.
	* testsuite/libgomp.c-c++-common/alloc-11.c: New test.
	* testsuite/libgomp.c-c++-common/alloc-12.c: New test.
2023-07-12 13:50:21 +02:00

217 lines
6.3 KiB
C

/* This testcase is mostly the same as alloc-8.c.
However, on systems where the numa and/or memkind libraries are
installed, libgomp uses those. This test ensures that the minimal
features work. Note: No attempt has been made to verify the partition
hints interleaved and nearest as the kernel purposely ignores them once
in a while and it would also require a 'dlopen' dance.
memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space
and partition = interleaved, albeit it won't be interleaved for
omp_large_cap_mem_space.
numa is used for partition = nearest, unless memkind is used. */
#include <omp.h>
#include <stdint.h>
#include <stdlib.h>
/* Trait table requesting 16-byte alignment, a 1024-byte pool, the
   default-memory fallback and partition = nearest.  main hands it to
   omp_init_allocator together with omp_default_mem_space.  */
const omp_alloctrait_t traits2[]
= { { omp_atk_alignment, 16 },
{ omp_atk_sync_hint, omp_atv_default },
{ omp_atk_access, omp_atv_default },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_default_mem_fb },
{ omp_atk_partition, omp_atv_nearest } };
/* Trait table requesting 32-byte alignment, a 512-byte pool, the
   allocator fallback and partition = interleaved.  It is deliberately
   not const: main verifies that entry 5 is the omp_atk_fb_data slot and
   then stores a fallback allocator handle into its value before each
   omp_init_allocator call, so the entry order must not change.  */
omp_alloctrait_t traits3[]
= { { omp_atk_sync_hint, omp_atv_uncontended },
{ omp_atk_alignment, 32 },
{ omp_atk_access, omp_atv_all },
{ omp_atk_pool_size, 512 },
{ omp_atk_fallback, omp_atv_allocator_fb },
{ omp_atk_fb_data, 0 },
{ omp_atk_partition, omp_atv_interleaved } };
/* Trait table requesting 128-byte alignment, a 1024-byte pool and the
   null fallback.  main hands it to omp_init_allocator together with
   omp_high_bw_mem_space.  */
const omp_alloctrait_t traits4[]
= { { omp_atk_alignment, 128 },
{ omp_atk_pool_size, 1024 },
{ omp_atk_fallback, omp_atv_null_fb } };
/* Abort the program unless each of the LEN bytes starting at PTR is
   zero.  Nothing is read when LEN is 0.  */
static void
check_all_zero (void *ptr, size_t len)
{
  const char *cursor = (const char *) ptr;
  size_t remaining = len;

  /* Walk the buffer front to back, stopping at the first nonzero byte.  */
  while (remaining-- > 0)
    {
      if (*cursor++ != '\0')
        abort ();
    }
}
/* Exercise omp_aligned_calloc and omp_free with predefined allocators and
   with allocators built from the trait tables in this file.  Each
   allocation is checked for zero-filled contents and for alignment; any
   failed check calls abort ().  Returns 0 when every check passed.

   omp_aligned_calloc is called as (alignment, nmemb, size, allocator).
   The order of allocations and frees matters because several checks
   depend on how much of a fixed-size pool is currently in use.  */
int
main ()
{
/* Three ints from the predefined high-bandwidth allocator.  */
int *volatile p = (int *) omp_aligned_calloc (sizeof (int), 3, sizeof (int), omp_high_bw_mem_alloc);
check_all_zero (p, 3*sizeof (int));
int *volatile q;
int *volatile r;
int i;
/* Local trait table: 64-byte alignment, null fallback, 4096-byte pool.  */
omp_alloctrait_t traits[3]
= { { omp_atk_alignment, 64 },
{ omp_atk_fallback, omp_atv_null_fb },
{ omp_atk_pool_size, 4096 } };
omp_allocator_handle_t a, a2;
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] || p[1] || p[2])
abort ();
/* Write to the block to confirm it is usable memory.  */
p[0] = 1;
p[1] = 2;
p[2] = 3;
omp_free (p, omp_high_bw_mem_alloc);
/* One element of two ints from the predefined large-capacity allocator,
   aligned to its own size.  */
p = (int *) omp_aligned_calloc (2 * sizeof (int), 1, 2 * sizeof (int), omp_large_cap_mem_alloc);
check_all_zero (p, 2*sizeof (int));
if ((((uintptr_t) p) % (2 * sizeof (int))) != 0 || p[0] || p[1])
abort ();
p[0] = 1;
p[1] = 2;
/* Free without naming the allocator that produced the block.  */
omp_free (p, omp_null_allocator);
/* Allocate with omp_null_allocator after changing the default allocator;
   presumably the request is then served by the default just set.  */
omp_set_default_allocator (omp_large_cap_mem_alloc);
p = (int *) omp_aligned_calloc (1, 1, sizeof (int), omp_null_allocator);
check_all_zero (p, sizeof (int));
if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0])
abort ();
p[0] = 3;
omp_free (p, omp_get_default_allocator ());
/* Custom allocator in the large-capacity memory space using the local
   trait table above.  */
a = omp_init_allocator (omp_large_cap_mem_space, 3, traits);
if (a == omp_null_allocator)
abort ();
/* 3 * 1024 = 3072 bytes; the requested alignment is 32 but the result is
   checked against the allocator's alignment trait of 64.  */
p = (int *) omp_aligned_calloc (32, 3, 1024, a);
check_all_zero (p, 3*1024);
if ((((uintptr_t) p) % 64) != 0)
abort ();
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
/* Touch the first and the last int of the block.  */
p[0] = 1;
p[3071 / sizeof (int)] = 2;
/* A further 192 * 16 = 3072 bytes must fail while the first block is
   still allocated; presumably because it would exceed the 4096-byte pool
   and the fallback trait is omp_atv_null_fb.  */
if (omp_aligned_calloc (8, 192, 16, a) != NULL)
abort ();
omp_free (p, a);
/* Here the requested alignment (128) is larger than the trait value (64)
   and the result is checked against 128.  */
p = (int *) omp_aligned_calloc (128, 6, 512, a);
check_all_zero (p, 6*512);
if ((((uintptr_t) p) % 128) != 0)
abort ();
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
p[0] = 3;
p[3071 / sizeof (int)] = 4;
omp_free (p, omp_null_allocator);
/* Make the custom allocator the default and verify that it is reported
   back, then repeat the pool checks through omp_null_allocator.  */
omp_set_default_allocator (a);
if (omp_get_default_allocator () != a)
abort ();
p = (int *) omp_aligned_calloc (64, 12, 256, omp_null_allocator);
check_all_zero (p, 12*256);
for (i = 0; i < 3072 / sizeof (int); i++)
if (p[i])
abort ();
/* 128 * 24 = 3072 more bytes are again expected to be refused.  */
if (omp_aligned_calloc (8, 128, 24, omp_null_allocator) != NULL)
abort ();
omp_free (p, a);
omp_destroy_allocator (a);
/* Allocator built from traits2 in the default memory space.  */
a = omp_init_allocator (omp_default_mem_space,
sizeof (traits2) / sizeof (traits2[0]),
traits2);
if (a == omp_null_allocator)
abort ();
/* Guard the hard-coded index before storing the fallback allocator handle
   into the omp_atk_fb_data entry of traits3.  */
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
/* Second allocator built from traits3, with 'a' recorded as its fallback
   data.  */
a2 = omp_init_allocator (omp_default_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
/* 5 * 84 = 420 bytes from a2; checked against traits3's alignment of 32.  */
p = (int *) omp_aligned_calloc (4, 5, 84, a2);
check_all_zero (p, 5*84);
for (i = 0; i < 420 / sizeof (int); i++)
if (p[i])
abort ();
if ((((uintptr_t) p) % 32) != 0)
abort ();
p[0] = 5;
p[419 / sizeof (int)] = 6;
/* 24 * 32 = 768 bytes, more than traits3's 512-byte pool.  The result is
   checked against an alignment of 16 (the traits2 value); presumably the
   request is served by the fallback allocator 'a'.  */
q = (int *) omp_aligned_calloc (8, 24, 32, a2);
check_all_zero (q, 24*32);
if ((((uintptr_t) q) % 16) != 0)
abort ();
for (i = 0; i < 768 / sizeof (int); i++)
if (q[i])
abort ();
q[0] = 7;
q[767 / sizeof (int)] = 8;
/* 64 * 8 = 512 bytes; only the requested alignment of 8 is checked.  */
r = (int *) omp_aligned_calloc (8, 64, 8, a2);
check_all_zero (r, 64*8);
if ((((uintptr_t) r) % 8) != 0)
abort ();
for (i = 0; i < 512 / sizeof (int); i++)
if (r[i])
abort ();
r[0] = 9;
r[511 / sizeof (int)] = 10;
/* Free with a mix of omp_null_allocator and the explicit handle.  */
omp_free (p, omp_null_allocator);
omp_free (q, a2);
omp_free (r, omp_null_allocator);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
/* Repeat the two-level setup in the high-bandwidth memory space: 'a' from
   traits4, and 'a2' from traits3 with 'a' stored as its fallback data.  */
a = omp_init_allocator (omp_high_bw_mem_space,
sizeof (traits4) / sizeof (traits4[0]),
traits4);
if (a == omp_null_allocator)
abort ();
if (traits3[5].key != omp_atk_fb_data)
abort ();
traits3[5].value = (uintptr_t) a;
a2 = omp_init_allocator (omp_high_bw_mem_space,
sizeof (traits3) / sizeof (traits3[0]),
traits3);
if (a2 == omp_null_allocator)
abort ();
omp_set_default_allocator (a2);
/* The C++ variant omits the allocator argument; the C variant passes
   omp_null_allocator explicitly.  */
#ifdef __cplusplus
p = static_cast <int *> (omp_aligned_calloc (4, 21, 20));
#else
p = (int *) omp_aligned_calloc (4, 21, 20, omp_null_allocator);
#endif
/* 21 * 20 = 420 bytes; checked against traits3's alignment of 32.  */
check_all_zero (p, 21*20);
if ((((uintptr_t) p) % 32) != 0)
abort ();
for (i = 0; i < 420 / sizeof (int); i++)
if (p[i])
abort ();
p[0] = 5;
p[419 / sizeof (int)] = 6;
/* 12 * 64 = 768 bytes, more than traits3's 512-byte pool.  The result is
   checked against an alignment of 128 (the traits4 value); presumably
   the request is served by the fallback allocator 'a'.  */
q = (int *) omp_aligned_calloc (64, 12, 64, omp_null_allocator);
check_all_zero (q, 12*64);
if ((((uintptr_t) q) % 128) != 0)
abort ();
for (i = 0; i < 768 / sizeof (int); i++)
if (q[i])
abort ();
q[0] = 7;
q[767 / sizeof (int)] = 8;
/* Another 24 * 32 = 768 bytes must be refused while p and q are still
   allocated; presumably neither pool has room and traits4 specifies
   omp_atv_null_fb.  */
if (omp_aligned_calloc (8, 24, 32, omp_null_allocator) != NULL)
abort ();
/* Freeing a NULL pointer is exercised as well; the C++ variant omits the
   allocator argument.  */
#ifdef __cplusplus
omp_free (p);
omp_free (q);
omp_free (NULL);
#else
omp_free (p, omp_null_allocator);
omp_free (q, omp_null_allocator);
omp_free (NULL, omp_null_allocator);
#endif
omp_free (NULL, omp_null_allocator);
omp_destroy_allocator (a2);
omp_destroy_allocator (a);
return 0;
}