openmp: Add testcases for omp_max_vf

Ensure that the GOMP_MAX_VF does the right thing for explicit schedules, when
offloading is enabled ("target" directives are present), and is inactive
otherwise.

libgomp/ChangeLog:

	* testsuite/libgomp.c/max_vf-1.c: New test.
	* testsuite/libgomp.c/max_vf-2.c: New test.

gcc/testsuite/ChangeLog:

	* gcc.dg/gomp/max_vf-1.c: New test.
2024-11-06 12:26:08 +00:00 · 2024-11-06 12:26:08 +00:00 · d334f729e5
commit d334f729e5
parent 2a2e6e9894
3 changed files with 105 additions and 0 deletions
--- a/gcc/testsuite/gcc.dg/gomp/max_vf-1.c
+++ b/gcc/testsuite/gcc.dg/gomp/max_vf-1.c
@@ -0,0 +1,37 @@
+/* Test that omp parallel simd schedule uses the correct max_vf for the
+   host system, when no target directives are present.  */
+
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-ompexp" } */
+
+/* Fix a max_vf size so we can scan for it.
+{ dg-additional-options "-msse2" { target { x86_64-*-* i?86-*-* } } } */
+
+#define N 1024  /* Number of elements in each array and the loop trip count.  */
+int a[N], b[N], c[N];  /* File-scope operands: f2 reads b and c and writes a.  */
+
+void
+f2 (void)  /* Host-only loop using a static schedule with the simd modifier.  */
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd: static, 7)
+  for (i = 0; i < N; i++)  /* Chunk size 7 is requested by the pragma above.  */
+    a[i] = b[i] + c[i];  /* Operates on the file-scope arrays.  */
+}
+
+/* Make sure the max_vf is inlined as a number.
+   Hopefully there are no unrelated uses of these numbers ...
+{ dg-final { scan-tree-dump-times {\* 16} 2 "ompexp" { target { x86_64-*-* } } } }
+{ dg-final { scan-tree-dump-times {\+ 16} 1 "ompexp" { target { x86_64-*-* } } } } */
+
+void
+f3 (int *a, int *b, int *c)  /* Parameters shadow the file-scope arrays.  */
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd : dynamic, 7)
+  for (i = 0; i < N; i++)  /* Dynamic schedule, simd modifier, chunk size 7.  */
+    a[i] = b[i] + c[i];  /* Assumes each pointer addresses at least N ints.  */
+}
+
+/* Make sure the max_vf is inlined as a number.
+{ dg-final { scan-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 16, 0\);} 1 "ompexp" { target { x86_64-*-* } } } } */
--- a/libgomp/testsuite/libgomp.c/max_vf-1.c
+++ b/libgomp/testsuite/libgomp.c/max_vf-1.c
@@ -0,0 +1,47 @@
+/* Test that omp parallel simd schedule uses the correct max_vf for the
+   host system, when target directives are present.  */
+
+/* { dg-require-effective-target offloading_enabled } */
+
+/* { dg-do link } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-ompexp -foffload=-fdump-tree-optimized" } */
+
+/* Fix a max_vf size so we can scan for it.
+{ dg-additional-options "-msse2" { target { x86_64-*-* i?86-*-* } } } */
+
+#define N 1024  /* Number of elements in each array and the loop trip count.  */
+int a[N], b[N], c[N];  /* File-scope operands used by f2's target loop.  */
+
+/* Test both static schedules and inline target directives.  */
+void
+f2 (void)  /* The target directive is combined into the loop construct.  */
+{
+  int i;
+  #pragma omp target parallel for simd schedule (simd: static, 7)
+  for (i = 0; i < N; i++)  /* Static schedule, simd modifier, chunk size 7.  */
+    a[i] = b[i] + c[i];  /* Operates on the file-scope arrays.  */
+}
+
+/* Test both dynamic schedules and declare target functions.  */
+#pragma omp declare target
+void
+f3 (int *a, int *b, int *c)  /* Lies between the declare target pragmas.  */
+{
+  int i;
+  #pragma omp parallel for simd schedule (simd : dynamic, 7)
+  for (i = 0; i < N; i++)  /* Dynamic schedule, simd modifier, chunk size 7.  */
+    a[i] = b[i] + c[i];  /* Assumes each pointer addresses at least N ints.  */
+}
+#pragma omp end declare target
+
+/* Make sure that the max_vf is used as an internal function (IFN).
+{ dg-final { scan-tree-dump-times {GOMP_MAX_VF} 2 "ompexp" { target { x86_64-*-* i?86-*-* } } } } */
+
+/* Make sure the max_vf is passed as a temporary variable.
+{ dg-final { scan-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, D\.[0-9]*, 0\);} 1 "ompexp" { target { x86_64-*-* i?86-*-* } } } } */
+
+/* Test the offload devices: the chunk argument should be 64 on GCN and 7 on nvptx.
+{ dg-final { scan-offload-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 64, 0\);} 1 "optimized" { target { offload_gcn } } } } 
+{ dg-final { scan-offload-tree-dump-times {__builtin_GOMP_parallel_loop_nonmonotonic_dynamic \(.*, 7, 0\);} 1 "optimized" { target { offload_nvptx } } } } */
+
+int main() {}  /* Empty on purpose: this file is only compiled and linked.  */
--- a/libgomp/testsuite/libgomp.c/max_vf-2.c
+++ b/libgomp/testsuite/libgomp.c/max_vf-2.c
@@ -0,0 +1,21 @@
+/* Ensure that the default safelen is set correctly for the larger of the host
+   and offload device, to prevent defeating the vectorizer.  */
+ 
+/* { dg-require-effective-target offloading_enabled } */
+
+/* { dg-do link } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */
+
+int f(float *a, float *b, int n)  /* Dot product of the first n elements.  */
+{
+  float sum = 0;
+  #pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+  for (int i = 0; i < n; i++)  /* No safelen clause is given on the pragma.  */
+    sum += a[i] * b[i];
+  return sum;  /* The float total is converted to the int return type.  */
+}
+
+/* Make sure that the max_vf used is suitable for the offload device.
+{ dg-final { scan-tree-dump-times {omp simd safelen\(64\)} 1 "omplower" { target { offload_gcn } } } } */
+
+int main() {}  /* Empty on purpose: this file is only compiled and linked.  */