[PATCH 7/7] OpenMP 4.0 offloading infrastructure: testsuite.

libgomp/
	* testsuite/lib/libgomp.exp
	(check_effective_target_offload_device): New.
	* testsuite/libgomp.c++/c++.exp: Include tests from subdirectories.
	* testsuite/libgomp.c++/examples-4/e.51.5.C: New test.
	* testsuite/libgomp.c++/examples-4/e.53.2.C: Ditto.
	* testsuite/libgomp.c/examples-4/e.50.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.50.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.50.3.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.50.4.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.50.5.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.3.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.4.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.6.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.51.7.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.52.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.52.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.53.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.53.3.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.53.4.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.53.5.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.54.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.54.3.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.54.4.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.54.5.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.54.6.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.55.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.55.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.56.3.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.56.4.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.57.1.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.57.2.c: Ditto.
	* testsuite/libgomp.c/examples-4/e.57.3.c: Ditto.
	* testsuite/libgomp.c/target-7.c: Fix test.
	* testsuite/libgomp.fortran/examples-4/e.50.1.f90: New test.
	* testsuite/libgomp.fortran/examples-4/e.50.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.50.3.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.50.4.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.50.5.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.1.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.3.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.4.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.5.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.6.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.51.7.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.52.1.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.52.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.53.1.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.53.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.53.3.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.53.4.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.53.5.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.54.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.54.3.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.54.4.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.54.5.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.54.6.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.55.1.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.55.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.56.3.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.56.4.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.57.1.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.57.2.f90: Ditto.
	* testsuite/libgomp.fortran/examples-4/e.57.3.f90: Ditto.

Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com>
Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com>
Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>

From-SVN: r217494
This commit is contained in:
Andrey Turetskiy 2014-11-13 13:56:22 +00:00 committed by Kirill Yukhin
parent c713ddc031
commit 122d7303a0
66 changed files with 3686 additions and 5 deletions

View file

@ -1,3 +1,75 @@
2014-11-13 Andrey Turetskiy <andrey.turetskiy@intel.com>
Ilya Verbin <ilya.verbin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
Ilya Tocar <ilya.tocar@intel.com>
* testsuite/lib/libgomp.exp
(check_effective_target_offload_device): New.
* testsuite/libgomp.c++/c++.exp: Include tests from subdirectories.
* testsuite/libgomp.c++/examples-4/e.51.5.C: New test.
* testsuite/libgomp.c++/examples-4/e.53.2.C: Ditto.
* testsuite/libgomp.c/examples-4/e.50.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.50.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.50.3.c: Ditto.
* testsuite/libgomp.c/examples-4/e.50.4.c: Ditto.
* testsuite/libgomp.c/examples-4/e.50.5.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.3.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.4.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.6.c: Ditto.
* testsuite/libgomp.c/examples-4/e.51.7.c: Ditto.
* testsuite/libgomp.c/examples-4/e.52.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.52.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.53.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.53.3.c: Ditto.
* testsuite/libgomp.c/examples-4/e.53.4.c: Ditto.
* testsuite/libgomp.c/examples-4/e.53.5.c: Ditto.
* testsuite/libgomp.c/examples-4/e.54.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.54.3.c: Ditto.
* testsuite/libgomp.c/examples-4/e.54.4.c: Ditto.
* testsuite/libgomp.c/examples-4/e.54.5.c: Ditto.
* testsuite/libgomp.c/examples-4/e.54.6.c: Ditto.
* testsuite/libgomp.c/examples-4/e.55.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.55.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.56.3.c: Ditto.
* testsuite/libgomp.c/examples-4/e.56.4.c: Ditto.
* testsuite/libgomp.c/examples-4/e.57.1.c: Ditto.
* testsuite/libgomp.c/examples-4/e.57.2.c: Ditto.
* testsuite/libgomp.c/examples-4/e.57.3.c: Ditto.
* testsuite/libgomp.c/target-7.c: Fix test.
* testsuite/libgomp.fortran/examples-4/e.50.1.f90: New test.
* testsuite/libgomp.fortran/examples-4/e.50.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.50.3.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.50.4.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.50.5.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.1.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.3.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.4.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.5.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.6.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.51.7.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.52.1.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.52.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.53.1.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.53.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.53.3.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.53.4.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.53.5.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.54.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.54.3.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.54.4.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.54.5.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.54.6.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.55.1.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.55.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.56.3.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.56.4.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.57.1.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.57.2.f90: Ditto.
* testsuite/libgomp.fortran/examples-4/e.57.3.f90: Ditto.
2014-11-13 Jakub Jelinek <jakub@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
Thomas Schwinge <thomas@codesourcery.com>

View file

@ -239,3 +239,17 @@ proc libgomp_option_proc { option } {
return 0
}
}
# Return 1 if offload device is available.
# The probe program stores omp_is_initial_device () from inside a target
# region into the mapped variable "a" and returns it from main, so it exits
# with status 0 only when the region did not execute on the host.
# NOTE(review): presumably check_runtime_nocache (defined elsewhere) turns a
# zero exit status into a result of 1 -- confirm.
proc check_effective_target_offload_device { } {
return [check_runtime_nocache offload_device_available_ {
#include <omp.h>
int main ()
{
int a;
#pragma omp target map(from: a)
a = omp_is_initial_device ();
return a;
}
} ]
}

View file

@ -42,7 +42,7 @@ if { $blddir != "" } {
if { $lang_test_file_found } {
# Gather a list of all tests.
set tests [lsort [glob -nocomplain $srcdir/$subdir/*.C]]
set tests [lsort [find $srcdir/$subdir *.C]]
if { $blddir != "" } {
set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"

View file

@ -0,0 +1,62 @@
// { dg-do run }
#include <omp.h>
#define EPS 0.000001
#define N 1000
extern "C" void abort (void);
// Fill a1 with alternating -0.01/+0.01 and a2 with the element index.
void init (float *a1, float *a2, int n)
{
  int sign = -1;
  int idx = 0;
  while (idx < n)
    {
      a1[idx] = sign * 0.01;
      a2[idx] = idx;
      sign = -sign;
      ++idx;
    }
}
// Abort if any of the first n element pairs differ by more than EPS.
void check (float *a, float *b, int n)
{
  int idx = 0;
  while (idx < n)
    {
      if (a[idx] - b[idx] > EPS)
        abort ();
      if (b[idx] - a[idx] > EPS)
        abort ();
      ++idx;
    }
}
// Host reference: p[k] = v1[k] * v2[k] for the first n elements.
void vec_mult_ref (float *&p, float *&v1, float *&v2, int n)
{
  int k = 0;
  while (k < n)
    {
      p[k] = v1[k] * v2[k];
      ++k;
    }
}
// Offloaded counterpart of vec_mult_ref: the target region maps n elements
// of v1 and v2 to the device and n elements of p back, and runs the
// element-wise product as a parallel for.  The pointers are taken by
// reference, so the array sections are based on reference-to-pointer names.
void vec_mult (float *&p, float *&v1, float *&v2, int n)
{
#pragma omp target map(to: v1[0:n], v2[:n]) map(from: p[0:n])
#pragma omp parallel for
for (int i = 0; i < n; i++)
p[i] = v1[i] * v2[i];
}
// Compare the host reference product against the offloaded product.
int main ()
{
  // Inputs shared by both computations.
  float *v1 = new float [N];
  float *v2 = new float [N];
  // Reference result (p) and offloaded result (p1).
  float *p = new float [N];
  float *p1 = new float [N];

  init (v1, v2, N);
  vec_mult_ref (p, v1, v2, N);
  vec_mult (p1, v1, v2, N);
  check (p, p1, N);

  delete [] v2;
  delete [] v1;
  delete [] p1;
  delete [] p;
  return 0;
}

View file

@ -0,0 +1,43 @@
// { dg-do run }
// { dg-require-effective-target offload_device }
#include <stdlib.h>
// Plain aggregate with a single int member.
struct typeX
{
int a;
};
// Class type with a public int member and a member function; foo is not
// called anywhere in the visible part of this test.
class typeY
{
public:
int foo () { return a^0x01; }
int a;
};
// One object of each type, placed in a declare target region so that the
// target constructs in main can refer to them.
#pragma omp declare target
struct typeX varX;
class typeY varY;
#pragma omp end declare target
// Writes 100 to both declare-target objects inside a target region, expects
// the host copies to still read 0 afterwards, and expects 100 only after
// "target update from" copies the values back.  The first check can only
// pass when the region ran on a separate device, which matches the
// offload_device requirement at the top of the file.
int main ()
{
varX.a = 0;
varY.a = 0;
#pragma omp target
{
varX.a = 100;
varY.a = 100;
}
if (varX.a != 0 || varY.a != 0)
abort ();
#pragma omp target update from(varX, varY)
if (varX.a != 100 || varY.a != 100)
abort ();
return 0;
}

View file

@ -0,0 +1,63 @@
/* { dg-do run } */
#include <stdlib.h>
#define N 100000
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (int *a1, int *a2)
{
  int sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort unless a and b agree on their first N elements.  */
void check (int *a, int *b)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Host reference: build the inputs locally and store their element-wise
   product in p.  */
void vec_mult_ref (int *p)
{
  int v1[N], v2[N];
  int k = 0;
  init (v1, v2);
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  Only p is named in a map clause
   (no map-type given); the local arrays v1 and v2 are referenced in the
   target region without an explicit clause.  */
void vec_mult (int *p)
{
int i;
int v1[N], v2[N];
init (v1, v2);
#pragma omp target map(p[0:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
/* Compare the host reference product against the offloaded product.
   vec_mult_ref and vec_mult each build their own input arrays, so main
   only needs the two result buffers; the previously declared (and
   initialised) v1/v2 copies were never read and just consumed stack.  */
int main ()
{
  int p1[N], p2[N];
  vec_mult_ref (p1);
  vec_mult (p2);
  check (p1, p2);
  return 0;
}

View file

@ -0,0 +1,64 @@
/* { dg-do run } */
#include <stdlib.h>
#define N 100000
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index
   converted to char.  */
void init (char *a1, char *a2)
{
  char sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort unless a and b agree on their first N elements.  */
void check (char *a, char *b)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Host reference: build the inputs locally and store their element-wise
   product in p.  */
void vec_mult_ref (char *p)
{
  char v1[N], v2[N];
  int k = 0;
  init (v1, v2);
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  p is mapped with map-type "from";
   the local arrays v1 and v2 are referenced in the target region without
   an explicit clause.  */
void vec_mult (char *p)
{
int i;
char v1[N], v2[N];
init (v1, v2);
#pragma omp target map(from: p[0:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
/* Compare the host reference product against the offloaded product.
   Both callees build their own inputs, so the unused v1/v2 arrays and
   their dead init call have been removed from main.  */
int main ()
{
  char p1[N], p2[N];
  vec_mult_ref (p1);
  vec_mult (p2);
  check (p1, p2);
  return 0;
}

View file

@ -0,0 +1,64 @@
/* { dg-do run } */
#include <stdlib.h>
#define N 100000
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (long long *a1, long long *a2)
{
  long long sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort unless a and b agree on their first N elements.  */
void check (long long *a, long long *b)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Host reference: build the inputs locally and store their element-wise
   product in p.  */
void vec_mult_ref (long long *p)
{
  long long v1[N], v2[N];
  int k = 0;
  init (v1, v2);
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  The whole local arrays v1 and v2
   and the section p[0:N] are listed in a single map clause with no
   map-type given.  */
void vec_mult (long long *p)
{
int i;
long long v1[N], v2[N];
init (v1, v2);
#pragma omp target map(v1, v2, p[0:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
/* Compare the host reference product against the offloaded product.
   Both callees build their own inputs; the unused v1/v2 arrays (another
   2 * N long long on the stack) and their dead init call were removed.  */
int main ()
{
  long long p1[N], p2[N];
  vec_mult_ref (p1);
  vec_mult (p2);
  check (p1, p2);
  return 0;
}

View file

@ -0,0 +1,57 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.000001
#define N 100000
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (double *a1, double *a2)
{
  double sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort if any of the first N element pairs differ by more than EPS.  */
void check (double *a, double *b)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] - b[idx] > EPS)
        abort ();
      if (b[idx] - a[idx] > EPS)
        abort ();
      idx++;
    }
}
/* Host reference: p[k] = v1[k] * v2[k] for the first N elements.  */
void vec_mult_ref (double *p, double *v1, double *v2)
{
  int k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref: N elements of v1 and v2 are mapped
   "to" the device, N elements of p are mapped "from" it, and the product
   loop runs as a parallel for inside the target region.  */
void vec_mult (double *p, double *v1, double *v2)
{
int i;
#pragma omp target map(to: v1[0:N], v2[:N]) map(from: p[0:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
/* Compare the host reference product against the offloaded product.  */
int main ()
{
  /* Inputs shared by both computations.  */
  double v1[N], v2[N];
  /* Reference result and offloaded result.  */
  double p1[N], p2[N];

  init (v1, v2);
  vec_mult_ref (p1, v1, v2);
  vec_mult (p2, v1, v2);
  check (p1, p2);
  return 0;
}

View file

@ -0,0 +1,67 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <omp.h>
#include <stdlib.h>
#define EPS 0.000001
#define N 100000
#define THRESHOLD1 10000
#define THRESHOLD2 1000
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (float *a1, float *a2)
{
  float sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort if any of the first N element pairs differ by more than EPS.  */
void check (float *a, float *b)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] - b[idx] > EPS)
        abort ();
      if (b[idx] - a[idx] > EPS)
        abort ();
      idx++;
    }
}
/* Host reference: p[k] = v1[k] * v2[k] for the first N elements.  */
void vec_mult_ref (float *p, float *v1, float *v2)
{
  int k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded product guarded by if clauses: the target construct is
   conditional on N > THRESHOLD1 and the inner parallel for on
   N > THRESHOLD2.  The region aborts when omp_is_initial_device ()
   reports that it is executing on the host.  */
void vec_mult (float *p, float *v1, float *v2)
{
int i;
#pragma omp target if(N > THRESHOLD1) map(to: v1[0:N], v2[:N]) \
map(from: p[0:N])
{
if (omp_is_initial_device ())
abort ();
#pragma omp parallel for if(N > THRESHOLD2)
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
}
/* Compare the host reference product against the offloaded product.  */
int main ()
{
  /* Inputs shared by both computations.  */
  float v1[N], v2[N];
  /* Reference result and offloaded result.  */
  float p1[N], p2[N];

  init (v1, v2);
  vec_mult_ref (p1, v1, v2);
  vec_mult (p2, v1, v2);
  check (p1, p2);
  return 0;
}

View file

@ -0,0 +1,64 @@
/* { dg-do run } */
#include <stdlib.h>
const int MAX = 1800;
/* Abort unless a and b agree on their first N elements.  */
void check (long long *a, long long *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (long long *a1, long long *a2, int N)
{
  long long sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p[k] = v1[k] * v2[k] for the first N elements.  */
void vec_mult_ref (long long *p, long long *v1, long long *v2, int N)
{
  int k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref: a target data region carries the
   map clauses (v1, v2 "to"; p "from"), and the nested target construct has
   no map clauses of its own.  */
void vec_mult (long long *p, long long *v1, long long *v2, int N)
{
int i;
#pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p[0:N])
#pragma omp target
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
/* Compare the host reference product against the offloaded product on
   MAX-element heap buffers.  */
int main ()
{
  const size_t bytes = MAX * sizeof (long long);
  long long *p1 = (long long *) malloc (bytes);
  long long *p2 = (long long *) malloc (bytes);
  long long *v1 = (long long *) malloc (bytes);
  long long *v2 = (long long *) malloc (bytes);

  init (v1, v2, MAX);
  vec_mult_ref (p1, v1, v2, MAX);
  vec_mult (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,94 @@
/* { dg-do run } */
#include <stdlib.h>
const int MAX = 1800;
/* Abort unless a and b agree on their first N elements.  */
void check (char *a, char *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index
   converted to char.  */
void init (char *a1, char *a2, int N)
{
  char sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Like init, but a1 alternates between -10 and +10.  */
void init_again (char *a1, char *a2, int N)
{
  char sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign * 10;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p = v1*v2 with the init data, then p += v1*v2 with the
   init_again data.  */
void vec_mult_ref (char *p, char *v1, char *v2, int N)
{
  int k;

  init (v1, v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }

  init_again (v1, v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = p[k] + (v1[k] * v2[k]);
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  p is mapped "from" by the
   enclosing target data region; each of the two target constructs maps
   v1 and v2 "to" on its own, and init_again runs on the host between
   them.  */
void vec_mult (char *p, char *v1, char *v2, int N)
{
int i;
init (v1, v2, N);
#pragma omp target data map(from: p[0:N])
{
#pragma omp target map(to: v1[:N], v2[:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
init_again (v1, v2, N);
#pragma omp target map(to: v1[:N], v2[:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = p[i] + (v1[i] * v2[i]);
}
}
/* Compare the host reference against the offloaded computation on
   MAX-element heap buffers; both callees initialise v1/v2 themselves.  */
int main ()
{
  const size_t bytes = MAX * sizeof (char);
  char *p1 = (char *) malloc (bytes);
  char *p2 = (char *) malloc (bytes);
  char *v1 = (char *) malloc (bytes);
  char *v2 = (char *) malloc (bytes);

  vec_mult_ref (p1, v1, v2, MAX);
  vec_mult (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,79 @@
/* { dg-do run } */
#include <stdlib.h>
const int ROWS = 5;
const int COLS = 5;
/* Set Q[r][c] to (r + 1) * 100 + (c + 1) for the rows x cols corner.  */
void init (int Q[][COLS], const int rows, const int cols)
{
  int r, c;
  for (r = 0; r < rows; r++)
    {
      const int base = (r + 1) * 100;
      for (c = 0; c < cols; c++)
        Q[r][c] = base + (c + 1);
    }
}
/* Abort unless a and b agree on their rows x cols corner.  */
void check (int a[][COLS], int b[][COLS], const int rows, const int cols)
{
  int r, c;
  for (r = 0; r < rows; r++)
    {
      for (c = 0; c < cols; c++)
        {
          if (a[r][c] == b[r][c])
            continue;
          abort ();
        }
    }
}
/* Host reference: scale every column of Q by the sum of squares of its
   entries.  */
void gramSchmidt_ref (int Q[][COLS], const int rows, const int cols)
{
  int col;
  for (col = 0; col < cols; col++)
    {
      int row;
      int sumsq = 0;

      row = 0;
      while (row < rows)
        {
          sumsq += (Q[row][col] * Q[row][col]);
          row++;
        }

      row = 0;
      while (row < rows)
        {
          Q[row][col] *= sumsq;
          row++;
        }
    }
}
/* Offloaded counterpart of gramSchmidt_ref.  Q (and COLS, which appears in
   Q's type) are mapped once by the enclosing target data region; each
   column is then processed by two target regions.

   The first region accumulates into the scalar tmp, whose value is needed
   on the host afterwards, so it is mapped tofrom explicitly instead of
   relying on the implicit mapping of scalars (which newer OpenMP versions
   turn into firstprivate, losing the result).  The second region only
   reads tmp, so the default treatment is sufficient there.  */
void gramSchmidt (int Q[][COLS], const int rows, const int cols)
{
  int i, k;
  #pragma omp target data map(Q[0:rows][0:cols]) map(to:COLS)
    for (k = 0; k < cols; k++)
      {
        int tmp = 0;
        #pragma omp target map(tofrom: tmp)
          #pragma omp parallel for reduction(+:tmp)
            for (i = 0; i < rows; i++)
              tmp += (Q[i][k] * Q[i][k]);
        #pragma omp target
          #pragma omp parallel for
            for (i = 0; i < rows; i++)
              Q[i][k] *= tmp;
      }
}
/* Run the host and the offloaded column scaling on identically
   initialised ROWS x COLS matrices and compare the results.  */
int main ()
{
  const size_t bytes = ROWS * COLS * sizeof (int);
  int (*Q1)[COLS] = (int(*)[COLS]) malloc (bytes);
  int (*Q2)[COLS] = (int(*)[COLS]) malloc (bytes);

  init (Q1, ROWS, COLS);
  init (Q2, ROWS, COLS);
  gramSchmidt_ref (Q1, ROWS, COLS);
  gramSchmidt (Q2, ROWS, COLS);
  check (Q1, Q2, ROWS, COLS);

  free (Q2);
  free (Q1);
  return 0;
}

View file

@ -0,0 +1,77 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.000001
const int MAX = 1800;
/* Abort if any of the first N element pairs differ by more than EPS.  */
void check (double *a, double *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] - b[idx] > EPS)
        abort ();
      if (b[idx] - a[idx] > EPS)
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (double *a1, double *a2, int N)
{
  double sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p1[k] = v3[k] * v4[k] for the first N elements.  */
void vec_mult_ref (double *p1, double *v3, double *v4, int N)
{
  int k = 0;
  while (k < N)
    {
      p1[k] = v3[k] * v4[k];
      k++;
    }
}
/* Host reference driver: initialise the inputs, then compute their
   element-wise product into p0.  */
void foo_ref (double *p0, double *v1, double *v2, int N)
{
  /* Inputs first, product second.  */
  init (v1, v2, N);

  vec_mult_ref (p0, v1, v2, N);
}
/* Offloaded product with its own map clauses (v3, v4 "to"; p1 "from").
   In this file it is called from foo inside a target data region that
   lists the same arrays.  */
void vec_mult (double *p1, double *v3, double *v4, int N)
{
int i;
#pragma omp target map(to: v3[0:N], v4[:N]) map(from: p1[0:N])
#pragma omp parallel for
for (i = 0; i < N; i++)
p1[i] = v3[i] * v4[i];
}
/* Offloaded driver: initialise the inputs on the host, then call vec_mult
   from inside a target data region that maps v1 and v2 "to" and p0
   "from".  */
void foo (double *p0, double *v1, double *v2, int N)
{
init (v1, v2, N);
#pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p0[0:N])
vec_mult (p0, v1, v2, N);
}
/* Compare the host reference driver against the offloaded driver on
   MAX-element heap buffers.  */
int main ()
{
  const size_t bytes = MAX * sizeof (double);
  double *p1 = (double *) malloc (bytes);
  double *p2 = (double *) malloc (bytes);
  double *v1 = (double *) malloc (bytes);
  double *v2 = (double *) malloc (bytes);

  foo_ref (p1, v1, v2, MAX);
  foo (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,109 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <stdlib.h>
#include <omp.h>
#define EPS 0.000001
#define THRESHOLD 1000
const int MAX = 1800;
/* Abort if any of the first N element pairs differ by more than EPS.  */
void check (float *a, float *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] - b[idx] > EPS)
        abort ();
      if (b[idx] - a[idx] > EPS)
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (float *a1, float *a2, int N)
{
  float sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Like init, but a1 alternates between -10 and +10.  */
void init_again (float *a1, float *a2, int N)
{
  float sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign * 10;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p = v1*v2 with the init data, then p += v1*v2 with the
   init_again data.  */
void vec_mult_ref (float *p, float *v1, float *v2, int N)
{
  int k;

  init (v1, v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }

  init_again (v1, v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = p[k] + (v1[k] * v2[k]);
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  The target data region and both
   target regions are guarded by if (N > THRESHOLD); p is mapped "from" by
   the data region and v1/v2 are mapped "to" by each target region, with
   init_again running on the host in between.

   Each target region aborts when omp_is_initial_device () reports host
   execution.  The first of these checks used to read "abort;", which only
   names the function without calling it, so the check never fired; it now
   calls abort () like the second one.  */
void vec_mult (float *p, float *v1, float *v2, int N)
{
  int i;
  init (v1, v2, N);
  #pragma omp target data if(N > THRESHOLD) map(from: p[0:N])
    {
      #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
        {
          if (omp_is_initial_device ())
            abort ();
          #pragma omp parallel for
            for (i = 0; i < N; i++)
              p[i] = v1[i] * v2[i];
        }
      init_again (v1, v2, N);
      #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
        {
          if (omp_is_initial_device ())
            abort ();
          #pragma omp parallel for
            for (i = 0; i < N; i++)
              p[i] = p[i] + (v1[i] * v2[i]);
        }
    }
}
/* Compare the host reference against the offloaded computation on
   MAX-element heap buffers; both callees initialise v1/v2 themselves.  */
int main ()
{
  const size_t bytes = MAX * sizeof (float);
  float *p1 = (float *) malloc (bytes);
  float *p2 = (float *) malloc (bytes);
  float *v1 = (float *) malloc (bytes);
  float *v2 = (float *) malloc (bytes);

  vec_mult_ref (p1, v1, v2, MAX);
  vec_mult (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <stdlib.h>
#include <omp.h>
#define THRESHOLD 1000
const int MAX = 1800;
/* Abort unless a and b agree on their first N elements.  */
void check (short *a, short *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (short *a1, short *a2, int N)
{
  short sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p[k] = v1[k] * v2[k] for the first N elements.  */
void vec_mult_ref (short *p, short *v1, short *v2, int N)
{
  int k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  The target data region (which
   has no if clause) maps p "from"; the nested target construct is guarded
   by if (N > THRESHOLD), maps v1 and v2 "to", and aborts when
   omp_is_initial_device () reports host execution.  */
void vec_mult (short *p, short *v1, short *v2, int N)
{
int i;
#pragma omp target data map(from: p[0:N])
#pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
{
if (omp_is_initial_device ())
abort ();
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
}
}
/* Compare the host reference product against the offloaded product on
   MAX-element heap buffers.  */
int main ()
{
  const size_t bytes = MAX * sizeof (short);
  short *p1 = (short *) malloc (bytes);
  short *p2 = (short *) malloc (bytes);
  short *v1 = (short *) malloc (bytes);
  short *v2 = (short *) malloc (bytes);

  init (v1, v2, MAX);
  vec_mult_ref (p1, v1, v2, MAX);
  vec_mult (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,94 @@
/* { dg-do run } */
#include <stdlib.h>
const int MAX = 1800;
/* Abort unless a and b agree on their first N elements.  */
void check (int *a, int *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (int *a1, int *a2, int N)
{
  int sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Like init, but a1 alternates between -10 and +10.  */
void init_again (int *a1, int *a2, int N)
{
  int sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign * 10;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Host reference: p = v1*v2 with the init data, then p += v1*v2 with the
   init_again data.

   The first initialisation now uses the element count N passed by the
   caller, like every other access in this function, instead of the global
   MAX; with the old code a caller passing buffers shorter than MAX would
   have had them overrun.  */
void vec_mult_ref (int *p, int *v1, int *v2, int N)
{
  int i;
  init (v1, v2, N);
  for (i = 0; i < N; i++)
    p[i] = v1[i] * v2[i];
  init_again (v1, v2, N);
  for (i = 0; i < N; i++)
    p[i] = p[i] + (v1[i] * v2[i]);
}
/* Offloaded counterpart of vec_mult_ref.  A single target data region maps
   v1 and v2 "to" and p "from"; after the first target region the inputs
   are rewritten on the host by init_again and pushed to the device with
   "target update to" before the second target region accumulates into p.

   The first initialisation now uses the parameter N rather than the
   global MAX, so the host writes cover exactly the N elements that the
   map clauses transfer.  */
void vec_mult (int *p, int *v1, int *v2, int N)
{
  int i;
  init (v1, v2, N);
  #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
    {
      #pragma omp target
        #pragma omp parallel for
          for (i = 0; i < N; i++)
            p[i] = v1[i] * v2[i];
      init_again (v1, v2, N);
      #pragma omp target update to(v1[:N], v2[:N])
      #pragma omp target
        #pragma omp parallel for
          for (i = 0; i < N; i++)
            p[i] = p[i] + (v1[i] * v2[i]);
    }
}
/* Compare the host reference against the offloaded computation on
   MAX-element heap buffers; both callees initialise v1/v2 themselves.  */
int main ()
{
  const size_t bytes = MAX * sizeof (int);
  int *p1 = (int *) malloc (bytes);
  int *p2 = (int *) malloc (bytes);
  int *v1 = (int *) malloc (bytes);
  int *v2 = (int *) malloc (bytes);

  vec_mult_ref (p1, v1, v2, MAX);
  vec_mult (p2, v1, v2, MAX);
  check (p1, p2, MAX);

  free (v2);
  free (v1);
  free (p2);
  free (p1);
  return 0;
}

View file

@ -0,0 +1,96 @@
/* { dg-do run } */
#include <stdlib.h>
const int MAX = 1800;
/* Abort unless a and b agree on their first N elements.  */
void check (int *a, int *b, int N)
{
  int idx = 0;
  while (idx < N)
    {
      if (a[idx] != b[idx])
        abort ();
      idx++;
    }
}
/* Fill a1[0..N) with alternating -1/+1 and a2[0..N) with the index.  */
void init (int *a1, int *a2, int N)
{
  int sign = -1;
  int idx = 0;
  while (idx < N)
    {
      a1[idx] = sign;
      a2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Overwrite a[0..N) with the element index and report (always 1) that the
   array was changed.  */
int maybe_init_again (int *a, int N)
{
  int idx = 0;
  while (idx < N)
    {
      a[idx] = idx;
      idx++;
    }
  return 1;
}
/* Host reference: p = v1*v2 with the init data, then p += v1*v2 after both
   inputs were rewritten by maybe_init_again.  */
void vec_mult_ref (int *p, int *v1, int *v2, int N)
{
  int k;

  init (v1, v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }

  maybe_init_again (v1, N);
  maybe_init_again (v2, N);
  k = 0;
  while (k < N)
    {
      p[k] = p[k] + (v1[k] * v2[k]);
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref.  Inside one target data region
   the first target computes p = v1*v2; each input is then rewritten on the
   host and pushed with "target update ... to", guarded by an if clause on
   the value returned by maybe_init_again; the second target accumulates
   into p.  */
void vec_mult (int *p, int *v1, int *v2, int N)
{
int i;
init (v1, v2, N);
#pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
{
int changed;
#pragma omp target
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = v1[i] * v2[i];
changed = maybe_init_again (v1, N);
#pragma omp target update if (changed) to(v1[:N])
changed = maybe_init_again (v2, N);
#pragma omp target update if (changed) to(v2[:N])
#pragma omp target
#pragma omp parallel for
for (i = 0; i < N; i++)
p[i] = p[i] + (v1[i] * v2[i]);
}
}
/* Compare the host reference against the offloaded computation on
   MAX-element heap buffers; both callees initialise v1/v2 themselves.  */
int main ()
{
  const size_t bytes = MAX * sizeof (int);
  int *p = (int *) malloc (bytes);
  int *p1 = (int *) malloc (bytes);
  int *v1 = (int *) malloc (bytes);
  int *v2 = (int *) malloc (bytes);

  vec_mult_ref (p, v1, v2, MAX);
  vec_mult (p1, v1, v2, MAX);
  check (p, p1, MAX);

  free (v2);
  free (v1);
  free (p1);
  free (p);
  return 0;
}

View file

@ -0,0 +1,36 @@
/* { dg-do run } */
#include <stdlib.h>
#define THRESHOLD 20
#pragma omp declare target
/* Naive recursive Fibonacci; any n <= 0 yields 0.  Declared inside a
   declare target region so that target regions can call it.  */
int fib (int n)
{
  if (n <= 0)
    return 0;
  if (n == 1)
    return 1;
  return fib (n - 1) + fib (n - 2);
}
#pragma omp end declare target
/* Compute fib (n) inside a target region that is only offloaded when
   n > THRESHOLD.

   The result x is written in the region and read on the host afterwards,
   so it is mapped "from" explicitly.  Relying on the implicit treatment of
   scalars is fragile: newer OpenMP versions make unmapped scalars
   firstprivate in a target region, in which case the assignment would not
   be visible to the caller.  */
int fib_wrapper (int n)
{
  int x = 0;
  #pragma omp target if(n > THRESHOLD) map(from: x)
    x = fib (n);
  return x;
}
/* Check fib_wrapper against fib for one argument below THRESHOLD (15) and
   one above it (25).  */
int main ()
{
  static const int inputs[] = { 15, 25 };
  unsigned int idx;

  for (idx = 0; idx < sizeof inputs / sizeof inputs[0]; idx++)
    {
      const int n = inputs[idx];
      if (fib (n) != fib_wrapper (n))
        abort ();
    }
  return 0;
}

View file

@ -0,0 +1,62 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.000001
#define N 100000
/* File-scope buffers placed in a declare target region: p1 holds the host
   reference result, p2 the offloaded result, v1 and v2 the inputs.  */
#pragma omp declare target
float p1[N], p2[N], v1[N], v2[N];
#pragma omp end declare target
/* Fill the global v1 with alternating -0.01/+0.01 and v2 with the index.  */
void init ()
{
  int sign = -1;
  int idx = 0;
  while (idx < N)
    {
      v1[idx] = sign * 0.01;
      v2[idx] = idx;
      sign = -sign;
      idx++;
    }
}
/* Abort if the global results p1 and p2 differ by more than EPS in any of
   their N elements.  */
void check ()
{
  int idx = 0;
  while (idx < N)
    {
      if (p1[idx] - p2[idx] > EPS)
        abort ();
      if (p2[idx] - p1[idx] > EPS)
        abort ();
      idx++;
    }
}
/* Host reference: p1[k] = v1[k] * v2[k] on the global arrays.  */
void vec_mult_ref ()
{
  int k = 0;
  while (k < N)
    {
      p1[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded product on the global arrays: push v1 and v2 with "target
   update to", compute p2 in a target region without map clauses, then
   fetch p2 with "target update from".  */
void vec_mult ()
{
int i;
#pragma omp target update to(v1, v2)
#pragma omp target
#pragma omp parallel for
for (i = 0; i < N; i++)
p2[i] = v1[i] * v2[i];
#pragma omp target update from(p2)
}
/* Initialise the global inputs, compute the host reference (p1) and the
   offloaded result (p2), and compare them.  */
int main ()
{
init ();
vec_mult_ref ();
vec_mult ();
check ();
return 0;
}

View file

@ -0,0 +1,67 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.00001
#define N 1000
/* Q and Pfun are placed in a declare target region so that target regions
   can use them.  */
#pragma omp declare target
float Q[N][N];
/* Product of the (i, k) entry of Q and its transposed counterpart.  */
float Pfun (const int i, const int k)
{
return Q[i][k] * Q[k][i];
}
#pragma omp end declare target
/* Set Q[r][c] to 0.001 * r * c for the whole N x N matrix.  */
void init ()
{
  int r, c;
  for (r = 0; r < N; r++)
    {
      c = 0;
      while (c < N)
        {
          Q[r][c] = 0.001 * r * c;
          c++;
        }
    }
}
/* Host reference: sum of Pfun (i, k) over i in [0, N).  */
float accum_ref (int k)
{
  float total = 0.0;
  int row = 0;
  while (row < N)
    {
      total += Pfun (row, k);
      row++;
    }
  return total;
}
/* Offloaded counterpart of accum_ref: sum of Pfun (i, k) over i in [0, N),
   computed by a parallel for reduction inside a target region.

   The accumulator tmp is returned to the caller, so it is mapped tofrom
   explicitly.  Without the clause the result depends on the implicit
   treatment of scalars, which newer OpenMP versions define as
   firstprivate for target regions (the sum would then be lost).  */
float accum (int k)
{
  int i;
  float tmp = 0.0;
  #pragma omp target map(tofrom: tmp)
    #pragma omp parallel for reduction(+:tmp)
      for (i = 0; i < N; i++)
        tmp += Pfun (i, k);
  return tmp;
}
/* Abort if a deviates from b by more than EPS, measured relative to b
   (or as the plain value of a when b is zero).  */
void check (float a, float b)
{
  float err;
  float magnitude;

  if (b == 0.0)
    err = a;
  else
    err = (a - b) / b;

  magnitude = (err > 0) ? err : -err;
  if (magnitude > EPS)
    abort ();
}
/* Initialise Q on the host, push it to the device copy with "target update
   to", then compare the offloaded and host accumulations for every k.  */
int main ()
{
int i;
init ();
#pragma omp target update to(Q)
for (i = 0; i < N; i++)
check (accum (i), accum_ref (i));
return 0;
}

View file

@ -0,0 +1,84 @@
/* { dg-do run } */
/* { dg-options "-O2" } */
/* { dg-additional-options "-msse2" { target sse2_runtime } } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include <stdlib.h>
#define EPS 0.00001
#define N 10000
#define M 1024
/* Q and Pfun are placed in a declare target region so that target regions
   can use them.  */
#pragma omp declare target
float Q[N][N];
/* Product of the (i, k) entry of Q and its transposed counterpart.  The
   declare simd directive marks i as uniform and k as linear and requests
   only the notinbranch variant.  */
#pragma omp declare simd uniform(i) linear(k) notinbranch
float Pfun (const int i, const int k)
{
return Q[i][k] * Q[k][i];
}
#pragma omp end declare target
/* Set Q[r][c] to 0.001 * r * c for the whole N x N matrix.  */
void init ()
{
  int r, c;
  for (r = 0; r < N; r++)
    {
      c = 0;
      while (c < N)
        {
          Q[r][c] = 0.001 * r * c;
          c++;
        }
    }
}
/* Host reference: for every row i in [0, N) sum Pfun (i, k) over
   k in [0, M), and add the per-row sums together.  */
float accum_ref ()
{
  float total = 0.0;
  int row = 0;
  while (row < N)
    {
      float row_sum = 0.0;
      int col = 0;
      while (col < M)
        {
          row_sum += Pfun(row,col);
          col++;
        }
      total += row_sum;
      row++;
    }
  return total;
}
/* Offloaded counterpart of accum_ref: the outer loop is a parallel for
   reduction inside a target region and the inner loop is a simd reduction
   calling the declare-simd function Pfun.

   The accumulator tmp is returned to the caller, so it is mapped tofrom
   explicitly.  Without the clause the result depends on the implicit
   treatment of scalars, which newer OpenMP versions define as
   firstprivate for target regions (the sum would then be lost).  */
float accum ()
{
  int i, k;
  float tmp = 0.0;
  #pragma omp target map(tofrom: tmp)
    #pragma omp parallel for reduction(+:tmp)
      for (i = 0; i < N; i++)
        {
          float tmp1 = 0.0;
          #pragma omp simd reduction(+:tmp1)
            for (k = 0; k < M; k++)
              tmp1 += Pfun(i,k);
          tmp += tmp1;
        }
  return tmp;
}
/* Abort if a deviates from b by more than EPS, measured relative to b
   (or as the plain value of a when b is zero).  */
void check (float a, float b)
{
  float err;
  float magnitude;

  if (b == 0.0)
    err = a;
  else
    err = (a - b) / b;

  magnitude = (err > 0) ? err : -err;
  if (magnitude > EPS)
    abort ();
}
/* Initialise Q on the host, push it to the device copy with "target update
   to", then compare the offloaded and host accumulations.  */
int main ()
{
init ();
#pragma omp target update to(Q)
check (accum (), accum_ref ());
return 0;
}

View file

@ -0,0 +1,72 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.0001
#define N 1024*1024
/* Set B[k] = 0.1 * k and C[k] = 0.01 * k * k for k in [0, n).  */
void init (float B[], float C[], int n)
{
  int k = 0;
  while (k < n)
    {
      B[k] = 0.1 * k;
      C[k] = 0.01 * k * k;
      k++;
    }
}
/* Host reference: dot product of the first n elements of B and C,
   accumulated in float.  */
float dotprod_ref (float B[], float C[], int n)
{
  float total = 0.0;
  int k = 0;
  while (k < n)
    {
      total += B[k] * C[k];
      k++;
    }
  return total;
}
/* Offloaded dot product of the first n elements of B and C.  The
   iteration space is cut into blocks of block_size elements that are
   distributed over a league of num_teams teams (each limited to
   block_threads threads); every block is summed by a parallel for
   reduction and the teams combine their partial sums through the teams
   reduction.

   sum is returned to the caller, so it is mapped tofrom explicitly rather
   than relying on the implicit treatment of scalars in target regions
   (firstprivate in newer OpenMP versions, which would drop the result).  */
float dotprod (float B[], float C[], int n, int block_size,
               int num_teams, int block_threads)
{
  int i, i0;
  float sum = 0;
  #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
    #pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
                      reduction(+:sum)
      #pragma omp distribute
        for (i0 = 0; i0 < n; i0 += block_size)
          #pragma omp parallel for reduction(+:sum)
            for (i = i0; i < ((i0 + block_size > n) ? n : i0 + block_size); i++)
              sum += B[i] * C[i];
  return sum;
}
/* Abort if a deviates from b by more than EPS, measured relative to b
   (or as the plain value of a when b is zero).  */
void check (float a, float b)
{
  float err;
  float magnitude;

  if (b == 0.0)
    err = a;
  else
    err = (a - b) / b;

  magnitude = (err > 0) ? err : -err;
  if (magnitude > EPS)
    abort ();
}
/* Compare the host dot product against the offloaded one (blocks of 32,
   2 teams, 8 threads per team) on N-element heap buffers.  */
int main ()
{
  const size_t bytes = N * sizeof (float);
  float *v1 = (float *) malloc (bytes);
  float *v2 = (float *) malloc (bytes);
  float expected, actual;

  init (v1, v2, N);
  expected = dotprod_ref (v1, v2, N);
  actual = dotprod (v1, v2, N, 32, 2, 8);
  check (expected, actual);

  free (v2);
  free (v1);
  return 0;
}

View file

@ -0,0 +1,67 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.0001
#define N 1024*1024
/* Set B[k] = 0.1 * k and C[k] = 0.01 * k * k for k in [0, n).  */
void init (float B[], float C[], int n)
{
  int k = 0;
  while (k < n)
    {
      B[k] = 0.1 * k;
      C[k] = 0.01 * k * k;
      k++;
    }
}
/* Host reference: dot product of the first n elements of B and C,
   accumulated in float.  */
float dotprod_ref (float B[], float C[], int n)
{
  float total = 0.0;
  int k = 0;
  while (k < n)
    {
      total += B[k] * C[k];
      k++;
    }
  return total;
}
/* Offloaded dot product of the first n elements of B and C using the
   combined "target teams" construct with a "distribute parallel for"
   reduction.

   Two clauses were added for sum.  map(tofrom: sum) makes the transfer of
   the result back to the host explicit instead of depending on the
   implicit treatment of scalars in target regions.  reduction(+:sum) on
   the teams level gives every team its own partial sum; without it all
   teams would combine their loop reductions into the same shared
   variable.  */
float dotprod (float B[], float C[], int n)
{
  int i;
  float sum = 0;
  #pragma omp target teams map(to: B[0:n], C[0:n]) \
                           map(tofrom: sum) reduction(+:sum)
    #pragma omp distribute parallel for reduction(+:sum)
      for (i = 0; i < n; i++)
        sum += B[i] * C[i];
  return sum;
}
/* Abort if a deviates from b by more than EPS, measured relative to b
   (or as the plain value of a when b is zero).  */
void check (float a, float b)
{
  float err;
  float magnitude;

  if (b == 0.0)
    err = a;
  else
    err = (a - b) / b;

  magnitude = (err > 0) ? err : -err;
  if (magnitude > EPS)
    abort ();
}
/* Compare the host dot product against the offloaded one on N-element
   heap buffers.  */
int main ()
{
  const size_t bytes = N * sizeof (float);
  float *v1 = (float *) malloc (bytes);
  float *v2 = (float *) malloc (bytes);
  float expected, actual;

  init (v1, v2, N);
  expected = dotprod_ref (v1, v2, N);
  actual = dotprod (v1, v2, N);
  check (expected, actual);

  free (v2);
  free (v1);
  return 0;
}

View file

@ -0,0 +1,70 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.0001
#define N 1024*1024
/* Set B[k] = 0.1 * k and C[k] = 0.01 * k * k for k in [0, n).  */
void init (float B[], float C[], int n)
{
  int k = 0;
  while (k < n)
    {
      B[k] = 0.1 * k;
      C[k] = 0.01 * k * k;
      k++;
    }
}
/* Host reference: dot product of the first n elements of B and C,
   accumulated in float.  */
float dotprod_ref (float B[], float C[], int n)
{
  float total = 0.0;
  int k = 0;
  while (k < n)
    {
      total += B[k] * C[k];
      k++;
    }
  return total;
}
/* Offloaded dot product of the first n elements of B and C: 8 teams of at
   most 16 threads, with the iterations handed to teams in chunks of 1024
   (dist_schedule) and to threads in chunks of 64 (schedule).

   Two clauses were added for sum.  map(tofrom: sum) makes the transfer of
   the result back to the host explicit instead of depending on the
   implicit treatment of scalars in target regions.  reduction(+:sum) on
   the teams construct gives every team its own partial sum; without it
   all teams would combine their loop reductions into the same shared
   variable.  */
float dotprod (float B[], float C[], int n)
{
  int i;
  float sum = 0;
  #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
    #pragma omp teams num_teams(8) thread_limit(16) reduction(+:sum)
      #pragma omp distribute parallel for reduction(+:sum) \
                  dist_schedule(static, 1024) \
                  schedule(static, 64)
        for (i = 0; i < n; i++)
          sum += B[i] * C[i];
  return sum;
}
/* Abort if a deviates from b by more than EPS, measured relative to b
   (or as the plain value of a when b is zero).  */
void check (float a, float b)
{
  float err;
  float magnitude;

  if (b == 0.0)
    err = a;
  else
    err = (a - b) / b;

  magnitude = (err > 0) ? err : -err;
  if (magnitude > EPS)
    abort ();
}
/* Compare the host dot product against the offloaded one on N-element
   heap buffers.  */
int main ()
{
  const size_t bytes = N * sizeof (float);
  float *v1 = (float *) malloc (bytes);
  float *v2 = (float *) malloc (bytes);
  float expected, actual;

  init (v1, v2, N);
  expected = dotprod_ref (v1, v2, N);
  actual = dotprod (v1, v2, N);
  check (expected, actual);

  free (v2);
  free (v1);
  return 0;
}

View file

@ -0,0 +1,65 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.00001
#define N 10000
/* Set a[k] = 0.1 * k and b[k] = 0.01 * k * k for k in [0, n).  */
void init (float *a, float *b, int n)
{
  int k = 0;
  while (k < n)
    {
      a[k] = 0.1 * k;
      b[k] = 0.01 * k * k;
      k++;
    }
}
/* Host reference: p[k] = v1[k] * v2[k] for the first n elements.  */
void vec_mult_ref (float *p, float *v1, float *v2, int n)
{
  int k = 0;
  while (k < n)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }
}
/* Offloaded counterpart of vec_mult_ref: a combined "target teams"
   construct maps v1 and v2 "to" and p "from", and the product loop is a
   "distribute simd" loop.  */
void vec_mult (float *p, float *v1, float *v2, int n)
{
int i;
#pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
#pragma omp distribute simd
for (i = 0; i < n; i++)
p[i] = v1[i] * v2[i];
}
/* Abort if any b[k] differs from a[k] by more than EPS, measured
   relative to a[k] (or as the plain value of b[k] when a[k] is zero).  */
void check (float *a, float *b, int n)
{
  int k;

  for (k = 0; k < n; k++)
    {
      float rel;

      if (a[k] == 0.0)
	rel = b[k];
      else
	rel = (b[k] - a[k]) / a[k];

      if (rel > 0 ? rel > EPS : -rel > EPS)
	abort ();
    }
}
/* Driver: multiply two N-element vectors with the reference and the
   offloaded routine and compare the products element by element.  */
int main ()
{
  float *p1, *p2, *v1, *v2;

  p1 = (float *) malloc (N * sizeof (float));
  p2 = (float *) malloc (N * sizeof (float));
  v1 = (float *) malloc (N * sizeof (float));
  v2 = (float *) malloc (N * sizeof (float));

  init (v1, v2, N);
  vec_mult_ref (p1, v1, v2, N);
  vec_mult (p2, v1, v2, N);
  check (p1, p2, N);

  free (p1);
  free (p2);
  free (v1);
  free (v2);
  return 0;
}

View file

@ -0,0 +1,65 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.00001
#define N 10000
/* Initialize a[k] = 0.1 * k and b[k] = 0.01 * k * k for k in [0, n).  */
void init (float *a, float *b, int n)
{
  int k;

  for (k = 0; k < n; ++k)
    a[k] = 0.1 * k;
  for (k = 0; k < n; ++k)
    b[k] = 0.01 * k * k;
}
/* Host-only reference product: p[k] = v1[k] * v2[k], k in [0, n).  */
void vec_mult_ref (float *p, float *v1, float *v2, int n)
{
  int k;

  for (k = 0; k < n; ++k)
    {
      p[k] = v1[k] * v2[k];
    }
}
/* Offloaded element-wise product: v1 and v2 are copied to the device,
   p is copied back, and the loop is run with
   `distribute parallel for simd'.  */
void vec_mult (float *p, float *v1, float *v2, int n)
{
  int idx;
  #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
  #pragma omp distribute parallel for simd
  for (idx = 0; idx < n; idx++)
    p[idx] = v1[idx] * v2[idx];
}
/* Compare b against a element by element; abort on the first element
   whose relative error (absolute when a[k] is zero) exceeds EPS.  */
void check (float *a, float *b, int n)
{
  int k = 0;

  while (k < n)
    {
      float rel = (b[k] - a[k]) / a[k];

      if (a[k] == 0.0)
	rel = b[k];
      if (rel > 0 ? rel > EPS : -rel > EPS)
	abort ();
      k++;
    }
}
/* Driver: run the reference and the offloaded vector product on
   N-element inputs and compare the results.  */
int main ()
{
  float *p1, *p2, *v1, *v2;

  p1 = (float *) malloc (N * sizeof (float));
  p2 = (float *) malloc (N * sizeof (float));
  v1 = (float *) malloc (N * sizeof (float));
  v2 = (float *) malloc (N * sizeof (float));

  init (v1, v2, N);
  vec_mult_ref (p1, v1, v2, N);
  vec_mult (p2, v1, v2, N);
  check (p1, p2, N);

  free (p1);
  free (p2);
  free (v1);
  free (v2);
  return 0;
}

View file

@ -0,0 +1,68 @@
/* { dg-do run } */
#include <stdlib.h>
#define EPS 0.00001
#define N 100000
#define CHUNKSZ 1000
float Y[N];
float Z[N];
#pragma omp declare target
/* Return the negation of A; declared for use inside target regions.  */
float F (float a)
{
  float negated = -a;
  return negated;
}
#pragma omp end declare target
/* Sequential reference: replace every Y[k] with F (Y[k]).  */
void pipedF_ref ()
{
  int k = 0;

  while (k < N)
    {
      Y[k] = F (Y[k]);
      k++;
    }
}
/* Apply F to every element of Z, one CHUNKSZ-sized chunk at a time.
   Each chunk is handled by its own task, which offloads the chunk to a
   target region and processes it with a parallel loop.  */
void pipedF ()
{
int i, C;
/* The inner loop runs to C + CHUNKSZ without clamping to N, so this
   relies on N being a multiple of CHUNKSZ.  */
for (C = 0; C < N; C += CHUNKSZ)
{
#pragma omp task
/* No map type is given, so the chunk uses the default map type.  */
#pragma omp target map(Z[C:CHUNKSZ])
#pragma omp parallel for
for (i = C; i < C + CHUNKSZ; i++)
Z[i] = F (Z[i]);
}
/* Wait for all chunk tasks before returning to the caller.  */
#pragma omp taskwait
}
/* Give Y and Z identical contents: element k is 0.1 * k (rounded to
   float once, then copied).  */
void init ()
{
  int k;

  for (k = 0; k < N; k++)
    {
      Z[k] = 0.1 * k;
      Y[k] = Z[k];
    }
}
/* Abort if any Y[k] differs from Z[k] by more than EPS, measured
   relative to Z[k] (or as the plain value of Y[k] when Z[k] is zero).  */
void check ()
{
  int k;

  for (k = 0; k < N; k++)
    {
      float rel;

      if (Z[k] == 0.0)
	rel = Y[k];
      else
	rel = (Y[k] - Z[k]) / Z[k];

      if (rel > 0 ? rel > EPS : -rel > EPS)
	abort ();
    }
}
/* Driver: initialize Y and Z alike, transform Y sequentially and Z
   through the task/target pipeline, then compare them.  */
int main (void)
{
  init ();

  pipedF_ref ();
  pipedF ();

  check ();
  return 0;
}

View file

@ -0,0 +1,95 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <omp.h>
#include <stdlib.h>
#define EPS 0.00001
#define N 10000
#pragma omp declare target
/* Fill a[k] = 0.1 * k and b[k] = 0.01 * k * k for k in [0, n).
   Declared for the device because vec_mult calls it inside a target
   region.  */
void init (float *a, float *b, int n)
{
  int k = 0;

  while (k < n)
    {
      a[k] = 0.1 * k;
      b[k] = 0.01 * k * k;
      k++;
    }
}
#pragma omp end declare target
/* Host reference: allocate and initialize two n-element vectors, store
   their element-wise product in p, and release them again.  The
   incoming values of v1 and v2 are ignored; the parameters only serve
   as local pointers.  */
void vec_mult_ref (float *p, float *v1, float *v2, int n)
{
  int k = 0;

  v1 = (float *) malloc (n * sizeof (float));
  v2 = (float *) malloc (n * sizeof (float));
  init (v1, v2, n);

  while (k < n)
    {
      p[k] = v1[k] * v2[k];
      k++;
    }

  free (v1);
  free (v2);
}
/* Offloaded product computed by two dependent tasks.  The first task
   allocates and initializes v1 and v2 inside a target region; the
   second, ordered after it through the depend clauses, multiplies them
   into p and frees them.  Both target regions abort if they end up
   running on the initial device.  The incoming values of v1 and v2 are
   ignored.  */
void vec_mult (float *p, float *v1, float *v2, int n)
{
  int i;
  #pragma omp task shared(v1, v2) depend(out: v1, v2)
  #pragma omp target map(v1, v2)
    {
      if (omp_is_initial_device ())
	abort ();
      v1 = (float *) malloc (n * sizeof (float));
      v2 = (float *) malloc (n * sizeof (float));
      init (v1, v2, n);
    }
  #pragma omp task shared(v1, v2) depend(in: v1, v2)
  #pragma omp target map(to: v1, v2) map(from: p[0:n])
    {
      if (omp_is_initial_device ())
	abort ();
      #pragma omp parallel for
      for (i = 0; i < n; i++)
	p[i] = v1[i] * v2[i];
      free (v1);
      free (v2);
    }
  /* Both tasks share this function's v1/v2 and the caller reads p right
     after the call, so the tasks must have finished before returning.  */
  #pragma omp taskwait
}
/* Abort if any b[k] differs from a[k] by more than EPS, measured
   relative to a[k] (or as the plain value of b[k] when a[k] is zero).  */
void check (float *a, float *b, int n)
{
  int k;

  for (k = 0; k < n; k++)
    {
      float rel;

      if (a[k] == 0.0)
	rel = b[k];
      else
	rel = (b[k] - a[k]) / a[k];

      if (rel > 0 ? rel > EPS : -rel > EPS)
	abort ();
    }
}
/* Driver: compute the product with the host reference and with the
   task/target version, then compare.  */
int main ()
{
  float *p1 = (float *) malloc (N * sizeof (float));
  float *p2 = (float *) malloc (N * sizeof (float));
  /* Both callees overwrite their v1/v2 parameters, but the pointers are
     still passed by value, so give them a defined value instead of
     reading indeterminate ones.  */
  float *v1 = NULL, *v2 = NULL;
  vec_mult_ref (p1, v1, v2, N);
  vec_mult (p2, v1, v2, N);
  check (p1, p2, N);
  free (p1);
  free (p2);
  return 0;
}

View file

@ -0,0 +1,26 @@
/* { dg-do run } */
#include <stdlib.h>
/* Map A[0:4] in an enclosing target data region, then enter a target
   region that maps p[7:20] (p points at A[0]) together with A[0:4], and
   write one element through each name.  Aborts unless both writes are
   visible on the host afterwards.  */
void foo ()
{
int A[30], *p;
#pragma omp target data map(A[0:4])
{
/* p aliases A, so p[8] below is the same element as A[8].  */
p = &A[0];
#pragma omp target map(p[7:20]) map(A[0:4])
{
A[2] = 777;
p[8] = 777;
}
}
if (A[2] != 777 || A[8] != 777)
abort ();
}
/* Test entry point: run foo (), which aborts on failure.  */
int main ()
{
foo ();
return 0;
}

View file

@ -0,0 +1,27 @@
/* { dg-do run } */
#include <stdlib.h>
/* Map A[0:10] in an enclosing target data region, then enter a target
   region that maps p[3:7] (p points at A[0]) together with A[0:10].
   A[8] is written first through A and then through p; the test aborts
   unless the host sees A[2] == 777 and the later value 999 in A[8].  */
void foo ()
{
int A[30], *p;
#pragma omp target data map(A[0:10])
{
/* p aliases A, so p[8] below is the same element as A[8].  */
p = &A[0];
#pragma omp target map(p[3:7]) map(A[0:10])
{
A[2] = 777;
A[8] = 777;
p[8] = 999;
}
}
if (A[2] != 777 || A[8] != 999)
abort ();
}
/* Test entry point: run foo (), which aborts on failure.  */
int main ()
{
foo ();
return 0;
}

View file

@ -0,0 +1,59 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <omp.h>
#include <stdlib.h>
/* Exercise the if clause on `target' and `target data' for a value of
   `a' below, inside and above the (200, 400) window.

   c records whether the conditional target region ran on the initial
   device; d does the same for the unconditional one; b shows whether
   the unconditional region's write reached the host (it does not when
   the enclosing target data region already mapped b with `to').

   b, c and d are mapped explicitly: relying on the implicit mapping of
   scalars only works under OpenMP 4.0 rules, since later versions treat
   unmapped scalars as firstprivate and the results would be lost.  */
int main ()
{
  int a = 100;
  int b = 0;
  int c, d;

  /* a == 100: condition false.  */
  #pragma omp target if(a > 200 && a < 400) map(from: c)
    c = omp_is_initial_device ();
  #pragma omp target data map(to: b) if(a > 200 && a < 400)
    #pragma omp target map(tofrom: b) map(from: d)
      {
	b = 100;
	d = omp_is_initial_device ();
      }
  if (b != 100 || !c || d)
    abort ();

  /* a == 300: condition true.  */
  a += 200;
  b = 0;
  #pragma omp target if(a > 200 && a < 400) map(from: c)
    c = omp_is_initial_device ();
  #pragma omp target data map(to: b) if(a > 200 && a < 400)
    #pragma omp target map(tofrom: b) map(from: d)
      {
	b = 100;
	d = omp_is_initial_device ();
      }
  if (b != 0 || c || d)
    abort ();

  /* a == 500: condition false again.  */
  a += 200;
  b = 0;
  #pragma omp target if(a > 200 && a < 400) map(from: c)
    c = omp_is_initial_device ();
  #pragma omp target data map(to: b) if(a > 200 && a < 400)
    #pragma omp target map(tofrom: b) map(from: d)
      {
	b = 100;
	d = omp_is_initial_device ();
      }
  if (b != 100 || !c || d)
    abort ();
  return 0;
}

View file

@ -0,0 +1,29 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <omp.h>
#include <stdlib.h>
#define N 10
/* Launch one target region per device number 0..N-1 and record whether
   each ran on the initial device.  Numbers below omp_get_num_devices ()
   are expected to run on a device, the remaining ones on the host.  */
int main ()
{
  int i;
  int offload[N];
  int num = omp_get_num_devices();
  /* Only N device numbers are exercised; clamp so that a system with
     more than N devices does not make the first check loop read past
     the end of offload[].  */
  int checked = (num < N) ? num : N;
  #pragma omp parallel for
  for (i = 0; i < N; i++)
    #pragma omp target device(i) map(from: offload[i:1])
      offload[i] = omp_is_initial_device ();
  for (i = 0; i < checked; i++)
    if (offload[i])
      abort ();
  for (i = checked; i < N; i++)
    if (!offload[i])
      abort ();
  return 0;
}

View file

@ -0,0 +1,27 @@
/* { dg-do run } */
/* { dg-require-effective-target offload_device } */
#include <omp.h>
#include <stdlib.h>
/* A target region must run on a device with the initial default-device
   setting, and on the initial device once the default device is set to
   omp_get_num_devices ().

   `res' is mapped explicitly so that the value written in the target
   region reaches the host even when unmapped scalars are not implicitly
   tofrom (OpenMP 4.5 and later).  The unused `default_device' local of
   the original was dropped.  */
int main ()
{
  int res;
  #pragma omp target map(from: res)
    res = omp_is_initial_device ();
  if (res)
    abort ();
  omp_set_default_device (omp_get_num_devices ());
  #pragma omp target map(from: res)
    res = omp_is_initial_device ();
  if (!res)
    abort ();
  return 0;
}

View file

@ -18,7 +18,7 @@ foo (int f)
if (omp_get_level () != 0 || !omp_is_initial_device ())
abort ();
#pragma omp target if (v <= 1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
if (omp_get_level () != 0)
abort ();
#pragma omp target device (d) if (v <= 1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
@ -30,7 +30,7 @@ foo (int f)
if (omp_get_level () != 0 || !omp_is_initial_device ())
abort ();
#pragma omp target if (1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
if (omp_get_level () != 0)
abort ();
#pragma omp target device (d) if (1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
@ -59,7 +59,7 @@ foo (int f)
#pragma omp target data if (v <= 1) map (to: h)
{
#pragma omp target if (v <= 1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 8)
if (omp_get_level () != 0 || h++ != 8)
abort ();
#pragma omp target update if (v <= 1) from (h)
}
@ -87,7 +87,7 @@ foo (int f)
#pragma omp target data if (1) map (to: h)
{
#pragma omp target if (1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 12)
if (omp_get_level () != 0 || h++ != 12)
abort ();
#pragma omp target update if (1) from (h)
}

View file

@ -0,0 +1,44 @@
! { dg-do run }
! Helpers and offloaded kernel for the element-wise vector product test
! (target construct without map clauses).
module e_50_1_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Compute p = v1 * v2 inside a target region, then verify p after the
  ! region has ended.
  subroutine vec_mult (N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
    call check (p, N)
  end subroutine vec_mult
end module e_50_1_mod
! Driver: run the vector-multiply test on 1000 elements.
program e_50_1
  use e_50_1_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_50_1

View file

@ -0,0 +1,43 @@
! { dg-do run }
! Helpers and offloaded kernel for the vector product test where all
! three arrays appear in a single map clause.
module e_50_2_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Compute p = v1 * v2 inside a target region that maps v1, v2 and p,
  ! then verify p after the region has ended.
  subroutine vec_mult (N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target map(v1,v2,p)
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
    call check (p, N)
  end subroutine vec_mult
end module e_50_2_mod
! Driver: run the vector-multiply test on 1000 elements.
program e_50_2
  use e_50_2_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_50_2

View file

@ -0,0 +1,43 @@
! { dg-do run }
! Helpers and offloaded kernel for the vector product test with
! directional map clauses (inputs `to', result `from').
module e_50_3_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Compute p = v1 * v2 inside a target region; v1 and v2 are mapped
  ! `to', p is mapped `from'.  p is verified after the region.
  subroutine vec_mult (N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target map(to: v1,v2) map(from: p)
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
    call check (p, N)
  end subroutine vec_mult
end module e_50_3_mod
! Driver: run the vector-multiply test on 1000 elements.
program e_50_3
  use e_50_3_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_50_3

View file

@ -0,0 +1,59 @@
! { dg-do run }
! Vector product test using array sections in map clauses, once with
! pointer dummies (vec_mult_1) and once with assumed-size dummies
! (vec_mult_2).
module e_50_4_mod
contains
  ! Set v1(i) = i + 2 and v2(i) = i - 3 for i = 1..N.
  subroutine init (v1, v2, N)
    integer :: i, N
    real, pointer, dimension(:) :: v1, v2
    do i = 1, N
      v1(i) = i + 2.0
      v2(i) = i - 3.0
    end do
  end subroutine

  ! Abort if any p(i) differs from (i + 2) * (i - 3) by more than EPS.
  subroutine check (p, N)
    integer :: i, N
    real, parameter :: EPS = 0.00001
    real, pointer, dimension(:) :: p
    ! Declared explicitly rather than left to implicit typing, as in
    ! the check routines of the sibling tests.
    real :: diff
    do i = 1, N
      diff = p(i) - (i + 2.0) * (i - 3.0)
      if (diff > EPS .or. -diff > EPS) call abort
    end do
  end subroutine

  ! p(1:N) = v1(1:N) * v2(1:N) in a target region; pointer dummies,
  ! with the mapped extent given by array sections.
  subroutine vec_mult_1 (p, v1, v2, N)
    integer :: i, N
    real, pointer, dimension(:) :: p, v1, v2
    !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
  end subroutine

  ! Same computation with assumed-size dummies; the array sections
  ! supply the extent that the declarations do not carry.
  subroutine vec_mult_2 (p, v1, v2, N)
    real, dimension(*) :: p, v1, v2
    integer :: i, N
    !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
  end subroutine
end module
! Driver: run both vector-multiply variants on 1000 elements and check
! each result.
program e_50_4
  use e_50_4_mod, only : init, check, vec_mult_1, vec_mult_2
  integer :: n
  real, pointer, dimension(:) :: v1, v2
  real, pointer, dimension(:) :: p1, p2

  n = 1000
  allocate (p1(n), p2(n), v1(n), v2(n))
  call init (v1, v2, n)

  call vec_mult_1 (p1, v1, v2, n)
  call vec_mult_2 (p2, v1, v2, n)

  call check (p1, n)
  call check (p2, n)
  deallocate (p1, p2, v1, v2)
end program e_50_4

View file

@ -0,0 +1,47 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Vector product test with if clauses on the target and parallel do
! constructs.
module e_50_5_mod
  integer, parameter :: THRESHOLD1 = 500
  integer, parameter :: THRESHOLD2 = 100
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Compute p = v1 * v2 in a target region guarded by N > THRESHOLD1;
  ! the region aborts if it finds itself on the initial device.  The
  ! inner loop is parallelized only when N > THRESHOLD2.
  subroutine vec_mult (N)
    use omp_lib, only: omp_is_initial_device
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target if(N > THRESHOLD1) map(to: v1,v2) map(from: p)
      if (omp_is_initial_device ()) call abort
      !$omp parallel do if(N > THRESHOLD2)
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
    call check (p, N)
  end subroutine vec_mult
end module e_50_5_mod
! Driver: run the test with 1000 elements, which is above both
! thresholds defined in the module.
program e_50_5
  use e_50_5_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_50_5

View file

@ -0,0 +1,45 @@
! { dg-do run }
! Vector product test where the arrays are mapped by an enclosing
! target data region.
module e_51_1_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Map v1, v2 (to) and p (from) with target data, compute p = v1 * v2
  ! in a nested target region, and verify p once the data region ends.
  subroutine vec_mult (N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target data map(to: v1, v2) map(from: p)
      !$omp target
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_51_1_mod
! Driver: run the vector-multiply test on 1000 elements.
program e_51_1
  use e_51_1_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_51_1

View file

@ -0,0 +1,61 @@
! { dg-do run }
! Two target regions inside one target data region: the result array
! stays mapped while the inputs are re-sent for each region.
module e_51_2_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Same values with the two arrays swapped.
  subroutine init_again (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k - 3.0
      v2(k) = k + 2.0
    end do
  end subroutine init_again

  ! Abort if any p(k) differs from 2 * (k + 2) * (k - 3) by more than
  ! EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - 2 * (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! p = v1 * v2 in a first target region, then p = p + v1 * v2 in a
  ! second one after the inputs were re-initialized on the host.
  subroutine vec_mult (N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target data map(from: p)
      !$omp target map(to: v1, v2 )
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
      call init_again (v1, v2, N)
      !$omp target map(to: v1, v2 )
        !$omp parallel do
        do i = 1, N
          p(i) = p(i) + v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_51_2_mod
! Driver: run the two-region vector test on 1000 elements.
program e_51_2
  use e_51_2_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_51_2

View file

@ -0,0 +1,79 @@
! { dg-do run }
! Column normalization (the "gramSchmidt" example) on the host and with
! target regions nested in a target data region.
module e_51_3_mod
contains
  ! Q(i,k) = 10 * i + k.
  subroutine init (Q, rows, cols)
    integer :: i, k, rows, cols
    double precision :: Q(rows,cols)
    do k = 1, cols
      do i = 1, rows
        Q(i,k) = 10 * i + k
      end do
    end do
  end subroutine

  ! Abort if any P(i,k) differs from Q(i,k) by more than EPS.
  subroutine check (P, Q, rows, cols)
    integer :: i, k, rows, cols
    double precision, parameter :: EPS = 0.00001
    double precision :: P(rows,cols), Q(rows,cols), diff
    do k = 1, cols
      do i = 1, rows
        diff = P(i,k) - Q(i,k)
        if (diff > EPS .or. -diff > EPS) call abort
      end do
    end do
  end subroutine

  ! Host reference: scale every column of Q by the reciprocal of its
  ! Euclidean norm.
  subroutine gramSchmidt_ref (Q, rows, cols)
    integer :: i, k, rows, cols
    double precision :: Q(rows,cols), tmp
    do k = 1, cols
      tmp = 0.0d0
      do i = 1, rows
        tmp = tmp + (Q(i,k) * Q(i,k))
      end do
      tmp = 1.0d0 / sqrt (tmp)
      do i = 1, rows
        Q(i,k) = Q(i,k) * tmp
      end do
    end do
  end subroutine

  ! Offloaded version.  Q stays mapped for the whole data region.  The
  ! scalar tmp is mapped explicitly around the reduction (tofrom: the
  ! host needs the sum back) and around the scaling (to: the device
  ! needs the reciprocal norm computed on the host), so the exchange
  ! does not depend on implicit scalar mapping, which only behaves as
  ! tofrom under OpenMP 4.0 rules.
  subroutine gramSchmidt (Q, rows, cols)
    integer :: i, k, rows, cols
    double precision :: Q(rows,cols), tmp
    !$omp target data map(Q)
      do k = 1, cols
        tmp = 0.0d0
        !$omp target map(tofrom: tmp)
          !$omp parallel do reduction(+:tmp)
          do i = 1, rows
            tmp = tmp + (Q(i,k) * Q(i,k))
          end do
        !$omp end target
        tmp = 1.0d0 / sqrt (tmp)
        !$omp target map(to: tmp)
          !$omp parallel do
          do i = 1, rows
            Q(i,k) = Q(i,k) * tmp
          end do
        !$omp end target
      end do
    !$omp end target data
  end subroutine
end module
! Driver: normalize two identical 5x5 matrices, one with the host
! reference and one with the offloaded routine, and compare them.
program e_51_3
  use e_51_3_mod, only : init, check, gramSchmidt, gramSchmidt_ref
  double precision, pointer :: P(:,:), Q(:,:)
  integer :: rows, cols

  rows = 5
  cols = 5
  allocate (P(rows,cols), Q(rows,cols))

  call init (P, rows, cols)
  call init (Q, rows, cols)

  call gramSchmidt_ref (P, rows, cols)
  call gramSchmidt (Q, rows, cols)

  call check (P, Q, rows, cols)
  deallocate (P, Q)
end program e_51_3

View file

@ -0,0 +1,54 @@
! { dg-do run }
! Vector product test where the target data region (in foo) and the
! target region (in vec_mult) live in different procedures and the
! arrays are pointer dummies.
module e_51_4_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Initialize the inputs, call vec_mult inside a target data region
  ! that maps all three arrays, and verify p afterwards.
  subroutine foo (p, v1, v2, N)
    integer :: N
    real, pointer, dimension(:) :: p, v1, v2
    call init (v1, v2, N)
    !$omp target data map(to: v1, v2) map(from: p)
      call vec_mult (p, v1, v2, N)
    !$omp end target data
    call check (p, N)
  end subroutine foo

  ! p = v1 * v2 in a target region with its own map clauses.
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real, pointer, dimension(:) :: p, v1, v2
    integer :: i
    !$omp target map(to: v1, v2) map(from: p)
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
  end subroutine vec_mult
end module e_51_4_mod
! Driver: allocate three 1000-element pointer arrays and run foo.
program e_51_4
  use e_51_4_mod, only : foo
  real, pointer, dimension(:) :: p, v1, v2
  integer :: length

  length = 1000
  allocate (p(length), v1(length), v2(length))
  call foo (p, v1, v2, length)
  deallocate (p, v1, v2)
end program e_51_4

View file

@ -0,0 +1,53 @@
! { dg-do run }
! Vector product test with assumed-shape dummies, where N is mapped
! along with the arrays.
module e_51_5_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Initialize the inputs, call vec_mult inside a target data region
  ! that maps the arrays and N, and verify p afterwards.
  subroutine foo (p, v1, v2, N)
    integer :: N
    real, dimension(:) :: p, v1, v2
    call init (v1, v2, N)
    !$omp target data map(to: v1, v2, N) map(from: p)
      call vec_mult (p, v1, v2, N)
    !$omp end target data
    call check (p, N)
  end subroutine foo

  ! p = v1 * v2 in a target region with its own map clauses.
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real, dimension(:) :: p, v1, v2
    integer :: i
    !$omp target map(to: v1, v2, N) map(from: p)
      !$omp parallel do
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target
  end subroutine vec_mult
end module e_51_5_mod
! Driver: allocate three 1024-element arrays and run foo.
program e_51_5
  use e_51_5_mod, only : foo
  integer, parameter :: N = 1024
  real, allocatable, dimension(:) :: p, v1, v2

  allocate (p(N), v1(N), v2(N))
  call foo (p, v1, v2, N)
  deallocate (p, v1, v2)
end program e_51_5

View file

@ -0,0 +1,66 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Two target regions in one target data region, all guarded by the same
! if clause (N > THRESHOLD).
module e_51_6_mod
  integer, parameter :: THRESHOLD = 500
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Same values with the two arrays swapped.
  subroutine init_again (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k - 3.0
      v2(k) = k + 2.0
    end do
  end subroutine init_again

  ! Abort if any p(k) differs from 2 * (k + 2) * (k - 3) by more than
  ! EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - 2 * (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! p = v1 * v2, then p = p + v1 * v2 with re-initialized inputs.  Each
  ! target region aborts if it finds itself on the initial device.
  subroutine vec_mult (N)
    use omp_lib, only: omp_is_initial_device
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target data if(N > THRESHOLD) map(from: p)
      !$omp target if(N > THRESHOLD) map(to: v1, v2)
        if (omp_is_initial_device ()) call abort
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
      call init_again (v1, v2, N)
      !$omp target if(N > THRESHOLD) map(to: v1, v2)
        if (omp_is_initial_device ()) call abort
        !$omp parallel do
        do i = 1, N
          p(i) = p(i) + v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_51_6_mod
! Driver: run the test with 1000 elements, above the module's
! THRESHOLD of 500.
program e_51_6
  use e_51_6_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_51_6

View file

@ -0,0 +1,49 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Vector product test with an if clause on the target data construct
! only; the nested target construct is unconditional.
module e_51_7_mod
  integer, parameter :: THRESHOLD = 500
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! p = v1 * v2 in a target region that aborts if it finds itself on
  ! the initial device; p is verified after the data region ends.
  subroutine vec_mult (N)
    use omp_lib, only: omp_is_initial_device
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target data if(N > THRESHOLD) map(to: v1, v2) map(from: p)
      !$omp target
        if (omp_is_initial_device ()) call abort
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_51_7_mod
! Driver: run the test with 1000 elements, above the module's
! THRESHOLD of 500.
program e_51_7
  use e_51_7_mod, only : vec_mult
  integer :: length
  length = 1000
  call vec_mult (length)
end program e_51_7

View file

@ -0,0 +1,65 @@
! { dg-do run }
! Target update test: the inputs are changed on the host between two
! target regions and pushed to the device with `target update to'.
module e_52_1_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Same values with the two arrays swapped.
  subroutine init_again (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k - 3.0
      v2(k) = k + 2.0
    end do
  end subroutine init_again

  ! Abort if any p(k) differs from 2 * (k + 2) * (k - 3) by more than
  ! EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - 2 * (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! p = v1 * v2, re-initialize v1/v2 on the host, update the device
  ! copies, then p = p + v1 * v2; p is verified after the data region.
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    call init (v1, v2, N)
    !$omp target data map(to: v1, v2) map(from: p)
      !$omp target
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
      call init_again (v1, v2, N)
      !$omp target update to(v1, v2)
      !$omp target
        !$omp parallel do
        do i = 1, N
          p(i) = p(i) + v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_52_1_mod
! Driver: allocate three 1000-element arrays and run vec_mult.
program e_52_1
  use e_52_1_mod, only : vec_mult
  real, pointer :: p(:), v1(:), v2(:)
  integer :: length

  length = 1000
  allocate (p(length), v1(length), v2(length))
  call vec_mult (p, v1, v2, length)
  deallocate (p, v1, v2)
end program e_52_1

View file

@ -0,0 +1,77 @@
! { dg-do run }
! Target update test with an if clause: each input is refreshed on the
! device only if maybe_init_again reports a change.
module e_52_2_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Same values with the two arrays swapped.
  subroutine init_again (v1, v2, N)
    integer :: N
    real :: v1(N), v2(N)
    integer :: k
    do k = 1, N
      v1(k) = k - 3.0
      v2(k) = k + 2.0
    end do
  end subroutine init_again

  ! Abort if any p(k) differs from k * k + (k + 2) * (k - 3) by more
  ! than EPS.
  subroutine check (p, N)
    integer :: N
    real :: p(N)
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k * k + (k + 2.0) * (k - 3.0))
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! Overwrite v with v(k) = k and report that it changed (always true).
  logical function maybe_init_again (v, N)
    integer :: N
    real :: v(N)
    integer :: k
    do k = 1, N
      v(k) = k
    end do
    maybe_init_again = .true.
  end function maybe_init_again

  ! p = v1 * v2, refresh each input on host and device, then
  ! p = p + v1 * v2; p is verified after the data region.
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    logical :: changed
    call init (v1, v2, N)
    !$omp target data map(to: v1, v2) map(from: p)
      !$omp target
        !$omp parallel do
        do i = 1, N
          p(i) = v1(i) * v2(i)
        end do
      !$omp end target
      changed = maybe_init_again (v1, N)
      !$omp target update if(changed) to(v1(:N))
      changed = maybe_init_again (v2, N)
      !$omp target update if(changed) to(v2(:N))
      !$omp target
        !$omp parallel do
        do i = 1, N
          p(i) = p(i) + v1(i) * v2(i)
        end do
      !$omp end target
    !$omp end target data
    call check (p, N)
  end subroutine vec_mult
end module e_52_2_mod
! Driver: allocate three 1000-element arrays and run vec_mult.
program e_52_2
  use e_52_2_mod, only : vec_mult
  real, pointer :: p(:), v1(:), v2(:)
  integer :: length

  length = 1000
  allocate (p(length), v1(length), v2(length))
  call vec_mult (p, v1, v2, length)
  deallocate (p, v1, v2)
end program e_52_2

View file

@ -0,0 +1,31 @@
! { dg-do run }
! Recursive Fibonacci declared for the device, plus a wrapper that
! evaluates it in a target region when the argument is large enough.
module e_53_1_mod
  integer :: THRESHOLD = 20
contains
  ! fib(n) for n >= 0; returns 0 for n <= 0.
  integer recursive function fib (n) result (f)
    !$omp declare target
    integer :: n
    if (n <= 0) then
      f = 0
    else if (n == 1) then
      f = 1
    else
      f = fib (n - 1) + fib (n - 2)
    end if
  end function

  ! Evaluate fib (n) in a target region; the if clause offloads only
  ! when n > THRESHOLD.  The result x is mapped `from' explicitly so it
  ! reaches the host even when unmapped scalars are not implicitly
  ! tofrom (OpenMP 4.5 and later), and n is declared instead of relying
  ! on implicit typing.
  integer function fib_wrapper (n)
    integer :: n
    integer :: x
    !$omp target map(to: n) map(from: x) if(n > THRESHOLD)
      x = fib (n)
    !$omp end target
    fib_wrapper = x
  end function
end module
! Driver: compare fib with fib_wrapper for one argument below and one
! above the module's THRESHOLD.
program e_53_1
  use e_53_1_mod, only : fib, fib_wrapper
  integer :: args(2), k

  args(1) = 15
  args(2) = 25
  do k = 1, 2
    if (fib (args(k)) /= fib_wrapper (args(k))) call abort
  end do
end program e_53_1

View file

@ -0,0 +1,22 @@
! { dg-do run }
! Call the external, device-declared function fib inside a target
! region and compare the result with a host call.
program e_53_2
  !$omp declare target (fib)
  integer :: x, fib
  ! x is mapped `from' explicitly so the value computed in the target
  ! region reaches the host even when unmapped scalars are not
  ! implicitly tofrom (OpenMP 4.5 and later).
  !$omp target map(from: x)
    x = fib (25)
  !$omp end target
  if (x /= fib (25)) call abort
end program
! Recursive Fibonacci: 0 for n <= 0, 1 for n == 1, otherwise the sum of
! the two preceding values.  Declared for use in target regions.
integer recursive function fib (n) result (f)
  !$omp declare target
  integer :: n
  select case (n)
  case (:0)
    f = 0
  case (1)
    f = 1
  case default
    f = fib (n - 1) + fib (n - 2)
  end select
end function fib

View file

@ -0,0 +1,45 @@
! { dg-do run }
! Module data listed in a declare target directive: the size constant
! and the three arrays used by vec_mult.
module e_53_3_mod
  !$omp declare target (N, p, v1, v2)
  integer, parameter :: N = 1000
  real :: p(N)
  real :: v1(N)
  real :: v2(N)
end module e_53_3_mod
! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
subroutine init (v1, v2, N)
  integer :: N
  real :: v1(N), v2(N)
  integer :: k
  do k = 1, N
    v1(k) = k + 2.0
    v2(k) = k - 3.0
  end do
end subroutine init
! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
subroutine check (p, N)
  integer :: N
  real :: p(N)
  real, parameter :: EPS = 0.00001
  real :: delta
  integer :: k
  do k = 1, N
    delta = p(k) - (k + 2.0) * (k - 3.0)
    if (abs (delta) > EPS) call abort
  end do
end subroutine check
! Multiply the module arrays on the device: push v1 and v2 with
! `target update to', compute p in a target region, pull p back with
! `target update from', and verify it.
subroutine vec_mult ()
  use e_53_3_mod
  integer :: k
  call init (v1, v2, N)
  !$omp target update to(v1, v2)
  !$omp target
    !$omp parallel do
    do k = 1, N
      p(k) = v1(k) * v2(k)
    end do
  !$omp end target
  !$omp target update from (p)
  call check (p, N)
end subroutine vec_mult
! Driver: vec_mult does the whole test and aborts on failure.
program e_53_3
  call vec_mult
end program e_53_3

View file

@ -0,0 +1,68 @@
! { dg-do run }
! Module data and a helper function that are both made available on the
! device through declare target directives.
module e_53_4_mod
!$omp declare target (N, Q)
integer, parameter :: N = 10
real :: Q(N,N)
contains
! Product of the element at (i,k) and its transposed counterpart (k,i).
real function Pfun (i, k)
!$omp declare target
integer, intent(in) :: i, k
Pfun = (Q(i,k) * Q(k,i))
end function
end module
! Sum Pfun (k, i) over i = 1..N inside a target region.
! The reduction result tmp is mapped tofrom explicitly so it reaches
! the host even when unmapped scalars are not implicitly tofrom
! (OpenMP 4.5 and later).
real function accum (k) result (tmp)
  use e_53_4_mod
  integer :: i, k
  tmp = 0.0e0
  !$omp target map(tofrom: tmp)
    !$omp parallel do reduction(+:tmp)
    do i = 1, N
      tmp = tmp + Pfun (k, i)
    end do
  !$omp end target
end function
! Host reference: sum Pfun (k, j) over j = 1..N.
real function accum_ref (k) result (tmp)
  use e_53_4_mod
  integer :: k
  integer :: j
  tmp = 0.0e0
  do j = 1, N
    tmp = tmp + Pfun (k, j)
  end do
end function accum_ref
! Fill the module matrix: Q(r,c) = 0.001 * r * c.
subroutine init ()
  use e_53_4_mod
  integer :: r, c
  do r = 1, N
    do c = 1, N
      Q(r,c) = 0.001 * r * c
    end do
  end do
end subroutine init
! Abort if a and b disagree by more than EPS.  The error is relative to
! b when both values are nonzero; otherwise it is the plain value of
! the other operand.
subroutine check (a, b)
  real :: a, b
  real, parameter :: EPS = 0.00001
  real :: err
  if (a /= 0.0 .and. b /= 0.0) then
    err = (a - b) / b
  else if (b == 0.0) then
    err = a
  else
    err = b
  end if
  if (abs (err) > EPS) call abort
end subroutine check
! Driver: initialize Q, push it to the device, and compare the
! offloaded and host accumulations for every row index.
program e_53_4
  use e_53_4_mod
  real :: accum, accum_ref
  integer :: row
  call init ()
  !$omp target update to(Q)
  do row = 1, N
    call check (accum (row), accum_ref (row))
  end do
end program e_53_4

View file

@ -0,0 +1,80 @@
! { dg-do run }
! { dg-options "-O2" }
! { dg-additional-options "-msse2" { target sse2_runtime } }
! { dg-additional-options "-mavx" { target avx_runtime } }
! Module data and a SIMD-enabled helper function that are both made
! available on the device through declare target directives.
module e_53_5_mod
!$omp declare target (N, Q)
integer, parameter :: N = 10000, M = 1024
real :: Q(N,N)
contains
! Product of the element at (k,i) and its transposed counterpart (i,k).
! The declare simd directive marks i as uniform and k as linear.
real function Pfun (k, i)
!$omp declare simd(Pfun) uniform(i) linear(k) notinbranch
!$omp declare target
integer, value, intent(in) :: i, k
Pfun = (Q(k,i) * Q(i,k))
end function
end module
! Sum Pfun (k, i) over i = 1..N and k = 1..M inside a target region;
! the inner loop is a simd reduction into tmp1.
! The reduction result tmp is mapped tofrom explicitly so it reaches
! the host even when unmapped scalars are not implicitly tofrom
! (OpenMP 4.5 and later); the inner index k is declared instead of
! relying on implicit typing.
real function accum () result (tmp)
  use e_53_5_mod
  real :: tmp1
  integer :: i, k
  tmp = 0.0e0
  !$omp target map(tofrom: tmp)
    !$omp parallel do private(tmp1) reduction(+:tmp)
    do i = 1, N
      tmp1 = 0.0e0
      !$omp simd reduction(+:tmp1)
      do k = 1, M
        tmp1 = tmp1 + Pfun (k, i)
      end do
      tmp = tmp + tmp1
    end do
  !$omp end target
end function
! Host reference: for every i in 1..N sum Pfun (k, i) over k = 1..M
! into a partial sum, and add the partial sums together.
real function accum_ref () result (tmp)
  use e_53_5_mod
  integer :: i, k
  real :: partial
  tmp = 0.0e0
  do i = 1, N
    partial = 0.0e0
    do k = 1, M
      partial = partial + Pfun (k, i)
    end do
    tmp = tmp + partial
  end do
end function accum_ref
! Fill the module matrix: Q(r,c) = 0.001 * r * c.
subroutine init ()
  use e_53_5_mod
  integer :: r, c
  do r = 1, N
    do c = 1, N
      Q(r,c) = 0.001 * r * c
    end do
  end do
end subroutine init
! Abort if a and b disagree by more than EPS.  The error is relative to
! b when both values are nonzero; otherwise it is the plain value of
! the other operand.
subroutine check (a, b)
  real :: a, b
  real, parameter :: EPS = 0.00001
  real :: err
  if (a /= 0.0 .and. b /= 0.0) then
    err = (a - b) / b
  else if (b == 0.0) then
    err = a
  else
    err = b
  end if
  if (abs (err) > EPS) call abort
end subroutine check
! Driver: initialize Q, push it to the device, and compare the
! offloaded accumulation with the host reference.
program e_53_5
  use e_53_5_mod
  real :: accum, accum_ref
  call init ()
  !$omp target update to(Q)
  call check (accum (), accum_ref ())
end program e_53_5

View file

@ -0,0 +1,65 @@
! { dg-do run }
! Sequential dot product of B(1:N) and C(1:N).
function dotprod_ref (B, C, N) result (sum)
  implicit none
  ! N is declared before it is used as an array bound: with
  ! `implicit none' a variable in a specification expression must
  ! already have its type specified.
  integer :: N, i
  real :: B(N), C(N), sum
  sum = 0.0e0
  do i = 1, N
    sum = sum + B(i) * C(i)
  end do
end function
! Dot product of B(1:N) and C(1:N) in a target region: blocks of
! block_size elements are distributed over num_teams teams of at most
! block_threads threads, and each block is reduced by a parallel loop.
function dotprod (B, C, N, block_size, num_teams, block_threads) result (sum)
  implicit none
  ! The integers are declared before N is used as an array bound, as
  ! `implicit none' requires for specification expressions.
  integer :: N, block_size, num_teams, block_threads, i, i0
  real :: B(N), C(N), sum
  sum = 0.0e0
  ! sum is mapped tofrom explicitly so the reduced value reaches the
  ! host even when unmapped scalars are not implicitly tofrom
  ! (OpenMP 4.5 and later).
  !$omp target map(to: B, C, block_size, num_teams, block_threads) &
  !$omp& map(tofrom: sum)
    !$omp teams num_teams(num_teams) thread_limit(block_threads) &
    !$omp& reduction(+:sum)
      !$omp distribute
      do i0 = 1, N, block_size
        !$omp parallel do reduction(+:sum)
        do i = i0, min (i0 + block_size - 1, N)
          sum = sum + B(i) * C(i)
        end do
      end do
    !$omp end teams
  !$omp end target
end function
! Set B(k) = 0.0001 * k and C(k) = 0.000001 * k * k for k = 1..N.
subroutine init (B, C, N)
  integer :: N
  real :: B(N), C(N)
  integer :: k
  do k = 1, N
    B(k) = 0.0001 * k
    C(k) = 0.000001 * k * k
  end do
end subroutine init
! Abort if a and b disagree by more than EPS.  The error is relative to
! b when both values are nonzero; otherwise it is the plain value of
! the other operand.
subroutine check (a, b)
  real :: a, b
  real, parameter :: EPS = 0.0001
  real :: err
  if (a /= 0.0 .and. b /= 0.0) then
    err = (a - b) / b
  else if (b == 0.0) then
    err = a
  else
    err = b
  end if
  if (abs (err) > EPS) call abort
end subroutine check
! Driver: compare the offloaded dot product (block size 32, 2 teams,
! 8 threads per team) with the sequential reference on 1024 * 1024
! elements.
program e_54_1
  real, pointer, dimension(:) :: B, C
  real :: ref, d
  integer :: n

  n = 1024 * 1024
  allocate (B(n), C(n))
  call init (B, C, n)

  ref = dotprod_ref (B, C, n)
  d = dotprod (B, C, n, 32, 2, 8)
  call check (ref, d)

  deallocate (B, C)
end program e_54_1

View file

@ -0,0 +1,58 @@
! { dg-do run }
! Sequential dot product of B(1:N) and C(1:N).
function dotprod_ref (B, C, N) result (sum)
  implicit none
  ! N is declared before it is used as an array bound: with
  ! `implicit none' a variable in a specification expression must
  ! already have its type specified.
  integer :: N, i
  real :: B(N), C(N), sum
  sum = 0.0e0
  do i = 1, N
    sum = sum + B(i) * C(i)
  end do
end function
! Dot product of B(1:N) and C(1:N) computed with a combined
! `target teams' construct and `distribute parallel do'.
! sum is mapped tofrom explicitly so the result reaches the host even
! when unmapped scalars are not implicitly tofrom (OpenMP 4.5 and
! later), and the teams level carries the reduction too, so each team
! accumulates into its own copy instead of all teams updating one
! shared variable.
function dotprod (B, C, N) result(sum)
  real :: B(N), C(N), sum
  integer :: N, i
  sum = 0.0e0
  !$omp target teams map(to: B, C) map(tofrom: sum) reduction(+:sum)
    !$omp distribute parallel do reduction(+:sum)
    do i = 1, N
      sum = sum + B(i) * C(i)
    end do
  !$omp end target teams
end function
! Set B(k) = 0.0001 * k and C(k) = 0.000001 * k * k for k = 1..N.
subroutine init (B, C, N)
  integer :: N
  real :: B(N), C(N)
  integer :: k
  do k = 1, N
    B(k) = 0.0001 * k
    C(k) = 0.000001 * k * k
  end do
end subroutine init
! Abort if a and b disagree by more than EPS.  The error is relative to
! b when both values are nonzero; otherwise it is the plain value of
! the other operand.
subroutine check (a, b)
  real :: a, b
  real, parameter :: EPS = 0.0001
  real :: err
  if (a /= 0.0 .and. b /= 0.0) then
    err = (a - b) / b
  else if (b == 0.0) then
    err = a
  else
    err = b
  end if
  if (abs (err) > EPS) call abort
end subroutine check
! Driver: compare the offloaded dot product with the sequential
! reference on 1024 * 1024 elements.
program e_54_3
  real, pointer, dimension(:) :: B, C
  real :: ref, d
  integer :: n

  n = 1024 * 1024
  allocate (B(n), C(n))
  call init (B, C, n)

  ref = dotprod_ref (B, C, n)
  d = dotprod (B, C, n)
  call check (ref, d)

  deallocate (B, C)
end program e_54_3

View file

@ -0,0 +1,61 @@
! { dg-do run }
! Sequential dot product of B(1:N) and C(1:N).
function dotprod_ref (B, C, N) result (sum)
  implicit none
  ! N is declared before it is used as an array bound: with
  ! `implicit none' a variable in a specification expression must
  ! already have its type specified.
  integer :: N, i
  real :: B(N), C(N), sum
  sum = 0.0e0
  do i = 1, N
    sum = sum + B(i) * C(i)
  end do
end function
! Dot product of B(1:N) and C(1:N) in a target region run by 8 teams of
! at most 16 threads, with explicit dist_schedule and schedule chunks.
! sum is mapped tofrom explicitly so the result reaches the host even
! when unmapped scalars are not implicitly tofrom (OpenMP 4.5 and
! later), and the teams construct carries the reduction too, so each
! team accumulates into its own copy instead of all teams updating one
! shared variable.
function dotprod (B, C, n) result(sum)
  real :: B(N), C(N), sum
  integer :: N, i
  sum = 0.0e0
  !$omp target map(to: B, C) map(tofrom: sum)
    !$omp teams num_teams(8) thread_limit(16) reduction(+:sum)
      !$omp distribute parallel do reduction(+:sum) &
      !$omp& dist_schedule(static, 1024) schedule(static, 64)
      do i = 1, N
        sum = sum + B(i) * C(i)
      end do
    !$omp end teams
  !$omp end target
end function
! Set B(k) = 0.0001 * k and C(k) = 0.000001 * k * k for k = 1..N.
subroutine init (B, C, N)
  integer :: N
  real :: B(N), C(N)
  integer :: k
  do k = 1, N
    B(k) = 0.0001 * k
    C(k) = 0.000001 * k * k
  end do
end subroutine init
! Abort if a and b disagree by more than EPS.  The error is relative to
! b when both values are nonzero; otherwise it is the plain value of
! the other operand.
subroutine check (a, b)
  real :: a, b
  real, parameter :: EPS = 0.0001
  real :: err
  if (a /= 0.0 .and. b /= 0.0) then
    err = (a - b) / b
  else if (b == 0.0) then
    err = a
  else
    err = b
  end if
  if (abs (err) > EPS) call abort
end subroutine check
! Driver: compare the offloaded dot product with the sequential
! reference on 1024 * 1024 elements.
program e_54_4
  real, pointer, dimension(:) :: B, C
  real :: ref, d
  integer :: n

  n = 1024 * 1024
  allocate (B(n), C(n))
  call init (B, C, n)

  ref = dotprod_ref (B, C, n)
  d = dotprod (B, C, n)
  call check (ref, d)

  deallocate (B, C)
end program e_54_4

View file

@ -0,0 +1,47 @@
! { dg-do run }
! Vector product test using `target teams' with `distribute simd'.
module e_54_5_mod
contains
  ! Set v1(k) = k + 2 and v2(k) = k - 3 for k = 1..N.
  subroutine init (v1, v2, N)
    integer :: N
    real, pointer, dimension(:) :: v1, v2
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
      v2(k) = k - 3.0
    end do
  end subroutine init

  ! Abort if any p(k) differs from (k + 2) * (k - 3) by more than EPS.
  subroutine check (p, N)
    integer :: N
    real, pointer, dimension(:) :: p
    real, parameter :: EPS = 0.00001
    real :: delta
    integer :: k
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine check

  ! p = v1 * v2 on the device: inputs mapped `to', result mapped
  ! `from', loop distributed over teams with simd.
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real :: p(N), v1(N), v2(N)
    integer :: i
    !$omp target teams map(to: v1, v2) map(from: p)
      !$omp distribute simd
      do i = 1, N
        p(i) = v1(i) * v2(i)
      end do
    !$omp end target teams
  end subroutine vec_mult
end module e_54_5_mod
! Driver: multiply two 1000-element vectors on the device and check
! the product.
program e_54_5
  use e_54_5_mod, only : init, check, vec_mult
  integer :: n
  real, pointer, dimension(:) :: p, v1, v2

  n = 1000
  allocate (p(n), v1(n), v2(n))

  call init (v1, v2, n)
  call vec_mult (p, v1, v2, n)
  call check (p, n)

  deallocate (p, v1, v2)
end program e_54_5

View file

@ -0,0 +1,47 @@
! { dg-do run }
! Helpers for the e_54_6 test: input setup, result verification and the
! offloaded element-wise product (target teams + distribute parallel do simd).
module e_54_6_mod
contains

  ! Set v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.
  subroutine init (v1, v2, N)
    real, pointer, dimension(:) :: v1, v2
    integer :: N
    integer :: k
    do k = 1, N
      v1(k) = k + 2.0
    end do
    do k = 1, N
      v2(k) = k - 3.0
    end do
  end subroutine

  ! Abort unless every p(k), k = 1..N, is within EPS of (k + 2.0) * (k - 3.0).
  subroutine check (p, N)
    real, pointer, dimension(:) :: p
    integer :: N
    real, parameter :: EPS = 0.00001
    integer :: k
    real :: delta
    do k = 1, N
      delta = p(k) - (k + 2.0) * (k - 3.0)
      if (abs (delta) > EPS) call abort
    end do
  end subroutine

  ! p(k) = v1(k) * v2(k) for k = 1..N; v1 and v2 are mapped "to", p "from".
  subroutine vec_mult (p, v1, v2, N)
    integer :: N
    real, dimension(N) :: p, v1, v2
    integer :: k
    !$omp target teams map(to: v1, v2) map(from: p)
      !$omp distribute parallel do simd
        do k = 1, N
          p(k) = v1(k) * v2(k)
        end do
    !$omp end target teams
  end subroutine

end module
! Driver: build two 1000-element inputs, multiply them element-wise on the
! device and verify the product.
program e_54_6
  use e_54_6_mod, only : init, check, vec_mult
  integer :: n
  real, pointer, dimension(:) :: p, v1, v2
  n = 1000
  allocate (p(n), v1(n), v2(n))
  call init (v1, v2, n)
  call vec_mult (p, v1, v2, n)
  ! check aborts on the first element outside its tolerance.
  call check (p, n)
  deallocate (p, v1, v2)
end program

View file

@ -0,0 +1,70 @@
! { dg-do run }
! Shared data for the e_55_1 test.
!   N       : total number of elements in each array
!   CHUNKSZ : constant chunk size
!   Y, Z    : arrays of N reals each
module e_55_1_mod
  integer, parameter :: N = 10000000
  integer, parameter :: CHUNKSZ = 100000
  real, dimension(N) :: Y, Z
end module
! Initialise the module arrays: Y(k) = 0.1 * k for k = 1..N, and Z as an
! element-for-element copy of Y.
subroutine init ()
  use e_55_1_mod, only : Y, Z, N
  integer :: k
  do k = 1, N
    Y(k) = 0.1 * k
  end do
  Z = Y
end subroutine
! Abort unless Y(k) and Z(k) agree to within EPS for every k = 1..N.
! When both values are non-zero the error is taken relative to Z(k); when
! one of them is zero the other value itself is used as the error.
subroutine check ()
  use e_55_1_mod, only : Y, Z, N
  real, parameter :: EPS = 0.00001
  real :: rel
  integer :: k
  do k = 1, N
    if (Y(k) /= 0.0 .and. Z(k) /= 0.0) then
      rel = (Y(k) - Z(k)) / Z(k)
    else if (Y(k) == 0.0) then
      rel = Z(k)
    else
      rel = Y(k)
    end if
    if (abs (rel) > EPS) call abort
  end do
end subroutine
! Return the negation of z.  Marked "declare target" so that it can be
! called from inside target regions.
function F (z) result(negated)
  !$omp declare target
  real, intent(in) :: z
  real :: negated
  negated = -z
end function
! Apply F to every element of Z, one chunk of CHUNKSZ elements per
! iteration of the outer loop.  Each chunk is wrapped in its own task that
! contains a target region mapping only that section of Z.
subroutine pipedF ()
use e_55_1_mod, only: Z, N, CHUNKSZ
integer :: C, i
! F is referenced as an external function; only its result type is
! declared here.
real :: F
! C is the first index of the current chunk.
do C = 1, N, CHUNKSZ
!$omp task
! Only the section Z(C:C+CHUNKSZ-1) is mapped for this target region.
!$omp target map(Z(C:C+CHUNKSZ-1))
!$omp parallel do
do i = C, C+CHUNKSZ-1
Z(i) = F (Z(i))
end do
!$omp end target
!$omp end task
end do
end subroutine
! Serial reference: replace every Y(k), k = 1..N, by F (Y(k)).
subroutine pipedF_ref ()
  use e_55_1_mod, only: Y, N
  real :: F
  integer :: k
  do k = 1, N
    Y(k) = F (Y(k))
  end do
end subroutine
! Driver: initialise Y and Z, transform Z with the task/target version and
! Y with the serial reference, then compare the two arrays.
program e_55_1
  implicit none
  call init ()
  call pipedF ()
  call pipedF_ref ()
  call check ()
end program

View file

@ -0,0 +1,56 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Set v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.
! Marked "declare target" so that it can be called from a target region.
subroutine init (v1, v2, N)
  !$omp declare target
  integer :: N
  real, dimension(N) :: v1, v2
  integer :: k
  do k = 1, N
    v1(k) = k + 2.0
  end do
  do k = 1, N
    v2(k) = k - 3.0
  end do
end subroutine
! Abort unless every p(k), k = 1..N, is within EPS of (k + 2.0) * (k - 3.0).
subroutine check (p, N)
  integer :: N
  real, dimension(N) :: p
  real, parameter :: EPS = 0.00001
  integer :: k
  real :: delta
  do k = 1, N
    delta = p(k) - (k + 2.0) * (k - 3.0)
    if (abs (delta) > EPS) call abort
  end do
end subroutine
! Compute p(i) = v1(i) * v2(i) for i = 1..N using two dependent tasks, each
! wrapping a target region, inside a single target data region.
!   p : result vector of length N (mapped "from")
!   N : number of elements
! The first task allocates v1/v2 and fills them through init; the second
! multiplies them into p and deallocates them.  The depend clauses on v1
! and v2 order the second task after the first.  Both target regions abort
! when omp_is_initial_device () reports execution on the host.
subroutine vec_mult (p, N)
  use omp_lib, only: omp_is_initial_device
  integer :: N, i
  real :: p(N)
  real, allocatable :: v1(:), v2(:)
  !$omp declare target (init)
  !$omp target data map(to: v1, v2, N) map(from: p)
    !$omp task shared(v1, v2, p) depend(out: v1, v2)
      !$omp target map(to: v1, v2, N)
        if (omp_is_initial_device ()) call abort
        allocate (v1(N), v2(N))
        call init (v1, v2, N)
      !$omp end target
    !$omp end task
    !$omp task shared(v1, v2, p) depend(in: v1, v2)
      !$omp target map(to: v1, v2, N) map(from: p)
        if (omp_is_initial_device ()) call abort
        !$omp parallel do
          do i = 1, N
            p(i) = v1(i) * v2(i)
          end do
        deallocate (v1, v2)
      !$omp end target
    !$omp end task
    ! Wait for both tasks before leaving the data region: nothing else
    ! guarantees they have completed by the time p is copied back and
    ! verified below.
    !$omp taskwait
  !$omp end target data
  call check (p, N)
end subroutine
! Driver: run vec_mult on a 1000-element result vector; vec_mult performs
! its own verification.
program e_55_2
  integer, parameter :: N = 1000
  real, dimension(N) :: p
  call vec_mult (p, N)
end program

View file

@ -0,0 +1,17 @@
! { dg-do run }
! Unnamed main program: runs foo, which maps two different sections of the
! same array -- A(1:4) directly and elements 8..27 through the pointer p.
call foo ()
contains
! Write one element of each mapped section inside a target region and
! abort unless both values are seen on the host afterwards.
subroutine foo ()
integer, target :: A(30)
integer, pointer :: p(:)
! A(1:4) is mapped for the whole target data region.
!$omp target data map(A(1:4))
! p is associated with all of A, so p(k) refers to A(k).
p => A
!$omp target map(p(8:27)) map(A(1:4))
A(3) = 777
p(9) = 777
!$omp end target
!$omp end target data
! A(3) was written directly, A(9) through p(9).
if (A(3) /= 777 .or. A(9) /= 777) call abort
end subroutine
end

View file

@ -0,0 +1,18 @@
! { dg-do run }
! Unnamed main program: runs foo, which maps A(1:10) directly and the
! section 4..10 of the same array through the pointer p.
call foo ()
contains
! Write A(3) directly and A(9) both through p and directly inside a target
! region, then abort unless the host sees the last value stored to each.
subroutine foo ()
integer, target :: A(30)
integer, pointer :: p(:)
! A(1:10) is mapped for the whole target data region.
!$omp target data map(A(1:10))
! p is associated with all of A, so p(k) refers to A(k).
p => A
!$omp target map(p(4:10)) map(A(1:10))
A(3) = 777
! Element 9 is stored twice: first through p, then through A.
p(9) = 777
A(9) = 999
!$omp end target
!$omp end target data
! The later store to A(9) (999) is the value expected on the host.
if (A(3) /= 777 .or. A(9) /= 999) call abort
end subroutine
end

View file

@ -0,0 +1,56 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Exercise the if clause on target and target data for three values of a
! (100, 300, 500); the condition a > 200 .and. a < 400 holds only for 300.
!   c : result of omp_is_initial_device () in the conditional target region
!   d : result of omp_is_initial_device () in the unconditional target region
!   b : set to 100 inside the unconditional target region
! c, d and b are mapped explicitly with map(from: ...) so that the values
! checked on the host do not depend on the implicit data-mapping rule for
! scalars referenced in a target region.
program e_57_1
  use omp_lib, only: omp_is_initial_device
  integer :: a, b
  logical :: c, d

  ! Case 1: a = 100, the if condition is false.
  a = 100
  b = 0
  !$omp target if(a > 200 .and. a < 400) map(from: c)
    c = omp_is_initial_device ()
  !$omp end target
  !$omp target data map(to: b) if(a > 200 .and. a < 400)
    !$omp target map(from: b, d)
      b = 100
      d = omp_is_initial_device ()
    !$omp end target
  !$omp end target data
  ! Expected: first region ran on the host (c true), second did not
  ! (d false), and the store b = 100 is visible on the host.
  if (b /= 100 .or. .not. c .or. d) call abort

  ! Case 2: a = 300, the if condition is true.
  a = a + 200
  b = 0
  !$omp target if(a > 200 .and. a < 400) map(from: c)
    c = omp_is_initial_device ()
  !$omp end target
  !$omp target data map(to: b) if(a > 200 .and. a < 400)
    !$omp target map(from: b, d)
      b = 100
      d = omp_is_initial_device ()
    !$omp end target
  !$omp end target data
  ! Expected: neither region ran on the host, and the host copy of b is
  ! still 0 because the enclosing data region maps b with "to" only.
  if (b /= 0 .or. c .or. d) call abort

  ! Case 3: a = 500, the if condition is false again.
  a = a + 200
  b = 0
  !$omp target if(a > 200 .and. a < 400) map(from: c)
    c = omp_is_initial_device ()
  !$omp end target
  !$omp target data map(to: b) if(a > 200 .and. a < 400)
    !$omp target map(from: b, d)
      b = 100
      d = omp_is_initial_device ()
    !$omp end target
  !$omp end target data
  ! Same expectations as case 1.
  if (b /= 100 .or. .not. c .or. d) call abort
end program

View file

@ -0,0 +1,24 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Launch one target region per device number 0..N-1 and record, for each,
! whether it executed on the initial (host) device.
! The checks expect device numbers below omp_get_num_devices () to execute
! off the host and all remaining ones to execute on the host.
program e_57_2
  use omp_lib, only: omp_is_initial_device, omp_get_num_devices
  integer, parameter :: N = 10
  integer :: i, num, num_checked
  logical :: offload(N)
  num = omp_get_num_devices ()
  !$omp parallel do
    do i = 1, N
      ! Only element i of offload is mapped back from device i-1.
      !$omp target device(i-1) map(from: offload(i:i))
        offload(i) = omp_is_initial_device ()
      !$omp end target
    end do
  ! Only N entries were recorded; clamp so that a system with more than N
  ! devices does not index offload out of bounds.
  num_checked = min (num, N)
  do i = 1, num_checked
    if (offload(i)) call abort
  end do
  do i = num_checked+1, N
    if (.not. offload(i)) call abort
  end do
end program

View file

@ -0,0 +1,21 @@
! { dg-do run }
! { dg-require-effective-target offload_device }
! Check that a target region follows the default device: first with the
! initial default, then after setting the default device number to
! omp_get_num_devices ().
! res is mapped explicitly with map(from: res) so that the value tested on
! the host does not depend on the implicit data-mapping rule for scalars.
program e_57_3
  use omp_lib, only: omp_is_initial_device, omp_get_num_devices,&
    omp_set_default_device
  logical :: res
  !$omp target map(from: res)
    res = omp_is_initial_device ()
  !$omp end target
  ! With the initial default device the region must not run on the host.
  if (res) call abort
  call omp_set_default_device (omp_get_num_devices ())
  !$omp target map(from: res)
    res = omp_is_initial_device ()
  !$omp end target
  ! With the default device set to omp_get_num_devices () the region is
  ! expected to run on the host.
  if (.not. res) call abort
end program