OpenMP: Fix SIMT for complex/float reduction with && and ||
2021-05-07 Tobias Burnus <tobias@codesourcery.com> Tom de Vries <tdevries@suse.de> gcc/ChangeLog: * omp-low.c (lower_rec_simd_input_clauses): Set max_vf = 1 if a truth_value_p reduction variable is nonintegral. libgomp/ChangeLog: * testsuite/libgomp.c-c++-common/reduction-5.c: New test, testing complex/floating-point || + && reduction with 'omp target'. * testsuite/libgomp.c-c++-common/reduction-6.c: Likewise.
This commit is contained in:
parent
a4613d9ada
commit
33b647956c
3 changed files with 410 additions and 7 deletions
|
@ -4389,14 +4389,28 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
|
|||
{
|
||||
for (tree c = gimple_omp_for_clauses (ctx->stmt); c;
|
||||
c = OMP_CLAUSE_CHAIN (c))
|
||||
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
|
||||
&& OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
/* UDR reductions are not supported yet for SIMT, disable
|
||||
SIMT. */
|
||||
sctx->max_vf = 1;
|
||||
break;
|
||||
{
|
||||
if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
|
||||
continue;
|
||||
|
||||
if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
|
||||
{
|
||||
/* UDR reductions are not supported yet for SIMT, disable
|
||||
SIMT. */
|
||||
sctx->max_vf = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (truth_value_p (OMP_CLAUSE_REDUCTION_CODE (c))
|
||||
&& !INTEGRAL_TYPE_P (TREE_TYPE (new_var)))
|
||||
{
|
||||
/* Doing boolean operations on non-integral types is
|
||||
for conformance only, it's not worth supporting this
|
||||
for SIMT. */
|
||||
sctx->max_vf = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maybe_gt (sctx->max_vf, 1U))
|
||||
{
|
||||
|
|
193
libgomp/testsuite/libgomp.c-c++-common/reduction-5.c
Normal file
193
libgomp/testsuite/libgomp.c-c++-common/reduction-5.c
Normal file
|
@ -0,0 +1,193 @@
|
|||
/* { dg-additional-options "-foffload=-latomic" { target { offload_target_nvptx } } } */
|
||||
/* C / C++'s logical AND and OR operators take any scalar argument
|
||||
which compares (un)equal to 0 - the result 1 or 0 and of type int.
|
||||
|
||||
In this testcase, the int result is again converted to a floating-poing
|
||||
or complex type.
|
||||
|
||||
While having a floating-point/complex array element with || and && can make
|
||||
sense, having a non-integer/non-bool reduction variable is odd but valid.
|
||||
|
||||
Test: FP reduction variable + FP array - as reduction-1.c but with target */
|
||||
|
||||
#define N 1024
|
||||
_Complex float rcf[N];
|
||||
_Complex double rcd[N];
|
||||
float rf[N];
|
||||
double rd[N];
|
||||
|
||||
int
|
||||
reduction_or ()
|
||||
{
|
||||
float orf = 0;
|
||||
double ord = 0;
|
||||
_Complex float orfc = 0;
|
||||
_Complex double ordc = 0;
|
||||
|
||||
#pragma omp target parallel reduction(||: orf) map(orf)
|
||||
for (int i=0; i < N; ++i)
|
||||
orf = orf || rf[i];
|
||||
|
||||
#pragma omp target parallel for reduction(||: ord) map(ord)
|
||||
for (int i=0; i < N; ++i)
|
||||
ord = ord || rcd[i];
|
||||
|
||||
#pragma omp target parallel for simd reduction(||: orfc) map(orfc)
|
||||
for (int i=0; i < N; ++i)
|
||||
orfc = orfc || rcf[i];
|
||||
|
||||
#pragma omp target parallel loop reduction(||: ordc) map(ordc)
|
||||
for (int i=0; i < N; ++i)
|
||||
ordc = ordc || rcd[i];
|
||||
|
||||
return orf + ord + __real__ orfc + __real__ ordc;
|
||||
}
|
||||
|
||||
int
|
||||
reduction_or_teams ()
|
||||
{
|
||||
float orf = 0;
|
||||
double ord = 0;
|
||||
_Complex float orfc = 0;
|
||||
_Complex double ordc = 0;
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(||: orf) map(orf)
|
||||
for (int i=0; i < N; ++i)
|
||||
orf = orf || rf[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(||: ord) map(ord)
|
||||
for (int i=0; i < N; ++i)
|
||||
ord = ord || rcd[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(||: orfc) map(orfc)
|
||||
for (int i=0; i < N; ++i)
|
||||
orfc = orfc || rcf[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(||: ordc) map(ordc)
|
||||
for (int i=0; i < N; ++i)
|
||||
ordc = ordc || rcd[i];
|
||||
|
||||
return orf + ord + __real__ orfc + __real__ ordc;
|
||||
}
|
||||
|
||||
int
|
||||
reduction_and ()
|
||||
{
|
||||
float andf = 1;
|
||||
double andd = 1;
|
||||
_Complex float andfc = 1;
|
||||
_Complex double anddc = 1;
|
||||
|
||||
#pragma omp target parallel reduction(&&: andf) map(andf)
|
||||
for (int i=0; i < N; ++i)
|
||||
andf = andf && rf[i];
|
||||
|
||||
#pragma omp target parallel for reduction(&&: andd) map(andd)
|
||||
for (int i=0; i < N; ++i)
|
||||
andd = andd && rcd[i];
|
||||
|
||||
#pragma omp target parallel for simd reduction(&&: andfc) map(andfc)
|
||||
for (int i=0; i < N; ++i)
|
||||
andfc = andfc && rcf[i];
|
||||
|
||||
#pragma omp target parallel loop reduction(&&: anddc) map(anddc)
|
||||
for (int i=0; i < N; ++i)
|
||||
anddc = anddc && rcd[i];
|
||||
|
||||
return andf + andd + __real__ andfc + __real__ anddc;
|
||||
}
|
||||
|
||||
int
|
||||
reduction_and_teams ()
|
||||
{
|
||||
float andf = 1;
|
||||
double andd = 1;
|
||||
_Complex float andfc = 1;
|
||||
_Complex double anddc = 1;
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(&&: andf) map(andf)
|
||||
for (int i=0; i < N; ++i)
|
||||
andf = andf && rf[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(&&: andd) map(andd)
|
||||
for (int i=0; i < N; ++i)
|
||||
andd = andd && rcd[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(&&: andfc) map(andfc)
|
||||
for (int i=0; i < N; ++i)
|
||||
andfc = andfc && rcf[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(&&: anddc) map(anddc)
|
||||
for (int i=0; i < N; ++i)
|
||||
anddc = anddc && rcd[i];
|
||||
|
||||
return andf + andd + __real__ andfc + __real__ anddc;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
rf[i] = 0;
|
||||
rd[i] = 0;
|
||||
rcf[i] = 0;
|
||||
rcd[i] = 0;
|
||||
}
|
||||
|
||||
if (reduction_or () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
rf[10] = 1.0;
|
||||
rd[15] = 1.0;
|
||||
rcf[10] = 1.0;
|
||||
rcd[15] = 1.0i;
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
rf[i] = 1;
|
||||
rd[i] = 1;
|
||||
rcf[i] = 1;
|
||||
rcd[i] = 1;
|
||||
}
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 4)
|
||||
__builtin_abort ();
|
||||
|
||||
rf[10] = 0.0;
|
||||
rd[15] = 0.0;
|
||||
rcf[10] = 0.0;
|
||||
rcd[15] = 0.0;
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
196
libgomp/testsuite/libgomp.c-c++-common/reduction-6.c
Normal file
196
libgomp/testsuite/libgomp.c-c++-common/reduction-6.c
Normal file
|
@ -0,0 +1,196 @@
|
|||
/* { dg-additional-options "-foffload=-latomic" { target { offload_target_nvptx } } } */
|
||||
/* C / C++'s logical AND and OR operators take any scalar argument
|
||||
which compares (un)equal to 0 - the result 1 or 0 and of type int.
|
||||
|
||||
In this testcase, the int result is again converted to an integer complex
|
||||
type.
|
||||
|
||||
While having a floating-point/complex array element with || and && can make
|
||||
sense, having a complex reduction variable is odd but valid.
|
||||
|
||||
Test: int complex reduction variable + int complex array.
|
||||
as reduction-4.c but with target. */
|
||||
|
||||
#define N 1024
|
||||
_Complex char rcc[N];
|
||||
_Complex short rcs[N];
|
||||
_Complex int rci[N];
|
||||
_Complex long long rcl[N];
|
||||
|
||||
int
|
||||
reduction_or ()
|
||||
{
|
||||
_Complex char orc = 0;
|
||||
_Complex short ors = 0;
|
||||
_Complex int ori = 0;
|
||||
_Complex long orl = 0;
|
||||
|
||||
#pragma omp target parallel reduction(||: orc) map(orc)
|
||||
for (int i=0; i < N; ++i)
|
||||
orc = orc || rcl[i];
|
||||
|
||||
#pragma omp target parallel for reduction(||: ors) map(ors)
|
||||
for (int i=0; i < N; ++i)
|
||||
ors = ors || rci[i];
|
||||
|
||||
#pragma omp target parallel for simd reduction(||: ori) map(ori)
|
||||
for (int i=0; i < N; ++i)
|
||||
ori = ori || rcs[i];
|
||||
|
||||
#pragma omp target parallel loop reduction(||: orl) map(orl)
|
||||
for (int i=0; i < N; ++i)
|
||||
orl = orl || rcc[i];
|
||||
|
||||
return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl);
|
||||
}
|
||||
|
||||
int
|
||||
reduction_or_teams ()
|
||||
{
|
||||
_Complex char orc = 0;
|
||||
_Complex short ors = 0;
|
||||
_Complex int ori = 0;
|
||||
_Complex long orl = 0;
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(||: orc) map(orc)
|
||||
for (int i=0; i < N; ++i)
|
||||
orc = orc || rcc[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(||: ors) map(ors)
|
||||
for (int i=0; i < N; ++i)
|
||||
ors = ors || rcs[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(||: ori) map(ori)
|
||||
for (int i=0; i < N; ++i)
|
||||
ori = ori || rci[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(||: orl) map(orl)
|
||||
for (int i=0; i < N; ++i)
|
||||
orl = orl || rcl[i];
|
||||
|
||||
return __real__ (orc + ors + ori + orl) + __imag__ (orc + ors + ori + orl);
|
||||
}
|
||||
|
||||
int
|
||||
reduction_and ()
|
||||
{
|
||||
_Complex char andc = 1;
|
||||
_Complex short ands = 1;
|
||||
_Complex int andi = 1;
|
||||
_Complex long andl = 1;
|
||||
|
||||
#pragma omp target parallel reduction(&&: andc) map(andc)
|
||||
for (int i=0; i < N; ++i)
|
||||
andc = andc && rcc[i];
|
||||
|
||||
#pragma omp target parallel for reduction(&&: ands) map(ands)
|
||||
for (int i=0; i < N; ++i)
|
||||
ands = ands && rcs[i];
|
||||
|
||||
#pragma omp target parallel for simd reduction(&&: andi) map(andi)
|
||||
for (int i=0; i < N; ++i)
|
||||
andi = andi && rci[i];
|
||||
|
||||
#pragma omp target parallel loop reduction(&&: andl) map(andl)
|
||||
for (int i=0; i < N; ++i)
|
||||
andl = andl && rcl[i];
|
||||
|
||||
return __real__ (andc + ands + andi + andl)
|
||||
+ __imag__ (andc + ands + andi + andl);
|
||||
}
|
||||
|
||||
int
|
||||
reduction_and_teams ()
|
||||
{
|
||||
_Complex char andc = 1;
|
||||
_Complex short ands = 1;
|
||||
_Complex int andi = 1;
|
||||
_Complex long andl = 1;
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(&&: andc) map(andc)
|
||||
for (int i=0; i < N; ++i)
|
||||
andc = andc && rcl[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(&&: ands) map(ands)
|
||||
for (int i=0; i < N; ++i)
|
||||
ands = ands && rci[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(&&: andi) map(andi)
|
||||
for (int i=0; i < N; ++i)
|
||||
andi = andi && rcs[i];
|
||||
|
||||
#pragma omp target teams distribute parallel for simd reduction(&&: andl) map(andl)
|
||||
for (int i=0; i < N; ++i)
|
||||
andl = andl && rcc[i];
|
||||
|
||||
return __real__ (andc + ands + andi + andl)
|
||||
+ __imag__ (andc + ands + andi + andl);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
rcc[i] = 0;
|
||||
rcs[i] = 0;
|
||||
rci[i] = 0;
|
||||
rcl[i] = 0;
|
||||
}
|
||||
|
||||
if (reduction_or () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
rcc[10] = 1.0;
|
||||
rcs[15] = 1.0i;
|
||||
rci[10] = 1.0;
|
||||
rcl[15] = 1.0i;
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i < N; ++i)
|
||||
{
|
||||
rcc[i] = 1;
|
||||
rcs[i] = 1i;
|
||||
rci[i] = 1;
|
||||
rcl[i] = 1 + 1i;
|
||||
}
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 4)
|
||||
__builtin_abort ();
|
||||
|
||||
rcc[10] = 0.0;
|
||||
rcs[15] = 0.0;
|
||||
rci[10] = 0.0;
|
||||
rcl[15] = 0.0;
|
||||
|
||||
if (reduction_or () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_or_teams () != 4)
|
||||
__builtin_abort ();
|
||||
if (reduction_and () != 0)
|
||||
__builtin_abort ();
|
||||
if (reduction_and_teams () != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue