From d4de7e32eff0a6363defa50b052d7a30548b6552 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 23 Aug 2021 15:13:30 +0200 Subject: [PATCH] Fortran/OpenMP: strict modifier on grainsize/num_tasks This patch adds support for the 'strict' modifier on grainsize/num_tasks clauses, an OpenMP 5.1 feature supported in C/C++ since commit r12-3066-g3bc75533d1f87f0617be6c1af98804f9127ec637 gcc/fortran/ChangeLog: * dump-parse-tree.c (show_omp_clauses): Handle 'strict' modifier on grainsize/num_tasks * gfortran.h (gfc_omp_clauses): Add grainsize_strict and num_tasks_strict. * trans-openmp.c (gfc_trans_omp_clauses, gfc_split_omp_clauses): Handle 'strict' modifier on grainsize/num_tasks. * openmp.c (gfc_match_omp_clauses): Likewise. libgomp/ChangeLog: * testsuite/libgomp.fortran/taskloop-4-a.f90: New test. * testsuite/libgomp.fortran/taskloop-4.f90: New test. * testsuite/libgomp.fortran/taskloop-5-a.f90: New test. * testsuite/libgomp.fortran/taskloop-5.f90: New test. --- gcc/fortran/dump-parse-tree.c | 4 + gcc/fortran/gfortran.h | 2 +- gcc/fortran/openmp.c | 20 +++- gcc/fortran/trans-openmp.c | 8 ++ .../libgomp.fortran/taskloop-4-a.f90 | 86 +++++++++++++++++ .../testsuite/libgomp.fortran/taskloop-4.f90 | 41 ++++++++ .../libgomp.fortran/taskloop-5-a.f90 | 95 +++++++++++++++++++ .../testsuite/libgomp.fortran/taskloop-5.f90 | 75 +++++++++++++++ 8 files changed, 326 insertions(+), 5 deletions(-) create mode 100644 libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/taskloop-4.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/taskloop-5.f90 diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c index c75a0a9d095..a1df47c2f82 100644 --- a/gcc/fortran/dump-parse-tree.c +++ b/gcc/fortran/dump-parse-tree.c @@ -1805,6 +1805,8 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses) if (omp_clauses->grainsize) { fputs (" GRAINSIZE(", dumpfile); + if (omp_clauses->grainsize_strict) + fputs ("strict: ", dumpfile); show_expr (omp_clauses->grainsize); fputc (')', dumpfile); } @@ -1823,6 +1825,8 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses) if (omp_clauses->num_tasks) { fputs (" NUM_TASKS(", dumpfile); + if (omp_clauses->num_tasks_strict) + fputs ("strict: ", dumpfile); show_expr (omp_clauses->num_tasks); fputc (')', dumpfile); } diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 4b26cb430d4..48cdcdf6cb8 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1490,7 +1490,7 @@ typedef struct gfc_omp_clauses unsigned inbranch:1, notinbranch:1, nogroup:1; unsigned sched_simd:1, sched_monotonic:1, sched_nonmonotonic:1; unsigned simd:1, threads:1, depend_source:1, destroy:1, order_concurrent:1; - unsigned capture:1; + unsigned capture:1, grainsize_strict:1, num_tasks_strict:1; ENUM_BITFIELD (gfc_omp_sched_kind) sched_kind:3; ENUM_BITFIELD (gfc_omp_device_type) device_type:2; ENUM_BITFIELD (gfc_omp_memorder) memorder:3; diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c index 2380866cc3b..1aae35a6bc0 100644 --- a/gcc/fortran/openmp.c +++ b/gcc/fortran/openmp.c @@ -1839,8 +1839,14 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask, } if ((mask & OMP_CLAUSE_GRAINSIZE) && c->grainsize == NULL - && gfc_match ("grainsize ( %e )", &c->grainsize) == MATCH_YES) - continue; + && gfc_match ("grainsize ( ") == MATCH_YES) + { + if (gfc_match ("strict : ") == MATCH_YES) + c->grainsize_strict = true; + if (gfc_match (" %e )", &c->grainsize) != MATCH_YES) + goto error; + continue; + } break; case 'h': if ((mask & OMP_CLAUSE_HINT) @@ -2148,8 +2154,14 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const omp_mask mask, continue; if ((mask & OMP_CLAUSE_NUM_TASKS) && c->num_tasks == NULL - && gfc_match ("num_tasks ( %e )", &c->num_tasks) == MATCH_YES) - continue; + && gfc_match ("num_tasks ( ") == MATCH_YES) + { + if (gfc_match ("strict : ") == MATCH_YES) + c->num_tasks_strict = true; + if (gfc_match (" %e )", &c->num_tasks) != MATCH_YES) + goto error; + continue; + } if ((mask & OMP_CLAUSE_NUM_TEAMS) && c->num_teams == NULL && gfc_match ("num_teams ( %e )", &c->num_teams) == MATCH_YES) diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 91888f31cb3..40d2fd206e4 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -3998,6 +3998,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_GRAINSIZE); OMP_CLAUSE_GRAINSIZE_EXPR (c) = grainsize; + if (clauses->grainsize_strict) + OMP_CLAUSE_GRAINSIZE_STRICT (c) = 1; omp_clauses = gfc_trans_add_clause (c, omp_clauses); } @@ -4013,6 +4015,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_NUM_TASKS); OMP_CLAUSE_NUM_TASKS_EXPR (c) = num_tasks; + if (clauses->num_tasks_strict) + OMP_CLAUSE_NUM_TASKS_STRICT (c) = 1; omp_clauses = gfc_trans_add_clause (c, omp_clauses); } @@ -5964,8 +5968,12 @@ gfc_split_omp_clauses (gfc_code *code, = code->ext.omp_clauses->nogroup; clausesa[GFC_OMP_SPLIT_TASKLOOP].grainsize = code->ext.omp_clauses->grainsize; + clausesa[GFC_OMP_SPLIT_TASKLOOP].grainsize_strict + = code->ext.omp_clauses->grainsize_strict; clausesa[GFC_OMP_SPLIT_TASKLOOP].num_tasks = code->ext.omp_clauses->num_tasks; + clausesa[GFC_OMP_SPLIT_TASKLOOP].num_tasks_strict + = code->ext.omp_clauses->num_tasks_strict; clausesa[GFC_OMP_SPLIT_TASKLOOP].priority = code->ext.omp_clauses->priority; clausesa[GFC_OMP_SPLIT_TASKLOOP].final_expr diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 new file mode 100644 index 00000000000..2049f5c8bca --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-4-a.f90 @@ -0,0 +1,86 @@ +! { dg-do compile { target skip-all-targets } } +! Only used by taskloop-4.f90 +! To avoid inlining + +module m2 + use m_taskloop4 + implicit none (external, type) +contains + +subroutine grainsize (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) grainsize(d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 1 + end if + j = j + 1 + u(k) = j + end do +end + +subroutine num_tasks (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) num_tasks(d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 2 + end if + j = j + 1 + u(k) = j + end do + end +end module + +program main + use m2 + implicit none (external, type) + !$omp parallel + !$omp single + block + integer :: min_iters, max_iters, ntasks + + ! If grainsize is present, # of task loop iters is >= grainsize && < 2 * grainsize, + ! unless # of loop iterations is smaller than grainsize. + if (test (0, 79, 1, 17, grainsize, ntasks, min_iters, max_iters) /= 79) & + stop 3 + if (min_iters < 17 .or. max_iters >= 17 * 2) & + stop 4 + if (test (-49, 2541, 7, 28, grainsize, ntasks, min_iters, max_iters) /= 370) & + stop 5 + if (min_iters < 28 .or. max_iters >= 28 * 2) & + stop 6 + if (test (7, 21, 2, 15, grainsize, ntasks, min_iters, max_iters) /= 7) & + stop 7 + if (ntasks /= 1 .or. min_iters /= 7 .or. max_iters /= 7) & + stop 8 + ! If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + ! and each task has at least one iteration. + if (test (-51, 2500, 48, 9, num_tasks, ntasks, min_iters, max_iters) /= 54) & + stop 9 + if (ntasks /= 9) & + stop 10 + if (test (0, 25, 2, 17, num_tasks, ntasks, min_iters, max_iters) /= 13) & + stop 11 + if (ntasks /= 13) & + stop 12 + end block + !$omp end single + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 new file mode 100644 index 00000000000..910e197c29e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-4.f90 @@ -0,0 +1,41 @@ +! { dg-do run } +! { dg-options "-O2" } +! { dg-additional-sources taskloop-4-a.f90 } + +module m_taskloop4 + implicit none (type, external) + integer :: v, u(0:63) + +contains +integer function test (a, b, c, d, fn, num_tasks, min_iters, max_iters) + integer, value :: a, b, c, d + interface + subroutine fn (n1, n2, n3, n4) + integer, value :: n1, n2, n3, n4 + end + end interface + integer :: num_tasks, min_iters, max_iters + integer :: i, t + + t = 0 + u = 0 + v = 0 + call fn (a, b, c, d) + min_iters = 0 + max_iters = 0 + num_tasks = v + if (v /= 0) then + min_iters = u(0) + max_iters = u(0) + t = u(0) + do i = 1, v - 1 + if (min_iters > u(i)) & + min_iters = u(i) + if (max_iters < u(i)) & + max_iters = u(i) + t = t + u(i) + end do + end if + test = t +end +end module diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 new file mode 100644 index 00000000000..f12681baafa --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-5-a.f90 @@ -0,0 +1,95 @@ +! { dg-do compile { target skip-all-targets } } +! Only used by taskloop-5-a.f90 +! To avoid inlining + +module m2 + use m_taskloop5 + implicit none (external, type) +contains + +subroutine grainsize (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) grainsize(strict:d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 3 + w(k) = i + end if + j = j + 1 + u(k) = j + end do +end + +subroutine num_tasks (a, b, c, d) + integer, value :: a, b, c, d + integer :: i, j, k + j = 0 + k = 0 + !$omp taskloop firstprivate (j, k) num_tasks(strict:d) + do i = a, b - 1, c + if (j == 0) then + !$omp atomic capture + k = v + v = v + 1 + !$omp end atomic + if (k >= 64) & + stop 4 + w(k) = i + end if + j = j + 1 + u(k) = j + end do +end +end module + +program main + use m2 + implicit none (external, type) + !$omp parallel + !$omp single + block + integer :: min_iters, max_iters, ntasks, sep + + ! If grainsize is present and has strict modifier, # of task loop iters is == grainsize, + ! except that it can be smaller on the last task. + if (test (0, 79, 1, 17, grainsize, ntasks, min_iters, max_iters, sep) /= 79) & + stop 5 + if (ntasks /= 5 .or. min_iters /= 11 .or. max_iters /= 17 .or. sep /= 4) & + stop + if (test (-49, 2541, 7, 28, grainsize, ntasks, min_iters, max_iters, sep) /= 370) & + stop 6 + if (ntasks /= 14 .or. min_iters /= 6 .or. max_iters /= 28 .or. sep /= 13) & + stop + if (test (7, 21, 2, 15, grainsize, ntasks, min_iters, max_iters, sep) /= 7) & + stop 7 + if (ntasks /= 1 .or. min_iters /= 7 .or. max_iters /= 7 .or. sep /= 1) & + stop 8 + ! If num_tasks is present, # of tasks is min (# of loop iters, num_tasks) + ! and each task has at least one iteration. If strict modifier is present, + ! first set of tasks has ceil (# of loop iters / num_tasks) iterations, + ! followed by possibly empty set of tasks with floor (# of loop iters / num_tasks) + ! iterations. + if (test (-51, 2500, 48, 9, num_tasks, ntasks, min_iters, max_iters, sep) /= 54) & + stop 9 + if (ntasks /= 9 .or. min_iters /= 6 .or. max_iters /= 6 .or. sep /= 9) & + stop 10 + if (test (0, 57, 1, 9, num_tasks, ntasks, min_iters, max_iters, sep) /= 57) & + stop 11 + if (ntasks /= 9 .or. min_iters /= 6 .or. max_iters /= 7 .or. sep /= 3) & + stop 12 + if (test (0, 25, 2, 17, num_tasks, ntasks, min_iters, max_iters, sep) /= 13) & + stop 13 + if (ntasks /= 13 .or. min_iters /= 1 .or. max_iters /= 1 .or. sep /= 13) & + stop 14 + end block + !$omp end single + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 b/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 new file mode 100644 index 00000000000..247f93b97c4 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/taskloop-5.f90 @@ -0,0 +1,75 @@ +! { dg-do run } +! { dg-options "-O2" } +! { dg-additional-sources taskloop-5-a.f90 } + +module m_taskloop5 + implicit none (type, external) + integer :: u(0:63), v, w(0:63) + +contains +integer function test (a, b, c, d, fn, num_tasks, min_iters, max_iters, sep) + integer, value :: a, b, c, d + interface + subroutine fn (n1, n2, n3, n4) + integer, value :: n1, n2, n3, n4 + end + end interface + integer :: num_tasks, min_iters, max_iters, sep + integer :: i, j, t + + t = 0 + u = 0 + v = 0 + call fn (a, b, c, d) + min_iters = 0 + max_iters = 0 + num_tasks = v + sep = v + if (v /= 0) then + min_iters = u(0) + max_iters = u(0) + t = u(0) + do i = 1, v - 1 + if (min_iters > u(i)) & + min_iters = u(i) + if (max_iters < u(i)) & + max_iters = u(i) + t = t + u(i) + end do + + if (min_iters /= max_iters) then + do i = 0, v - 2 + block + integer :: min_idx + min_idx = i + do j = i + 1, v - 1 + if (w(min_idx) > w(j)) & + min_idx = j + end do + if (min_idx /= i) then + block + integer tem + tem = u(i) + u(i) = u(min_idx) + u(min_idx) = tem + tem = w(i) + w(i) = w(min_idx) + w(min_idx) = tem + end block + end if + end block + end do + if (u(0) /= max_iters) & + stop 1 + do i = 1, v - 1 + if (u(i) /= u(i - 1)) then + if (sep /= v .or. u(i) /= min_iters) & + stop 2 + sep = i; + end if + end do + end if + end if + test = t +end +end module