diff --git a/libgomp/config/nvptx/target.c b/libgomp/config/nvptx/target.c index 3d815bb0cb6..2eb545f812f 100644 --- a/libgomp/config/nvptx/target.c +++ b/libgomp/config/nvptx/target.c @@ -26,28 +26,41 @@ #include "libgomp.h" #include <limits.h> +extern int __gomp_team_num __attribute__((shared)); + bool GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper, unsigned int thread_limit, bool first) { + unsigned int num_blocks, block_id; + asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks)); if (!first) - return false; + { + unsigned int team_num; + if (num_blocks > gomp_num_teams_var) + return false; + team_num = __gomp_team_num; + if (team_num > gomp_num_teams_var - num_blocks) + return false; + __gomp_team_num = team_num + num_blocks; + return true; + } if (thread_limit) { struct gomp_task_icv *icv = gomp_icv (true); icv->thread_limit_var = thread_limit > INT_MAX ? UINT_MAX : thread_limit; } - unsigned int num_blocks, block_id; - asm ("mov.u32 %0, %%nctaid.x;" : "=r" (num_blocks)); - asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id)); - /* FIXME: If num_teams_lower > num_blocks, we want to loop multiple - times for some CTAs.
*/ - (void) num_teams_lower; - if (!num_teams_upper || num_teams_upper >= num_blocks) + if (!num_teams_upper) num_teams_upper = num_blocks; - else if (block_id >= num_teams_upper) + else if (num_blocks < num_teams_lower) + num_teams_upper = num_teams_lower; + else if (num_blocks < num_teams_upper) + num_teams_upper = num_blocks; + asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id)); + if (block_id >= num_teams_upper) return false; + __gomp_team_num = block_id; gomp_num_teams_var = num_teams_upper - 1; return true; } diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c index e2359643d7e..cabf01815e9 100644 --- a/libgomp/config/nvptx/team.c +++ b/libgomp/config/nvptx/team.c @@ -32,6 +32,7 @@ #include <string.h> struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon)); +int __gomp_team_num __attribute__((shared,nocommon)); static void gomp_thread_start (struct gomp_thread_pool *); @@ -57,6 +58,7 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data) /* Starting additional threads is not supported. */ gomp_global_icv.dyn_var = true; + __gomp_team_num = 0; nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs)); memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs)); diff --git a/libgomp/config/nvptx/teams.c b/libgomp/config/nvptx/teams.c index 631cb1c7add..60fb11df006 100644 --- a/libgomp/config/nvptx/teams.c +++ b/libgomp/config/nvptx/teams.c @@ -28,6 +28,8 @@ #include "libgomp.h" +extern int __gomp_team_num __attribute__((shared)); + void GOMP_teams_reg (void (*fn) (void *), void *data, unsigned int num_teams, unsigned int thread_limit, unsigned int flags) @@ -48,9 +50,7 @@ omp_get_num_teams (void) int omp_get_team_num (void) { - int ctaid; - asm ("mov.u32 %0, %%ctaid.x;" : "=r" (ctaid)); - return ctaid; + return __gomp_team_num; } ialias (omp_get_num_teams)