diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index c41e305a34f..3a7be63c290 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -1364,6 +1364,13 @@ nvptx_init_unisimt_predicate (FILE *file) int master = REGNO (cfun->machine->unisimt_master); int pred = REGNO (cfun->machine->unisimt_predicate); fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc); + if (cfun->machine->unisimt_outside_simt_predicate) + { + int pred_outside_simt + = REGNO (cfun->machine->unisimt_outside_simt_predicate); + fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, 0;\n", + pred_outside_simt, master); + } fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n"); /* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */ fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master); @@ -1589,6 +1596,13 @@ nvptx_output_unisimt_switch (FILE *file, bool entering) fprintf (file, "\t{\n"); fprintf (file, "\t\t.reg.u32 %%ustmp2;\n"); fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0); + if (cfun->machine->unisimt_outside_simt_predicate) + { + int pred_outside_simt + = REGNO (cfun->machine->unisimt_outside_simt_predicate); + fprintf (file, "\t\tmov.pred %%r%d, %d;\n", pred_outside_simt, + entering ? 0 : 1); + } if (!crtl->is_leaf) { int loc = REGNO (cfun->machine->unisimt_location); @@ -3242,6 +3256,13 @@ nvptx_get_unisimt_predicate () return pred ? pred : pred = gen_reg_rtx (BImode); } +static rtx +nvptx_get_unisimt_outside_simt_predicate () +{ + rtx &pred = cfun->machine->unisimt_outside_simt_predicate; + return pred ? pred : pred = gen_reg_rtx (BImode); +} + /* Return true if given call insn references one of the functions provided by the CUDA runtime: malloc, free, vprintf. */ @@ -3286,6 +3307,16 @@ nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master) return false; } +static void +predicate_insn (rtx_insn *insn, rtx pred) +{ + rtx pat = PATTERN (insn); + pred = gen_rtx_NE (BImode, pred, const0_rtx); + pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); +} + /* Adjust code for uniform-simt code generation variant by making atomics and "syscalls" conditionally executed, and inserting shuffle-based propagation for registers being set. */ @@ -3352,10 +3383,16 @@ nvptx_reorg_uniform_simt () } rtx pred = nvptx_get_unisimt_predicate (); - pred = gen_rtx_NE (BImode, pred, const0_rtx); - pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); - gcc_assert (changed_p); + predicate_insn (insn, pred); + + pred = NULL_RTX; + for (rtx_insn *post = NEXT_INSN (insn); post != next; + post = NEXT_INSN (post)) + { + if (pred == NULL_RTX) + pred = nvptx_get_unisimt_outside_simt_predicate (); + predicate_insn (post, pred); + } } } diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index 3ca22a595d2..b55ade65cc5 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -226,6 +226,7 @@ struct GTY(()) machine_function rtx sync_bar; /* Synchronization barrier ID for vectors. */ rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */ rtx unisimt_predicate; /* Predicate for -muniform-simt. */ + rtx unisimt_outside_simt_predicate; /* Predicate for -muniform-simt. */ rtx unisimt_location; /* Mask location for -muniform-simt. */ /* The following two fields hold the maximum size resp. alignment required for per-lane storage in OpenMP SIMD regions. */ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 1cbf197065f..1ccb0f11e4c 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -2268,25 +2268,28 @@ (define_insn "nvptx_warpsync" [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)] "TARGET_PTX_6_0" - "\\tbar.warp.sync\\t0xffffffff;" - [(set_attr "predicable" "false")]) + "%.\\tbar.warp.sync\\t0xffffffff;") (define_insn "nvptx_uniform_warp_check" [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)] "" { - output_asm_insn ("{", NULL); - output_asm_insn ("\\t" ".reg.b32" "\\t" "act;", NULL); - output_asm_insn ("\\t" "vote.ballot.b32" "\\t" "act,1;", NULL); - output_asm_insn ("\\t" ".reg.pred" "\\t" "uni;", NULL); - output_asm_insn ("\\t" "setp.eq.b32" "\\t" "uni,act,0xffffffff;", - NULL); - output_asm_insn ("@ !uni\\t" "trap;", NULL); - output_asm_insn ("@ !uni\\t" "exit;", NULL); - output_asm_insn ("}", NULL); + const char *insns[] = { + "{", + "\\t" ".reg.b32" "\\t" "act;", + "%.\\t" "vote.ballot.b32" "\\t" "act,1;", + "\\t" ".reg.pred" "\\t" "do_abort;", + "\\t" "mov.pred" "\\t" "do_abort,0;", + "%.\\t" "setp.ne.b32" "\\t" "do_abort,act,0xffffffff;", + "@ do_abort\\t" "trap;", + "@ do_abort\\t" "exit;", + "}", + NULL + }; + for (const char **p = &insns[0]; *p != NULL; p++) + output_asm_insn (*p, NULL); return ""; - } - [(set_attr "predicable" "false")]) + }) (define_expand "memory_barrier" [(set (match_dup 0) diff --git a/libgomp/testsuite/libgomp.c/pr104783.c b/libgomp/testsuite/libgomp.c/pr104783.c new file mode 100644 index 00000000000..05a93cd6bc1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/pr104783.c @@ -0,0 +1,18 @@ +int +main (void) +{ + unsigned val = 0; + +#pragma omp target map(tofrom: val) +#pragma omp simd + for (int i = 0 ; i < 1 ; i++) + { +#pragma omp atomic update + val = val + 1; + } + + if (val != 1) + __builtin_abort (); + + return 0; +}