/* Patch summary: add membar.gl / membar.cta builtins to the nvptx backend
   and wrap the lock-protected reduction update in memory barriers.

   gcc/config/nvptx/nvptx.cc
   - enum nvptx_builtins: adds NVPTX_BUILTIN_MEMBAR_GL and
     NVPTX_BUILTIN_MEMBAR_CTA ahead of NVPTX_BUILTIN_MAX.
   - nvptx_init_builtins: defines the VOID shorthand (void_type_node) and
     registers "membar_gl" and "membar_cta" through DEF with the type list
     (VOID, VOID, NULL_TREE).
     NOTE(review): no hunk in this diff adds a matching "#undef VOID" next
     to the existing "#undef DEF" / "#undef ST" lines -- confirm whether the
     macro is meant to stay defined past this function.
   - nvptx_expand_builtin: the two new cases emit gen_nvptx_membar_gl () and
     gen_nvptx_membar_cta () respectively and return NULL_RTX.
   - nvptx_lockfull_update: gains an "int level" parameter.  It picks
     NVPTX_BUILTIN_MEMBAR_GL when level == GOMP_DIM_GANG and
     NVPTX_BUILTIN_MEMBAR_CTA otherwise, then gimplifies a call to that
     builtin into red_seq twice: once before the volatile load of *ptr and
     once after the volatile store of the result.  red_seq is then inserted
     before GSI as before.
   - nvptx_reduction_update: gains the same "int level" parameter and
     forwards it only to nvptx_lockfull_update; the nvptx_lockless_update
     call is unchanged.
   - nvptx_goacc_reduction_fini: passes its "level" to
     nvptx_reduction_update.

   gcc/config/nvptx/nvptx.md
   - Adds UNSPECV_MEMBAR_GL after UNSPECV_MEMBAR_CTA.
   - Adds the "nvptx_membar_gl" expander, which builds a volatile BLKmode
     MEM on a SCRATCH address as operand 0, and the "*nvptx_membar_gl" insn,
     which outputs "membar.gl;" and is marked non-predicable.
   - No "nvptx_membar_cta" pattern is added here; the hunk context shows an
     existing pattern that outputs "membar.cta;", presumably the one behind
     gen_nvptx_membar_cta -- TODO confirm.

   NOTE(review): the line structure of this patch appears to have been
   collapsed; the original line breaks would need restoring before the
   patch can be applied.  */
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index db6a405d623..ceea4d3a093 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -5622,6 +5622,8 @@ enum nvptx_builtins NVPTX_BUILTIN_VECTOR_ADDR, NVPTX_BUILTIN_CMP_SWAP, NVPTX_BUILTIN_CMP_SWAPLL, + NVPTX_BUILTIN_MEMBAR_GL, + NVPTX_BUILTIN_MEMBAR_CTA, NVPTX_BUILTIN_MAX }; @@ -5652,6 +5654,7 @@ nvptx_init_builtins (void) #define UINT unsigned_type_node #define LLUINT long_long_unsigned_type_node #define PTRVOID ptr_type_node +#define VOID void_type_node DEF (SHUFFLE, "shuffle", (UINT, UINT, UINT, UINT, NULL_TREE)); DEF (SHUFFLELL, "shufflell", (LLUINT, LLUINT, UINT, UINT, NULL_TREE)); @@ -5661,6 +5664,8 @@ nvptx_init_builtins (void) (PTRVOID, ST, UINT, UINT, NULL_TREE)); DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE)); DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE)); + DEF (MEMBAR_GL, "membar_gl", (VOID, VOID, NULL_TREE)); + DEF (MEMBAR_CTA, "membar_cta", (VOID, VOID, NULL_TREE)); #undef DEF #undef ST @@ -5696,6 +5701,14 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), case NVPTX_BUILTIN_CMP_SWAPLL: return nvptx_expand_cmp_swap (exp, target, mode, ignore); + case NVPTX_BUILTIN_MEMBAR_GL: + emit_insn (gen_nvptx_membar_gl ()); + return NULL_RTX; + + case NVPTX_BUILTIN_MEMBAR_CTA: + emit_insn (gen_nvptx_membar_cta ()); + return NULL_RTX; + default: gcc_unreachable (); } } @@ -6243,7 +6256,7 @@ nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi, static tree nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, - tree ptr, tree var, tree_code op) + tree ptr, tree var, tree_code op, int level) { tree var_type = TREE_TYPE (var); tree swap_fn = nvptx_builtin_decl (NVPTX_BUILTIN_CMP_SWAP, true); @@ -6295,8 +6308,17 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, lock_loop->any_estimate = true; add_loop (lock_loop, entry_bb->loop_father); - /* Build and insert the 
reduction calculation. */ + /* Build the pre-barrier. */ gimple_seq red_seq = NULL; + enum nvptx_builtins barrier_builtin + = (level == GOMP_DIM_GANG + ? NVPTX_BUILTIN_MEMBAR_GL + : NVPTX_BUILTIN_MEMBAR_CTA); + tree barrier_fn = nvptx_builtin_decl (barrier_builtin, true); + tree barrier_expr = build_call_expr_loc (loc, barrier_fn, 0); + gimplify_stmt (&barrier_expr, &red_seq); + + /* Build the reduction calculation. */ tree acc_in = make_ssa_name (var_type); tree ref_in = build_simple_mem_ref (ptr); TREE_THIS_VOLATILE (ref_in) = 1; @@ -6310,6 +6332,11 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, TREE_THIS_VOLATILE (ref_out) = 1; gimplify_assign (ref_out, acc_out, &red_seq); + /* Build the post-barrier. */ + barrier_expr = build_call_expr_loc (loc, barrier_fn, 0); + gimplify_stmt (&barrier_expr, &red_seq); + + /* Insert the reduction calculation. */ gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT); /* Build & insert the unlock sequence. */ @@ -6330,7 +6357,7 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, static tree nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi, - tree ptr, tree var, tree_code op) + tree ptr, tree var, tree_code op, int level) { tree type = TREE_TYPE (var); tree size = TYPE_SIZE (type); @@ -6339,7 +6366,7 @@ nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi, || size == TYPE_SIZE (long_long_unsigned_type_node)) return nvptx_lockless_update (loc, gsi, ptr, var, op); else - return nvptx_lockfull_update (loc, gsi, ptr, var, op); + return nvptx_lockfull_update (loc, gsi, ptr, var, op, level); } /* NVPTX implementation of GOACC_REDUCTION_SETUP. 
*/ @@ -6531,7 +6558,7 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa) gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); seq = NULL; r = nvptx_reduction_update (gimple_location (call), &gsi, - accum, var, op); + accum, var, op, level); } } diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 5cf190ab73e..773ae8fdc6f 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -58,6 +58,7 @@ UNSPECV_BARSYNC UNSPECV_MEMBAR UNSPECV_MEMBAR_CTA + UNSPECV_MEMBAR_GL UNSPECV_DIM_POS UNSPECV_FORK @@ -1932,6 +1933,22 @@ "\\tmembar.cta;" [(set_attr "predicable" "false")]) +(define_expand "nvptx_membar_gl" + [(set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*nvptx_membar_gl" + [(set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] + "" + "\\tmembar.gl;" + [(set_attr "predicable" "false")]) + (define_insn "nvptx_nounroll" [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] ""