nvptx.c: Include gimple headers.

* config/nvptx/nvptx.c: Include gimple headers.
	(worker_red_size, worker_red_align, worker_red_name,
	worker_red_sym): New.
	(nvptx_option_override): Initialize worker reduction buffer.
	(nvptx_file_end): Write out worker reduction buffer var.
	(nvptx_expand_shuffle, nvptx_expand_worker_addr,
	nvptx_expand_cmp_swap): New builtin expanders.
	(enum nvptx_builtins): New.
	(nvptx_builtin_decls): New.
	(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New.
	(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
	(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
	nvptx_lockless_update): New helpers.
	(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
	nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
	(nvptx_goacc_reduction): New.
	(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
	TARGET_BUILTIN_DECL): Override.
	(TARGET_GOACC_REDUCTION): Override.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>

From-SVN: r229768

gcc/ChangeLog

@@ -1,3 +1,26 @@
2015-11-04 Nathan Sidwell <nathan@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
* config/nvptx/nvptx.c: Include gimple headers.
(worker_red_size, worker_red_align, worker_red_name,
worker_red_sym): New.
(nvptx_option_override): Initialize worker reduction buffer.
(nvptx_file_end): Write out worker reduction buffer var.
(nvptx_expand_shuffle, nvptx_expand_worker_addr,
nvptx_expand_cmp_swap): New builtin expanders.
(enum nvptx_builtins): New.
(nvptx_builtin_decls): New.
(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_builtin): New.
(PTX_VECTOR_LENGTH, PTX_WORKER_LENGTH): New.
(nvptx_get_worker_red_addr, nvptx_generate_vector_shuffle,
nvptx_lockless_update): New helpers.
(nvptx_goacc_reduction_setup, nvptx_goacc_reduction_init,
nvptx_goacc_reduction_fini, nvptx_goacc_reduction_teardown): New.
(nvptx_goacc_reduction): New.
(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN,
TARGET_BUILTIN_DECL): Override.
(TARGET_GOACC_REDUCTION): Override.
2015-11-04 Nathan Sidwell <nathan@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>

gcc/config/nvptx/nvptx.c

@@ -57,6 +57,15 @@
#include "omp-low.h"
#include "gomp-constants.h"
#include "dumpfile.h"
#include "internal-fn.h"
#include "gimple-iterator.h"
#include "stringpool.h"
#include "tree-ssa-operands.h"
#include "tree-ssanames.h"
#include "gimplify.h"
#include "tree-phinodes.h"
#include "cfgloop.h"
#include "fold-const.h"
/* This file should be included last. */
#include "target-def.h"
@@ -88,16 +97,23 @@ struct tree_hasher : ggc_cache_ptr_hash<tree_node>
static GTY((cache)) hash_table<tree_hasher> *declared_fndecls_htab;
static GTY((cache)) hash_table<tree_hasher> *needed_fndecls_htab;
/* Buffer needed to broadcast across workers. This is used for both
worker-neutering and worker broadcasting. It is shared by all
functions emitted. The buffer is placed in shared memory. It'd be
nice if PTX supported common blocks, because then this could be
shared across TUs (taking the largest size). */
static unsigned worker_bcast_size;
static unsigned worker_bcast_align;
#define worker_bcast_name "__worker_bcast"
static GTY(()) rtx worker_bcast_sym;
/* Buffer needed for worker reductions. This has to be distinct from
the worker broadcast array, as both may be live concurrently. */
static unsigned worker_red_size;
static unsigned worker_red_align;
#define worker_red_name "__worker_red"
static GTY(()) rtx worker_red_sym;
/* Allocate a new, cleared machine_function structure. */
static struct machine_function *
@@ -128,6 +144,9 @@ nvptx_option_override (void)
worker_bcast_sym = gen_rtx_SYMBOL_REF (Pmode, worker_bcast_name);
worker_bcast_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
worker_red_sym = gen_rtx_SYMBOL_REF (Pmode, worker_red_name);
worker_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
}
/* Return the mode to be used when declaring a ptx object for OBJ.
@@ -3246,8 +3265,203 @@ nvptx_file_end (void)
worker_bcast_align,
worker_bcast_name, worker_bcast_size);
}
if (worker_red_size)
{
/* Define the reduction buffer. */
worker_red_size = ((worker_red_size + worker_red_align - 1)
& ~(worker_red_align - 1));
fprintf (asm_out_file, "// BEGIN VAR DEF: %s\n", worker_red_name);
fprintf (asm_out_file, ".shared .align %d .u8 %s[%d];\n",
worker_red_align,
worker_red_name, worker_red_size);
}
}
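The size emitted above is first rounded up to the buffer's alignment. As a standalone illustration (not part of the patch), the expression is the usual power-of-two round-up idiom:
/* Illustrative helper, not in the patch: round SIZE up to the next
   multiple of ALIGN, where ALIGN is a power of two.  This is the same
   expression applied to worker_red_size above.  */
static unsigned
round_up_to_align (unsigned size, unsigned align)
{
  return (size + align - 1) & ~(align - 1);
}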
/* Expander for the shuffle builtins. */
static rtx
nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
{
if (ignore)
return target;
rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
NULL_RTX, mode, EXPAND_NORMAL);
if (!REG_P (src))
src = copy_to_mode_reg (mode, src);
rtx idx = expand_expr (CALL_EXPR_ARG (exp, 1),
NULL_RTX, SImode, EXPAND_NORMAL);
rtx op = expand_expr (CALL_EXPR_ARG (exp, 2),
NULL_RTX, SImode, EXPAND_NORMAL);
if (!REG_P (idx) && GET_CODE (idx) != CONST_INT)
idx = copy_to_mode_reg (SImode, idx);
rtx pat = nvptx_gen_shuffle (target, src, idx, INTVAL (op));
if (pat)
emit_insn (pat);
return target;
}
/* Worker reduction address expander. */
static rtx
nvptx_expand_worker_addr (tree exp, rtx target,
machine_mode ARG_UNUSED (mode), int ignore)
{
if (ignore)
return target;
unsigned align = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 2));
if (align > worker_red_align)
worker_red_align = align;
unsigned offset = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 0));
unsigned size = TREE_INT_CST_LOW (CALL_EXPR_ARG (exp, 1));
if (size + offset > worker_red_size)
worker_red_size = size + offset;
emit_insn (gen_rtx_SET (target, worker_red_sym));
if (offset)
emit_insn (gen_rtx_SET (target,
gen_rtx_PLUS (Pmode, target, GEN_INT (offset))));
emit_insn (gen_rtx_SET (target,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, target),
UNSPEC_FROM_SHARED)));
return target;
}
/* Expand the CMP_SWAP PTX builtins. We have our own versions that do
not require taking the address of any object, other than the memory
cell being operated on. */
static rtx
nvptx_expand_cmp_swap (tree exp, rtx target,
machine_mode ARG_UNUSED (m), int ARG_UNUSED (ignore))
{
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
if (!target)
target = gen_reg_rtx (mode);
rtx mem = expand_expr (CALL_EXPR_ARG (exp, 0),
NULL_RTX, Pmode, EXPAND_NORMAL);
rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
NULL_RTX, mode, EXPAND_NORMAL);
rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
NULL_RTX, mode, EXPAND_NORMAL);
rtx pat;
mem = gen_rtx_MEM (mode, mem);
if (!REG_P (cmp))
cmp = copy_to_mode_reg (mode, cmp);
if (!REG_P (src))
src = copy_to_mode_reg (mode, src);
if (mode == SImode)
pat = gen_atomic_compare_and_swapsi_1 (target, mem, cmp, src, const0_rtx);
else
pat = gen_atomic_compare_and_swapdi_1 (target, mem, cmp, src, const0_rtx);
emit_insn (pat);
return target;
}
/* Codes for all the NVPTX builtins. */
enum nvptx_builtins
{
NVPTX_BUILTIN_SHUFFLE,
NVPTX_BUILTIN_SHUFFLELL,
NVPTX_BUILTIN_WORKER_ADDR,
NVPTX_BUILTIN_CMP_SWAP,
NVPTX_BUILTIN_CMP_SWAPLL,
NVPTX_BUILTIN_MAX
};
static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
/* Return the NVPTX builtin for CODE. */
static tree
nvptx_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
{
if (code >= NVPTX_BUILTIN_MAX)
return error_mark_node;
return nvptx_builtin_decls[code];
}
/* Set up all builtin functions for this target. */
static void
nvptx_init_builtins (void)
{
#define DEF(ID, NAME, T) \
(nvptx_builtin_decls[NVPTX_BUILTIN_ ## ID] \
= add_builtin_function ("__builtin_nvptx_" NAME, \
build_function_type_list T, \
NVPTX_BUILTIN_ ## ID, BUILT_IN_MD, NULL, NULL))
#define ST sizetype
#define UINT unsigned_type_node
#define LLUINT long_long_unsigned_type_node
#define PTRVOID ptr_type_node
DEF (SHUFFLE, "shuffle", (UINT, UINT, UINT, UINT, NULL_TREE));
DEF (SHUFFLELL, "shufflell", (LLUINT, LLUINT, UINT, UINT, NULL_TREE));
DEF (WORKER_ADDR, "worker_addr",
(PTRVOID, ST, UINT, UINT, NULL_TREE));
DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE));
DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE));
#undef DEF
#undef ST
#undef UINT
#undef LLUINT
#undef PTRVOID
}
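For reference, the DEF lines above register builtins whose C-level signatures look roughly as follows (a sketch inferred from the type lists; sizetype is shown as size_t, and the functions exist only as nvptx target builtins):
/* Prototype sketch of the builtins registered above (inferred from the
   DEF type lists; available only when targeting nvptx).  */
#include <stddef.h>
unsigned __builtin_nvptx_shuffle (unsigned val, unsigned idx, unsigned kind);
unsigned long long __builtin_nvptx_shufflell (unsigned long long val,
                                              unsigned idx, unsigned kind);
void *__builtin_nvptx_worker_addr (size_t offset, unsigned size,
                                   unsigned align);
unsigned __builtin_nvptx_cmp_swap (void *ptr, unsigned cmp, unsigned src);
unsigned long long __builtin_nvptx_cmp_swapll (void *ptr,
                                               unsigned long long cmp,
                                               unsigned long long src);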
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
SUBTARGET may be used as the target for computing one of EXP's operands.
IGNORE is nonzero if the value is to be ignored. */
static rtx
nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
machine_mode mode, int ignore)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
switch (DECL_FUNCTION_CODE (fndecl))
{
case NVPTX_BUILTIN_SHUFFLE:
case NVPTX_BUILTIN_SHUFFLELL:
return nvptx_expand_shuffle (exp, target, mode, ignore);
case NVPTX_BUILTIN_WORKER_ADDR:
return nvptx_expand_worker_addr (exp, target, mode, ignore);
case NVPTX_BUILTIN_CMP_SWAP:
case NVPTX_BUILTIN_CMP_SWAPLL:
return nvptx_expand_cmp_swap (exp, target, mode, ignore);
default: gcc_unreachable ();
}
}
/* Define dimension sizes for known hardware. */
#define PTX_VECTOR_LENGTH 32
#define PTX_WORKER_LENGTH 32
/* Validate compute dimensions of an OpenACC offload or routine, fill
in non-unity defaults. FN_LEVEL indicates the level at which a
routine might spawn a loop. It is negative for non-routines. */
@@ -3284,6 +3498,404 @@ nvptx_goacc_fork_join (gcall *call, const int dims[],
return true;
}
/* Generate a PTX builtin function call that returns the address in
the worker reduction buffer at OFFSET. TYPE is the type of the
data at that location. */
static tree
nvptx_get_worker_red_addr (tree type, tree offset)
{
machine_mode mode = TYPE_MODE (type);
tree fndecl = nvptx_builtin_decl (NVPTX_BUILTIN_WORKER_ADDR, true);
tree size = build_int_cst (unsigned_type_node, GET_MODE_SIZE (mode));
tree align = build_int_cst (unsigned_type_node,
GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
tree call = build_call_expr (fndecl, 3, offset, size, align);
return fold_convert (build_pointer_type (type), call);
}
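In source terms the helper builds a cast call to the worker-address builtin; a hypothetical macro-level sketch, with sizeof and __alignof__ standing in for the GET_MODE_SIZE and GET_MODE_ALIGNMENT values used above:
/* Hypothetical sketch of the expression the helper builds in tree form.  */
#define WORKER_RED_ADDR(TYPE, OFFSET) \
  ((TYPE *) __builtin_nvptx_worker_addr ((OFFSET), sizeof (TYPE), \
                                         __alignof__ (TYPE)))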
/* Emit a SHFL.DOWN using shift SHIFT of VAR into DEST_VAR. This function
will cast the variable if necessary. */
static void
nvptx_generate_vector_shuffle (location_t loc,
tree dest_var, tree var, unsigned shift,
gimple_seq *seq)
{
unsigned fn = NVPTX_BUILTIN_SHUFFLE;
tree_code code = NOP_EXPR;
tree type = unsigned_type_node;
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
if (!INTEGRAL_MODE_P (mode))
code = VIEW_CONVERT_EXPR;
if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode))
{
fn = NVPTX_BUILTIN_SHUFFLELL;
type = long_long_unsigned_type_node;
}
tree call = nvptx_builtin_decl (fn, true);
call = build_call_expr_loc
(loc, call, 3, fold_build1 (code, type, var),
build_int_cst (unsigned_type_node, shift),
build_int_cst (unsigned_type_node, SHUFFLE_DOWN));
call = fold_build1 (code, TREE_TYPE (dest_var), call);
gimplify_assign (dest_var, call, seq);
}
/* Insert code to locklessly update *PTR with *PTR OP VAR just before
GSI. */
static tree
nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
tree ptr, tree var, tree_code op)
{
unsigned fn = NVPTX_BUILTIN_CMP_SWAP;
tree_code code = NOP_EXPR;
tree type = unsigned_type_node;
enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
if (!INTEGRAL_MODE_P (mode))
code = VIEW_CONVERT_EXPR;
if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (DImode))
{
fn = NVPTX_BUILTIN_CMP_SWAPLL;
type = long_long_unsigned_type_node;
}
gimple_seq init_seq = NULL;
tree init_var = make_ssa_name (type);
tree init_expr = omp_reduction_init_op (loc, op, TREE_TYPE (var));
init_expr = fold_build1 (code, type, init_expr);
gimplify_assign (init_var, init_expr, &init_seq);
gimple *init_end = gimple_seq_last (init_seq);
gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
gimple_seq loop_seq = NULL;
tree expect_var = make_ssa_name (type);
tree actual_var = make_ssa_name (type);
tree write_var = make_ssa_name (type);
tree write_expr = fold_build1 (code, TREE_TYPE (var), expect_var);
write_expr = fold_build2 (op, TREE_TYPE (var), write_expr, var);
write_expr = fold_build1 (code, type, write_expr);
gimplify_assign (write_var, write_expr, &loop_seq);
tree swap_expr = nvptx_builtin_decl (fn, true);
swap_expr = build_call_expr_loc (loc, swap_expr, 3,
ptr, expect_var, write_var);
gimplify_assign (actual_var, swap_expr, &loop_seq);
gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
NULL_TREE, NULL_TREE);
gimple_seq_add_stmt (&loop_seq, cond);
/* Split the block just after the init stmts. */
basic_block pre_bb = gsi_bb (*gsi);
edge pre_edge = split_block (pre_bb, init_end);
basic_block loop_bb = pre_edge->dest;
pre_bb = pre_edge->src;
/* Reset the iterator. */
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
/* Insert the loop statements. */
gimple *loop_end = gimple_seq_last (loop_seq);
gsi_insert_seq_before (gsi, loop_seq, GSI_SAME_STMT);
/* Split the block just after the loop stmts. */
edge post_edge = split_block (loop_bb, loop_end);
basic_block post_bb = post_edge->dest;
loop_bb = post_edge->src;
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
gphi *phi = create_phi_node (expect_var, loop_bb);
add_phi_arg (phi, init_var, pre_edge, loc);
add_phi_arg (phi, actual_var, loop_edge, loc);
loop *loop = alloc_loop ();
loop->header = loop_bb;
loop->latch = loop_bb;
add_loop (loop, loop_bb->loop_father);
return fold_build1 (code, TREE_TYPE (var), write_var);
}
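The gimple constructed above amounts to a standard compare-and-swap retry loop. A plain C sketch of the same control flow, using '+' to stand in for OP and the cmp_swap builtin registered earlier:
/* C sketch of the CFG built above: retry until the compare-and-swap
   observes the value WRITE was computed from.  EXPECT starts from the
   reduction's neutral value (omp_reduction_init_op); 0 for '+'.  */
static unsigned
lockless_update_sketch (unsigned *ptr, unsigned var)
{
  unsigned expect = 0;
  for (;;)
    {
      unsigned write = expect + var;   /* expect OP var  */
      unsigned actual = __builtin_nvptx_cmp_swap (ptr, expect, write);
      if (actual == expect)
        return write;                  /* the helper returns WRITE  */
      expect = actual;
    }
}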
/* NVPTX implementation of GOACC_REDUCTION_SETUP. */
static void
nvptx_goacc_reduction_setup (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
tree var = gimple_call_arg (call, 2);
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
gimple_seq seq = NULL;
push_gimplify_context (true);
if (level != GOMP_DIM_GANG)
{
/* Copy the receiver object. */
tree ref_to_res = gimple_call_arg (call, 1);
if (!integer_zerop (ref_to_res))
var = build_simple_mem_ref (ref_to_res);
}
if (level == GOMP_DIM_WORKER)
{
/* Store incoming value to worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
tree ref = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (ref) = 1;
gimplify_assign (ref, var, &seq);
}
if (lhs)
gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
}
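At the worker level, SETUP publishes the incoming value into this gang's slot of the shared reduction buffer. A hypothetical C-level sketch of that store, built on the worker_addr builtin:
/* Hypothetical sketch of the WORKER-level SETUP above: store the incoming
   value to the shared reduction buffer before the worker fork.  */
static unsigned
reduction_setup_sketch (unsigned var, unsigned offset)
{
  volatile unsigned *slot
    = (volatile unsigned *) __builtin_nvptx_worker_addr (offset,
                                                         sizeof (unsigned),
                                                         __alignof__ (unsigned));
  *slot = var;
  return var;   /* the call's LHS, if any, receives VAR unchanged  */
}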
/* NVPTX implementation of GOACC_REDUCTION_INIT. */
static void
nvptx_goacc_reduction_init (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
tree var = gimple_call_arg (call, 2);
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
enum tree_code rcode
= (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
tree init = omp_reduction_init_op (gimple_location (call), rcode,
TREE_TYPE (var));
gimple_seq seq = NULL;
push_gimplify_context (true);
if (level == GOMP_DIM_VECTOR)
{
/* Initialize the non-zero vector lanes to INIT_VAL (OP). */
tree tid = make_ssa_name (integer_type_node);
tree dim_vector = gimple_call_arg (call, 3);
gimple *tid_call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1,
dim_vector);
gimple *cond_stmt = gimple_build_cond (NE_EXPR, tid, integer_zero_node,
NULL_TREE, NULL_TREE);
gimple_call_set_lhs (tid_call, tid);
gimple_seq_add_stmt (&seq, tid_call);
gimple_seq_add_stmt (&seq, cond_stmt);
/* Split the block just after the call. */
edge init_edge = split_block (gsi_bb (gsi), call);
basic_block init_bb = init_edge->dest;
basic_block call_bb = init_edge->src;
/* Fixup flags from call_bb to init_bb. */
init_edge->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
/* Set the initialization stmts. */
gimple_seq init_seq = NULL;
tree init_var = make_ssa_name (TREE_TYPE (var));
gimplify_assign (init_var, init, &init_seq);
gsi = gsi_start_bb (init_bb);
gsi_insert_seq_before (&gsi, init_seq, GSI_SAME_STMT);
/* Split block just after the init stmt. */
gsi_prev (&gsi);
edge inited_edge = split_block (gsi_bb (gsi), gsi_stmt (gsi));
basic_block dst_bb = inited_edge->dest;
/* Create false edge from call_bb to dst_bb. */
edge nop_edge = make_edge (call_bb, dst_bb, EDGE_FALSE_VALUE);
/* Create phi node in dst block. */
gphi *phi = create_phi_node (lhs, dst_bb);
add_phi_arg (phi, init_var, inited_edge, gimple_location (call));
add_phi_arg (phi, var, nop_edge, gimple_location (call));
/* Reset dominator of dst bb. */
set_immediate_dominator (CDI_DOMINATORS, dst_bb, call_bb);
/* Reset the gsi. */
gsi = gsi_for_stmt (call);
}
else
{
if (level == GOMP_DIM_GANG)
{
/* If there's no receiver object, propagate the incoming VAR. */
tree ref_to_res = gimple_call_arg (call, 1);
if (integer_zerop (ref_to_res))
init = var;
}
gimplify_assign (lhs, init, &seq);
}
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
}
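The vector branch above builds a small diamond in the CFG; in C terms it computes the following, where vector_lane stands in for the IFN_GOACC_DIM_POS result (an illustrative sketch, not part of the patch):
/* C sketch of the GOMP_DIM_VECTOR branch of INIT: lane 0 keeps the
   incoming value, every other vector lane starts from the reduction's
   neutral element.  */
static unsigned
reduction_init_sketch (unsigned incoming, unsigned neutral,
                       unsigned vector_lane)
{
  return vector_lane != 0 ? neutral : incoming;
}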
/* NVPTX implementation of GOACC_REDUCTION_FINI. */
static void
nvptx_goacc_reduction_fini (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
tree ref_to_res = gimple_call_arg (call, 1);
tree var = gimple_call_arg (call, 2);
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
enum tree_code op
= (enum tree_code)TREE_INT_CST_LOW (gimple_call_arg (call, 4));
gimple_seq seq = NULL;
tree r = NULL_TREE;
push_gimplify_context (true);
if (level == GOMP_DIM_VECTOR)
{
/* Emit binary shuffle tree. TODO. Emit this as an actual loop,
but that requires a method of emitting a unified jump at the
gimple level. */
for (int shfl = PTX_VECTOR_LENGTH / 2; shfl > 0; shfl = shfl >> 1)
{
tree other_var = make_ssa_name (TREE_TYPE (var));
nvptx_generate_vector_shuffle (gimple_location (call),
other_var, var, shfl, &seq);
r = make_ssa_name (TREE_TYPE (var));
gimplify_assign (r, fold_build2 (op, TREE_TYPE (var),
var, other_var), &seq);
var = r;
}
}
else
{
tree accum = NULL_TREE;
if (level == GOMP_DIM_WORKER)
{
/* Get reduction buffer address. */
tree offset = gimple_call_arg (call, 5);
tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
accum = ptr;
}
else if (integer_zerop (ref_to_res))
r = var;
else
accum = ref_to_res;
if (accum)
{
/* Locklessly update the accumulator. */
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
seq = NULL;
r = nvptx_lockless_update (gimple_location (call), &gsi,
accum, var, op);
}
}
if (lhs)
gimplify_assign (lhs, r, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
}
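For the vector level, the loop above unrolls a log2(PTX_VECTOR_LENGTH) shuffle-down tree. A C sketch of the resulting computation, using '+' for OP and assuming the SHUFFLE_DOWN enumerator from nvptx.md is visible:
/* C sketch of the vector FINI expansion: fold all PTX_VECTOR_LENGTH lanes'
   partial values down to lane 0 with successive SHFL.DOWN steps.  */
static unsigned
vector_reduce_sketch (unsigned var)
{
  for (unsigned shfl = PTX_VECTOR_LENGTH / 2; shfl > 0; shfl >>= 1)
    var += __builtin_nvptx_shuffle (var, shfl, SHUFFLE_DOWN);
  return var;   /* only lane 0 holds the complete reduction  */
}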
/* NVPTX implementation of GOACC_REDUCTION_TEARDOWN. */
static void
nvptx_goacc_reduction_teardown (gcall *call)
{
gimple_stmt_iterator gsi = gsi_for_stmt (call);
tree lhs = gimple_call_lhs (call);
tree var = gimple_call_arg (call, 2);
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
gimple_seq seq = NULL;
push_gimplify_context (true);
if (level == GOMP_DIM_WORKER)
{
/* Read the worker reduction buffer. */
tree offset = gimple_call_arg (call, 5);
tree call = nvptx_get_worker_red_addr (TREE_TYPE (var), offset);
tree ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
var = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (var) = 1;
}
if (level != GOMP_DIM_GANG)
{
/* Write to the receiver object. */
tree ref_to_res = gimple_call_arg (call, 1);
if (!integer_zerop (ref_to_res))
gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
}
if (lhs)
gimplify_assign (lhs, var, &seq);
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
}
/* NVPTX reduction expander. */
void
nvptx_goacc_reduction (gcall *call)
{
unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
switch (code)
{
case IFN_GOACC_REDUCTION_SETUP:
nvptx_goacc_reduction_setup (call);
break;
case IFN_GOACC_REDUCTION_INIT:
nvptx_goacc_reduction_init (call);
break;
case IFN_GOACC_REDUCTION_FINI:
nvptx_goacc_reduction_fini (call);
break;
case IFN_GOACC_REDUCTION_TEARDOWN:
nvptx_goacc_reduction_teardown (call);
break;
default:
gcc_unreachable ();
}
}
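These hooks fire when the middle end lowers an OpenACC reduction for the nvptx offload target. A typical source-level construct that exercises all four expanders (an illustrative example, not part of the patch):
/* Example OpenACC reduction; when offloaded to nvptx, the '+' reduction is
   expanded through the SETUP/INIT/FINI/TEARDOWN handlers above.  */
float
dot_product (const float *a, const float *b, int n)
{
  float sum = 0.0f;
#pragma acc parallel loop reduction (+:sum) copyin (a[0:n], b[0:n])
  for (int i = 0; i < n; i++)
    sum += a[i] * b[i];
  return sum;
}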
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3373,12 +3985,22 @@ nvptx_goacc_fork_join (gcall *call, const int dims[],
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS nvptx_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN nvptx_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL nvptx_builtin_decl
#undef TARGET_GOACC_VALIDATE_DIMS
#define TARGET_GOACC_VALIDATE_DIMS nvptx_goacc_validate_dims
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN nvptx_goacc_fork_join
#undef TARGET_GOACC_REDUCTION
#define TARGET_GOACC_REDUCTION nvptx_goacc_reduction
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-nvptx.h"