[BRIGFE] phsa-specific optimizations
Add flag -fassume-phsa that is on by default. If -fno-assume-phsa is given, these optimizations are disabled. With this flag, gccbrig can generate GENERIC that assumes we are targeting a phsa-runtime based implementation, which allows us to expose the work-item context accesses to retrieve WI IDs etc. which helps optimizers. First optimization that takes advantage of this is to get rid of the setworkitemid calls whenever we have non-inlined calls that use IDs internally. Other optimizations added in this commit: - expand absoluteid to similar level of simplicity as workitemid. At the moment absoluteid is the best indexing ID to end up with WG vectorization. - propagate ID variables closer to their uses. This is mainly to avoid known useless casts, which confuse at least scalar evolution analysis. - use signed long long for storing IDs. Unsigned integers have defined wraparound semantics, which confuse at least scalar evolution analysis, leading to unvectorizable WI loops. - also refactor some BRIG function generation helpers to brig_function. - no point in having the wi-loop as a for-loop. It's really a do...while and SCEV can analyze it just fine still. - add consts to ptrs etc. in BRIG builtin defs. Improves optimization opportunities. - add qualifiers to generated function parameters. Const and restrict on the hidden local/private pointers, the arg buffer and the context pointer help some optimizations. From-SVN: r259957
This commit is contained in:
parent
1e25c5a9bb
commit
080dc24383
25 changed files with 1257 additions and 814 deletions
|
@ -1,3 +1,9 @@
|
|||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
* brig-builtins.def: Add consts to ptrs etc. in BRIG builtin defs.
|
||||
To improve optimization opportunities.
|
||||
* builtin-types.def: The new needed builtin types for the above.
|
||||
|
||||
2018-05-04 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* bb-reorder.c (sanitize_hot_paths): Release hot_bbs_to_check.
|
||||
|
|
|
@ -45,25 +45,25 @@ DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_GRIDSIZE, BRIG_OPCODE_GRIDSIZE,
|
|||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATABSID_U32,
|
||||
BRIG_OPCODE_WORKITEMFLATABSID, BRIG_TYPE_U32,
|
||||
"__hsail_workitemflatabsid_u32", BT_FN_UINT_PTR,
|
||||
ATTR_NOTHROW_LEAF_LIST)
|
||||
"__hsail_workitemflatabsid_u32", BT_FN_UINT_CONST_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATABSID_U64,
|
||||
BRIG_OPCODE_WORKITEMFLATABSID, BRIG_TYPE_U64,
|
||||
"__hsail_workitemflatabsid_u64", BT_FN_ULONG_PTR,
|
||||
ATTR_NOTHROW_LEAF_LIST)
|
||||
"__hsail_workitemflatabsid_u64", BT_FN_ULONG_CONST_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMFLATID, BRIG_OPCODE_WORKITEMFLATID,
|
||||
BRIG_TYPE_U32, "__hsail_workitemflatid", BT_FN_UINT_PTR,
|
||||
ATTR_NOTHROW_LEAF_LIST)
|
||||
BRIG_TYPE_U32, "__hsail_workitemflatid", BT_FN_UINT_CONST_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKITEMID, BRIG_OPCODE_WORKITEMID,
|
||||
BRIG_TYPE_U32, "__hsail_workitemid", BT_FN_UINT_UINT_PTR,
|
||||
ATTR_NOTHROW_LEAF_LIST)
|
||||
BRIG_TYPE_U32, "__hsail_workitemid",
|
||||
BT_FN_UINT_UINT_CONST_PTR, ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKGROUPID, BRIG_OPCODE_WORKGROUPID,
|
||||
BRIG_TYPE_U32, "__hsail_workgroupid", BT_FN_UINT_UINT_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
BRIG_TYPE_U32, "__hsail_workgroupid",
|
||||
BT_FN_UINT_UINT_CONST_PTR, ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CURRENTWORKITEMFLATID,
|
||||
BRIG_OPCODE_CURRENTWORKITEMFLATID,
|
||||
|
@ -90,11 +90,12 @@ DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_PACKETCOMPLETIONSIG_SIG32,
|
|||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE,
|
||||
BRIG_OPCODE_CURRENTWORKGROUPSIZE, BRIG_TYPE_U32,
|
||||
"__hsail_currentworkgroupsize", BT_FN_UINT_UINT_PTR,
|
||||
"__hsail_currentworkgroupsize", BT_FN_UINT_UINT_CONST_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_WORKGROUPSIZE, BRIG_OPCODE_WORKGROUPSIZE,
|
||||
BRIG_TYPE_U32, "__hsail_workgroupsize", BT_FN_UINT_UINT_PTR,
|
||||
BRIG_TYPE_U32, "__hsail_workgroupsize",
|
||||
BT_FN_UINT_UINT_CONST_PTR,
|
||||
ATTR_PURE_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_DIM, BRIG_OPCODE_DIM,
|
||||
|
@ -565,7 +566,7 @@ DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_SETWORKITEMID, "__hsail_setworkitemid",
|
|||
|
||||
DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_LAUNCH_WG_FUNC,
|
||||
"__hsail_launch_wg_function",
|
||||
BT_FN_VOID_PTR_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
|
||||
BT_FN_VOID_PTR_PTR_UINT32, ATTR_NOTHROW_LEAF_LIST)
|
||||
|
||||
DEF_HSAIL_INTR_BUILTIN (BUILT_IN_HSAIL_LAUNCH_KERNEL,
|
||||
"__hsail_launch_kernel",
|
||||
|
|
|
@ -1,3 +1,46 @@
|
|||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
Add flag -fassume-phsa that is on by default. If -fno-assume-phsa
|
||||
is given, these optimizations are disabled. With this flag, gccbrig
|
||||
can generate GENERIC that assumes we are targeting a phsa-runtime
|
||||
based implementation, which allows us to expose the work-item context
|
||||
accesses to retrieve WI IDs etc. which helps optimizers.
|
||||
First optimization that takes advantage of this is to get rid of
|
||||
the setworkitemid calls whenever we have non-inlined calls that
|
||||
use IDs internally. Other optimizations added in this commit:
|
||||
- expand absoluteid to similar level of simplicity as workitemid.
|
||||
At the moment absoluteid is the best indexing ID to end up with
|
||||
WG vectorization.
|
||||
- propagate ID variables closer to their uses. This is mainly
|
||||
to avoid known useless casts, which confuse at least scalar
|
||||
evolution analysis.
|
||||
- use signed long long for storing IDs. Unsigned integers have
|
||||
defined wraparound semantics, which confuse at least scalar
|
||||
evolution analysis, leading to unvectorizable WI loops.
|
||||
- also refactor some BRIG function generation helpers to brig_function.
|
||||
- no point in having the wi-loop as a for-loop. It's really
|
||||
a do...while and SCEV can analyze it just fine still.
|
||||
- add consts to ptrs etc. in BRIG builtin defs.
|
||||
Improves optimization opportunities.
|
||||
- add qualifiers to generated function parameters.
|
||||
Const and restrict on the hidden local/private pointers,
|
||||
the arg buffer and the context pointer help some optimizations.
|
||||
* brig/brigfrontend/brig-basic-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-branch-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-cmp-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-code-entry-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-code-entry-handler.h: See above.
|
||||
* brig/brigfrontend/brig-control-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-cvt-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-function-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-function.cc: See above.
|
||||
* brig/brigfrontend/brig-function.h: See above.
|
||||
* brig/brigfrontend/brig-label-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-lane-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/brig-mem-inst-handler.cc: See above.
|
||||
* brig/brigfrontend/phsa.h: See above.
|
||||
* brig/lang.opt: See above.
|
||||
|
||||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
* brig/brigfrontend/brig-function-handler.cc: Skip multiple forward
|
||||
|
|
|
@ -105,7 +105,8 @@ brig_basic_inst_handler::build_shuffle (tree arith_type,
|
|||
/* Unpack the tightly packed mask elements to BIT_FIELD_REFs
|
||||
from which to construct the mask vector as understood by
|
||||
VEC_PERM_EXPR. */
|
||||
tree mask_operand = add_temp_var ("shuffle_mask", operands[2]);
|
||||
tree mask_operand
|
||||
= m_parent.m_cf->add_temp_var ("shuffle_mask", operands[2]);
|
||||
|
||||
tree mask_element_type
|
||||
= build_nonstandard_integer_type (input_mask_element_size, true);
|
||||
|
@ -219,10 +220,11 @@ brig_basic_inst_handler::build_pack (tree_stl_vec &operands)
|
|||
tree wide_type = build_nonstandard_integer_type (vecsize, 1);
|
||||
|
||||
tree src_vect = build_resize_convert_view (wide_type, operands[0]);
|
||||
src_vect = add_temp_var ("src_vect", src_vect);
|
||||
src_vect = m_parent.m_cf->add_temp_var ("src_vect", src_vect);
|
||||
|
||||
tree scalar = operands[1];
|
||||
scalar = add_temp_var ("scalar", convert_to_integer (wide_type, scalar));
|
||||
scalar = m_parent.m_cf->add_temp_var ("scalar",
|
||||
convert_to_integer (wide_type, scalar));
|
||||
|
||||
tree pos = operands[2];
|
||||
|
||||
|
@ -230,21 +232,22 @@ brig_basic_inst_handler::build_pack (tree_stl_vec &operands)
|
|||
Zero them for well-defined semantics. */
|
||||
tree t = build2 (BIT_AND_EXPR, TREE_TYPE (pos), operands[2],
|
||||
build_int_cstu (TREE_TYPE (pos), ecount - 1));
|
||||
pos = add_temp_var ("pos", convert (wide_type, t));
|
||||
pos = m_parent.m_cf->add_temp_var ("pos", convert (wide_type, t));
|
||||
|
||||
tree element_type = TREE_TYPE (TREE_TYPE (operands[0]));
|
||||
size_t element_width = int_size_in_bytes (element_type) * BITS_PER_UNIT;
|
||||
tree ewidth = build_int_cstu (wide_type, element_width);
|
||||
|
||||
tree bitoffset = build2 (MULT_EXPR, wide_type, ewidth, pos);
|
||||
bitoffset = add_temp_var ("offset", bitoffset);
|
||||
bitoffset = m_parent.m_cf->add_temp_var ("offset", bitoffset);
|
||||
|
||||
uint64_t mask_int
|
||||
= element_width == 64 ? (uint64_t) -1 : ((uint64_t) 1 << element_width) - 1;
|
||||
|
||||
tree mask = build_int_cstu (wide_type, mask_int);
|
||||
|
||||
mask = add_temp_var ("mask", convert_to_integer (wide_type, mask));
|
||||
mask = m_parent.m_cf->add_temp_var ("mask",
|
||||
convert_to_integer (wide_type, mask));
|
||||
|
||||
tree clearing_mask
|
||||
= build1 (BIT_NOT_EXPR, wide_type,
|
||||
|
@ -311,7 +314,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode,
|
|||
tree arith_type,
|
||||
tree_stl_vec &operands)
|
||||
{
|
||||
tree_code opcode = get_tree_code_for_hsa_opcode (brig_opcode, brig_type);
|
||||
tree_code opcode
|
||||
= brig_function::get_tree_code_for_hsa_opcode (brig_opcode, brig_type);
|
||||
|
||||
BrigType16_t inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
||||
|
||||
|
@ -388,8 +392,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode,
|
|||
on which cannot be used in general to remain HSAIL compliant.
|
||||
Perhaps a builtin call would be better option here. */
|
||||
return build2 (RDIV_EXPR, arith_type, build_one_cst (arith_type),
|
||||
expand_or_call_builtin (BRIG_OPCODE_SQRT, brig_type,
|
||||
arith_type, operands));
|
||||
m_parent.m_cf->expand_or_call_builtin
|
||||
(BRIG_OPCODE_SQRT, brig_type, arith_type, operands));
|
||||
}
|
||||
else if (brig_opcode == BRIG_OPCODE_NRCP)
|
||||
{
|
||||
|
@ -410,8 +414,8 @@ brig_basic_inst_handler::build_inst_expr (BrigOpcode16_t brig_opcode,
|
|||
gcc_unreachable ();
|
||||
}
|
||||
else if (opcode == CALL_EXPR)
|
||||
return expand_or_call_builtin (brig_opcode, brig_type, arith_type,
|
||||
operands);
|
||||
return m_parent.m_cf->expand_or_call_builtin (brig_opcode, brig_type,
|
||||
arith_type, operands);
|
||||
else if (output_count == 1)
|
||||
{
|
||||
if (input_count == 1)
|
||||
|
@ -520,7 +524,8 @@ brig_basic_inst_handler::operator () (const BrigBase *base)
|
|||
in_operands[0] = build_lower_element_broadcast (in_operands[0]);
|
||||
|
||||
tree_code opcode
|
||||
= get_tree_code_for_hsa_opcode (brig_inst->opcode, brig_inst_type);
|
||||
= brig_function::get_tree_code_for_hsa_opcode (brig_inst->opcode,
|
||||
brig_inst_type);
|
||||
|
||||
if (p >= BRIG_PACK_PPSAT && p <= BRIG_PACK_PSAT)
|
||||
{
|
||||
|
@ -566,11 +571,11 @@ brig_basic_inst_handler::operator () (const BrigBase *base)
|
|||
*/
|
||||
tree_stl_vec operand0_elements;
|
||||
if (input_count > 0)
|
||||
unpack (in_operands[0], operand0_elements);
|
||||
m_parent.m_cf->unpack (in_operands[0], operand0_elements);
|
||||
|
||||
tree_stl_vec operand1_elements;
|
||||
if (input_count > 1)
|
||||
unpack (in_operands[1], operand1_elements);
|
||||
m_parent.m_cf->unpack (in_operands[1], operand1_elements);
|
||||
|
||||
tree_stl_vec result_elements;
|
||||
|
||||
|
@ -617,7 +622,7 @@ brig_basic_inst_handler::operator () (const BrigBase *base)
|
|||
|
||||
result_elements.push_back (convert (scalar_type, scalar_expr));
|
||||
}
|
||||
instr_expr = pack (result_elements);
|
||||
instr_expr = m_parent.m_cf->pack (result_elements);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -728,140 +733,3 @@ brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand)
|
|||
vec_operand, mask);
|
||||
}
|
||||
|
||||
/* Returns the tree code that should be used to implement the given
|
||||
HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
|
||||
(BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
|
||||
returns TREE_LIST (if it can be emulated with a simple chain of tree
|
||||
nodes) or CALL_EXPR if the opcode should be implemented using a builtin
|
||||
call. */
|
||||
|
||||
tree_code
|
||||
brig_basic_inst_handler::get_tree_code_for_hsa_opcode
|
||||
(BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
|
||||
{
|
||||
BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
||||
switch (brig_opcode)
|
||||
{
|
||||
case BRIG_OPCODE_NOP:
|
||||
return NOP_EXPR;
|
||||
case BRIG_OPCODE_ADD:
|
||||
return PLUS_EXPR;
|
||||
case BRIG_OPCODE_CMOV:
|
||||
if (brig_inner_type == brig_type)
|
||||
return COND_EXPR;
|
||||
else
|
||||
return VEC_COND_EXPR;
|
||||
case BRIG_OPCODE_SUB:
|
||||
return MINUS_EXPR;
|
||||
case BRIG_OPCODE_MUL:
|
||||
case BRIG_OPCODE_MUL24:
|
||||
return MULT_EXPR;
|
||||
case BRIG_OPCODE_MULHI:
|
||||
case BRIG_OPCODE_MUL24HI:
|
||||
return MULT_HIGHPART_EXPR;
|
||||
case BRIG_OPCODE_DIV:
|
||||
if (gccbrig_is_float_type (brig_inner_type))
|
||||
return RDIV_EXPR;
|
||||
else
|
||||
return TRUNC_DIV_EXPR;
|
||||
case BRIG_OPCODE_NEG:
|
||||
return NEGATE_EXPR;
|
||||
case BRIG_OPCODE_MIN:
|
||||
if (gccbrig_is_float_type (brig_inner_type))
|
||||
return CALL_EXPR;
|
||||
else
|
||||
return MIN_EXPR;
|
||||
case BRIG_OPCODE_MAX:
|
||||
if (gccbrig_is_float_type (brig_inner_type))
|
||||
return CALL_EXPR;
|
||||
else
|
||||
return MAX_EXPR;
|
||||
case BRIG_OPCODE_FMA:
|
||||
return FMA_EXPR;
|
||||
case BRIG_OPCODE_ABS:
|
||||
return ABS_EXPR;
|
||||
case BRIG_OPCODE_SHL:
|
||||
return LSHIFT_EXPR;
|
||||
case BRIG_OPCODE_SHR:
|
||||
return RSHIFT_EXPR;
|
||||
case BRIG_OPCODE_OR:
|
||||
return BIT_IOR_EXPR;
|
||||
case BRIG_OPCODE_XOR:
|
||||
return BIT_XOR_EXPR;
|
||||
case BRIG_OPCODE_AND:
|
||||
return BIT_AND_EXPR;
|
||||
case BRIG_OPCODE_NOT:
|
||||
return BIT_NOT_EXPR;
|
||||
case BRIG_OPCODE_RET:
|
||||
return RETURN_EXPR;
|
||||
case BRIG_OPCODE_MOV:
|
||||
case BRIG_OPCODE_LDF:
|
||||
return MODIFY_EXPR;
|
||||
case BRIG_OPCODE_LD:
|
||||
case BRIG_OPCODE_ST:
|
||||
return MEM_REF;
|
||||
case BRIG_OPCODE_BR:
|
||||
return GOTO_EXPR;
|
||||
case BRIG_OPCODE_REM:
|
||||
if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
|
||||
return TRUNC_MOD_EXPR;
|
||||
else
|
||||
return CALL_EXPR;
|
||||
case BRIG_OPCODE_NRCP:
|
||||
case BRIG_OPCODE_NRSQRT:
|
||||
/* Implement as 1/f (x). gcc should pattern detect that and
|
||||
use a native instruction, if available, for it. */
|
||||
return TREE_LIST;
|
||||
case BRIG_OPCODE_FLOOR:
|
||||
case BRIG_OPCODE_CEIL:
|
||||
case BRIG_OPCODE_SQRT:
|
||||
case BRIG_OPCODE_NSQRT:
|
||||
case BRIG_OPCODE_RINT:
|
||||
case BRIG_OPCODE_TRUNC:
|
||||
case BRIG_OPCODE_POPCOUNT:
|
||||
case BRIG_OPCODE_COPYSIGN:
|
||||
case BRIG_OPCODE_NCOS:
|
||||
case BRIG_OPCODE_NSIN:
|
||||
case BRIG_OPCODE_NLOG2:
|
||||
case BRIG_OPCODE_NEXP2:
|
||||
case BRIG_OPCODE_NFMA:
|
||||
/* Class has type B1 regardless of the float type, thus
|
||||
the below builtin map search cannot find it. */
|
||||
case BRIG_OPCODE_CLASS:
|
||||
case BRIG_OPCODE_WORKITEMABSID:
|
||||
return CALL_EXPR;
|
||||
default:
|
||||
|
||||
/* Some BRIG opcodes can use the same builtins for unsigned and
|
||||
signed types. Force these cases to unsigned types.
|
||||
*/
|
||||
|
||||
if (brig_opcode == BRIG_OPCODE_BORROW
|
||||
|| brig_opcode == BRIG_OPCODE_CARRY
|
||||
|| brig_opcode == BRIG_OPCODE_LASTBIT
|
||||
|| brig_opcode == BRIG_OPCODE_BITINSERT)
|
||||
{
|
||||
if (brig_type == BRIG_TYPE_S32)
|
||||
brig_type = BRIG_TYPE_U32;
|
||||
else if (brig_type == BRIG_TYPE_S64)
|
||||
brig_type = BRIG_TYPE_U64;
|
||||
}
|
||||
|
||||
|
||||
builtin_map::const_iterator i
|
||||
= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
||||
if (i != s_custom_builtins.end ())
|
||||
return CALL_EXPR;
|
||||
else if (s_custom_builtins.find
|
||||
(std::make_pair (brig_opcode, brig_inner_type))
|
||||
!= s_custom_builtins.end ())
|
||||
return CALL_EXPR;
|
||||
if (brig_inner_type == BRIG_TYPE_F16
|
||||
&& s_custom_builtins.find
|
||||
(std::make_pair (brig_opcode, BRIG_TYPE_F32))
|
||||
!= s_custom_builtins.end ())
|
||||
return CALL_EXPR;
|
||||
break;
|
||||
}
|
||||
return TREE_LIST; /* Emulate using a chain of nodes. */
|
||||
}
|
||||
|
|
|
@ -119,10 +119,11 @@ brig_branch_inst_handler::operator () (const BrigBase *base)
|
|||
memory. */
|
||||
|
||||
tree group_local_offset
|
||||
= add_temp_var ("group_local_offset",
|
||||
build_int_cst
|
||||
(uint32_type_node,
|
||||
m_parent.m_cf->m_local_group_variables.size()));
|
||||
= m_parent.m_cf->add_temp_var ("group_local_offset",
|
||||
build_int_cst
|
||||
(uint32_type_node,
|
||||
m_parent.m_cf->
|
||||
m_local_group_variables.size()));
|
||||
|
||||
/* TODO: ensure the callee's frame is aligned! */
|
||||
|
||||
|
@ -152,6 +153,7 @@ brig_branch_inst_handler::operator () (const BrigBase *base)
|
|||
m_parent.m_cf->m_called_functions.push_back (func_ref);
|
||||
if (DECL_EXTERNAL (func_ref))
|
||||
m_parent.add_decl_call (call);
|
||||
m_parent.m_cf->start_new_bb ();
|
||||
|
||||
return base->byteCount;
|
||||
}
|
||||
|
@ -216,18 +218,21 @@ brig_branch_inst_handler::operator () (const BrigBase *base)
|
|||
ensure the barrier won't be duplicated or moved out of loops etc.
|
||||
Like the 'noduplicate' of LLVM. Same goes for fbarriers. */
|
||||
m_parent.m_cf->append_statement
|
||||
(expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE, NULL_TREE,
|
||||
call_operands));
|
||||
(m_parent.m_cf->expand_or_call_builtin (brig_inst->opcode,
|
||||
BRIG_TYPE_NONE, NULL_TREE,
|
||||
call_operands));
|
||||
}
|
||||
else if (brig_inst->opcode >= BRIG_OPCODE_ARRIVEFBAR
|
||||
&& brig_inst->opcode <= BRIG_OPCODE_WAITFBAR)
|
||||
{
|
||||
m_parent.m_cf->m_has_barriers = true;
|
||||
m_parent.m_cf->append_statement
|
||||
(expand_or_call_builtin (brig_inst->opcode, BRIG_TYPE_NONE,
|
||||
uint32_type_node, operands));
|
||||
(m_parent.m_cf->expand_or_call_builtin (brig_inst->opcode,
|
||||
BRIG_TYPE_NONE,
|
||||
uint32_type_node, operands));
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
m_parent.m_cf->start_new_bb ();
|
||||
return base->byteCount;
|
||||
}
|
||||
|
|
|
@ -180,17 +180,17 @@ brig_cmp_inst_handler::operator () (const BrigBase *base)
|
|||
results, we must now truncate the result vector to S16s so it
|
||||
fits to the destination register. We can build the target vector
|
||||
type from the f16 storage type (unsigned ints). */
|
||||
expr = add_temp_var ("wide_cmp_result", expr);
|
||||
expr = m_parent.m_cf->add_temp_var ("wide_cmp_result", expr);
|
||||
tree_stl_vec wide_elements;
|
||||
tree_stl_vec shrunk_elements;
|
||||
unpack (expr, wide_elements);
|
||||
m_parent.m_cf->unpack (expr, wide_elements);
|
||||
for (size_t i = 0; i < wide_elements.size (); ++i)
|
||||
{
|
||||
tree wide = wide_elements.at (i);
|
||||
shrunk_elements.push_back
|
||||
(convert_to_integer (short_integer_type_node, wide));
|
||||
}
|
||||
expr = pack (shrunk_elements);
|
||||
expr = m_parent.m_cf->pack (shrunk_elements);
|
||||
}
|
||||
build_output_assignment (*inst_base, operands[0], expr);
|
||||
|
||||
|
|
|
@ -41,24 +41,9 @@
|
|||
#include "brig-builtins.h"
|
||||
#include "fold-const.h"
|
||||
|
||||
brig_code_entry_handler::builtin_map brig_code_entry_handler::s_custom_builtins;
|
||||
|
||||
brig_code_entry_handler::brig_code_entry_handler (brig_to_generic &parent)
|
||||
: brig_entry_handler (parent)
|
||||
{
|
||||
if (s_custom_builtins.size () > 0) return;
|
||||
|
||||
/* Populate the builtin index. */
|
||||
#undef DEF_HSAIL_ATOMIC_BUILTIN
|
||||
#undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
|
||||
#undef DEF_HSAIL_INTR_BUILTIN
|
||||
#undef DEF_HSAIL_SAT_BUILTIN
|
||||
#undef DEF_HSAIL_BUILTIN
|
||||
#define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
|
||||
s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
|
||||
= builtin_decl_explicit (ENUM);
|
||||
|
||||
#include "brig-builtins.def"
|
||||
}
|
||||
|
||||
/* Build a tree operand which is a reference to a piece of code. REF is the
|
||||
|
@ -301,18 +286,18 @@ brig_code_entry_handler::build_address_operand
|
|||
|
||||
tree local_size
|
||||
= build2 (MULT_EXPR, uint32_type_node,
|
||||
expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
|
||||
BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_0),
|
||||
expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
|
||||
BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_1));
|
||||
m_parent.m_cf->expand_or_call_builtin
|
||||
(BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_0),
|
||||
m_parent.m_cf->expand_or_call_builtin
|
||||
(BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_1));
|
||||
|
||||
local_size
|
||||
= build2 (MULT_EXPR, uint32_type_node,
|
||||
expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE,
|
||||
BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_2),
|
||||
m_parent.m_cf->expand_or_call_builtin
|
||||
(BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
|
||||
uint32_type_node, uint32_2),
|
||||
local_size);
|
||||
|
||||
tree var_region
|
||||
|
@ -324,9 +309,9 @@ brig_code_entry_handler::build_address_operand
|
|||
= build2 (MULT_EXPR, uint32_type_node,
|
||||
build_int_cst (uint32_type_node,
|
||||
m_parent.private_variable_size (var_name)),
|
||||
expand_or_call_builtin (BRIG_OPCODE_WORKITEMFLATID,
|
||||
BRIG_TYPE_U32,
|
||||
uint32_type_node, operands));
|
||||
m_parent.m_cf->expand_or_call_builtin
|
||||
(BRIG_OPCODE_WORKITEMFLATID, BRIG_TYPE_U32,
|
||||
uint32_type_node, operands));
|
||||
|
||||
tree var_offset
|
||||
= build2 (PLUS_EXPR, uint32_type_node, var_region, pos);
|
||||
|
@ -336,8 +321,9 @@ brig_code_entry_handler::build_address_operand
|
|||
offset to a flat address by adding it as an offset to a (private
|
||||
or group) base pointer later on. Same applies to group_var_offset. */
|
||||
symbol_base
|
||||
= add_temp_var ("priv_var_offset",
|
||||
convert (size_type_node, var_offset));
|
||||
= m_parent.m_cf->add_temp_var ("priv_var_offset",
|
||||
convert (size_type_node,
|
||||
var_offset));
|
||||
}
|
||||
else if (segment == BRIG_SEGMENT_ARG)
|
||||
{
|
||||
|
@ -699,138 +685,6 @@ brig_code_entry_handler::get_tree_expr_type_for_hsa_type
|
|||
return gccbrig_tree_type_for_hsa_type (brig_type);
|
||||
}
|
||||
|
||||
/* In case the HSA instruction must be implemented using a builtin,
|
||||
this function is called to get the correct builtin function.
|
||||
TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
|
||||
brig instruction and BRIG_TYPE the brig instruction's type. */
|
||||
|
||||
tree
|
||||
brig_code_entry_handler::get_builtin_for_hsa_opcode
|
||||
(tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
|
||||
{
|
||||
tree builtin = NULL_TREE;
|
||||
tree builtin_type = type;
|
||||
|
||||
/* For vector types, first find the scalar version of the builtin. */
|
||||
if (type != NULL_TREE && VECTOR_TYPE_P (type))
|
||||
builtin_type = TREE_TYPE (type);
|
||||
BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
||||
|
||||
/* Some BRIG opcodes can use the same builtins for unsigned and
|
||||
signed types. Force these cases to unsigned types. */
|
||||
|
||||
if (brig_opcode == BRIG_OPCODE_BORROW
|
||||
|| brig_opcode == BRIG_OPCODE_CARRY
|
||||
|| brig_opcode == BRIG_OPCODE_LASTBIT
|
||||
|| brig_opcode == BRIG_OPCODE_BITINSERT)
|
||||
{
|
||||
if (brig_type == BRIG_TYPE_S32)
|
||||
brig_type = BRIG_TYPE_U32;
|
||||
else if (brig_type == BRIG_TYPE_S64)
|
||||
brig_type = BRIG_TYPE_U64;
|
||||
}
|
||||
|
||||
switch (brig_opcode)
|
||||
{
|
||||
case BRIG_OPCODE_FLOOR:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
|
||||
break;
|
||||
case BRIG_OPCODE_CEIL:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
|
||||
break;
|
||||
case BRIG_OPCODE_SQRT:
|
||||
case BRIG_OPCODE_NSQRT:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
|
||||
break;
|
||||
case BRIG_OPCODE_RINT:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
|
||||
break;
|
||||
case BRIG_OPCODE_TRUNC:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
|
||||
break;
|
||||
case BRIG_OPCODE_COPYSIGN:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
|
||||
break;
|
||||
case BRIG_OPCODE_NSIN:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
|
||||
break;
|
||||
case BRIG_OPCODE_NLOG2:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
|
||||
break;
|
||||
case BRIG_OPCODE_NEXP2:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
|
||||
break;
|
||||
case BRIG_OPCODE_NFMA:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
|
||||
break;
|
||||
case BRIG_OPCODE_NCOS:
|
||||
builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
|
||||
break;
|
||||
case BRIG_OPCODE_POPCOUNT:
|
||||
/* Popcount should be typed by its argument type (the return value
|
||||
is always u32). Let's use a b64 version for also for b32 for now. */
|
||||
return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
|
||||
case BRIG_OPCODE_BORROW:
|
||||
/* Borrow uses the same builtin for unsigned and signed types. */
|
||||
if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
||||
return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
|
||||
else
|
||||
return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
|
||||
case BRIG_OPCODE_CARRY:
|
||||
/* Carry also uses the same builtin for unsigned and signed types. */
|
||||
if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
||||
return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
|
||||
else
|
||||
return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
|
||||
default:
|
||||
|
||||
/* Use our builtin index for finding a proper builtin for the BRIG
|
||||
opcode and BRIG type. This takes care most of the builtin cases,
|
||||
the special cases are handled in the separate 'case' statements
|
||||
above. */
|
||||
builtin_map::const_iterator i
|
||||
= s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
||||
if (i != s_custom_builtins.end ())
|
||||
return (*i).second;
|
||||
|
||||
if (brig_inner_type != brig_type)
|
||||
{
|
||||
/* Try to find a scalar built-in we could use. */
|
||||
i = s_custom_builtins.find
|
||||
(std::make_pair (brig_opcode, brig_inner_type));
|
||||
if (i != s_custom_builtins.end ())
|
||||
return (*i).second;
|
||||
}
|
||||
|
||||
/* In case this is an fp16 operation that is promoted to fp32,
|
||||
try to find a fp32 scalar built-in. */
|
||||
if (brig_inner_type == BRIG_TYPE_F16)
|
||||
{
|
||||
i = s_custom_builtins.find
|
||||
(std::make_pair (brig_opcode, BRIG_TYPE_F32));
|
||||
if (i != s_custom_builtins.end ())
|
||||
return (*i).second;
|
||||
}
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
|
||||
{
|
||||
/* Try to find a vectorized version of the built-in.
|
||||
TODO: properly assert that builtin is a mathfn builtin? */
|
||||
tree vec_builtin
|
||||
= targetm.vectorize.builtin_vectorized_function
|
||||
(builtin_mathfn_code (builtin), type, type);
|
||||
if (vec_builtin != NULL_TREE)
|
||||
return vec_builtin;
|
||||
else
|
||||
return builtin;
|
||||
}
|
||||
if (builtin == NULL_TREE)
|
||||
gcc_unreachable ();
|
||||
return builtin;
|
||||
}
|
||||
|
||||
/* Return the correct GENERIC type for storing comparison results
|
||||
of operand with the type given in SOURCE_TYPE. */
|
||||
|
||||
|
@ -848,264 +702,6 @@ brig_code_entry_handler::get_comparison_result_type (tree source_type)
|
|||
return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1);
|
||||
}
|
||||
|
||||
/* Returns true in case the given opcode needs to know about work-item context
|
||||
data. In such case the context data is passed as a pointer to a work-item
|
||||
context object, as the last argument in the builtin call. */
|
||||
|
||||
bool
|
||||
brig_code_entry_handler::needs_workitem_context_data
|
||||
(BrigOpcode16_t brig_opcode) const
|
||||
{
|
||||
switch (brig_opcode)
|
||||
{
|
||||
case BRIG_OPCODE_WORKITEMABSID:
|
||||
case BRIG_OPCODE_WORKITEMFLATABSID:
|
||||
case BRIG_OPCODE_WORKITEMFLATID:
|
||||
case BRIG_OPCODE_CURRENTWORKITEMFLATID:
|
||||
case BRIG_OPCODE_WORKITEMID:
|
||||
case BRIG_OPCODE_WORKGROUPID:
|
||||
case BRIG_OPCODE_WORKGROUPSIZE:
|
||||
case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
||||
case BRIG_OPCODE_GRIDGROUPS:
|
||||
case BRIG_OPCODE_GRIDSIZE:
|
||||
case BRIG_OPCODE_DIM:
|
||||
case BRIG_OPCODE_PACKETID:
|
||||
case BRIG_OPCODE_PACKETCOMPLETIONSIG:
|
||||
case BRIG_OPCODE_BARRIER:
|
||||
case BRIG_OPCODE_WAVEBARRIER:
|
||||
case BRIG_OPCODE_ARRIVEFBAR:
|
||||
case BRIG_OPCODE_INITFBAR:
|
||||
case BRIG_OPCODE_JOINFBAR:
|
||||
case BRIG_OPCODE_LEAVEFBAR:
|
||||
case BRIG_OPCODE_RELEASEFBAR:
|
||||
case BRIG_OPCODE_WAITFBAR:
|
||||
case BRIG_OPCODE_CUID:
|
||||
case BRIG_OPCODE_MAXCUID:
|
||||
case BRIG_OPCODE_DEBUGTRAP:
|
||||
case BRIG_OPCODE_GROUPBASEPTR:
|
||||
case BRIG_OPCODE_KERNARGBASEPTR:
|
||||
case BRIG_OPCODE_ALLOCA:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
/* Returns true in case the given opcode that would normally be generated
|
||||
as a builtin call can be expanded to tree nodes. */
|
||||
|
||||
bool
|
||||
brig_code_entry_handler::can_expand_builtin (BrigOpcode16_t brig_opcode) const
|
||||
{
|
||||
switch (brig_opcode)
|
||||
{
|
||||
case BRIG_OPCODE_WORKITEMFLATABSID:
|
||||
case BRIG_OPCODE_WORKITEMFLATID:
|
||||
case BRIG_OPCODE_WORKITEMABSID:
|
||||
case BRIG_OPCODE_WORKGROUPSIZE:
|
||||
case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
||||
/* TODO: expand more builtins. */
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
};
|
||||
}
|
||||
|
||||
/* Try to expand the given builtin call to reuse a previously generated
|
||||
variable, if possible. If not, just call the given builtin.
|
||||
BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
|
||||
ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
|
||||
input operands. */
|
||||
|
||||
tree
|
||||
brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type,
|
||||
tree arith_type,
|
||||
tree_stl_vec &operands)
|
||||
{
|
||||
if (m_parent.m_cf->m_is_kernel && can_expand_builtin (brig_opcode))
|
||||
return expand_builtin (brig_opcode, operands);
|
||||
|
||||
tree built_in
|
||||
= get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
|
||||
|
||||
if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
|
||||
&& arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
|
||||
&& brig_opcode != BRIG_OPCODE_LERP
|
||||
&& brig_opcode != BRIG_OPCODE_PACKCVT
|
||||
&& brig_opcode != BRIG_OPCODE_SAD
|
||||
&& brig_opcode != BRIG_OPCODE_SADHI)
|
||||
{
|
||||
/* Call the scalar built-in for all elements in the vector. */
|
||||
tree_stl_vec operand0_elements;
|
||||
if (operands.size () > 0)
|
||||
unpack (operands[0], operand0_elements);
|
||||
|
||||
tree_stl_vec operand1_elements;
|
||||
if (operands.size () > 1)
|
||||
unpack (operands[1], operand1_elements);
|
||||
|
||||
tree_stl_vec result_elements;
|
||||
|
||||
size_t element_count = gccbrig_type_vector_subparts (arith_type);
|
||||
for (size_t i = 0; i < element_count; ++i)
|
||||
{
|
||||
tree_stl_vec call_operands;
|
||||
if (operand0_elements.size () > 0)
|
||||
call_operands.push_back (operand0_elements.at (i));
|
||||
|
||||
if (operand1_elements.size () > 0)
|
||||
call_operands.push_back (operand1_elements.at (i));
|
||||
|
||||
result_elements.push_back
|
||||
(expand_or_call_builtin (brig_opcode, brig_type,
|
||||
TREE_TYPE (arith_type),
|
||||
call_operands));
|
||||
}
|
||||
return pack (result_elements);
|
||||
}
|
||||
|
||||
tree_stl_vec call_operands;
|
||||
tree_stl_vec operand_types;
|
||||
|
||||
tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
|
||||
|
||||
for (size_t i = 0; i < operands.size (); ++i)
|
||||
{
|
||||
tree operand_type = TREE_VALUE (arg_type_chain);
|
||||
call_operands.push_back (convert (operand_type, operands[i]));
|
||||
operand_types.push_back (operand_type);
|
||||
arg_type_chain = TREE_CHAIN (arg_type_chain);
|
||||
}
|
||||
|
||||
if (needs_workitem_context_data (brig_opcode))
|
||||
{
|
||||
call_operands.push_back (m_parent.m_cf->m_context_arg);
|
||||
operand_types.push_back (ptr_type_node);
|
||||
m_parent.m_cf->m_has_unexpanded_dp_builtins = true;
|
||||
}
|
||||
|
||||
size_t operand_count = call_operands.size ();
|
||||
|
||||
call_operands.resize (4, NULL_TREE);
|
||||
operand_types.resize (4, NULL_TREE);
|
||||
for (size_t i = 0; i < operand_count; ++i)
|
||||
call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
|
||||
call_operands.at (i));
|
||||
|
||||
tree fnptr = build_fold_addr_expr (built_in);
|
||||
return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
|
||||
operand_count, &call_operands[0]);
|
||||
}
|
||||
|
||||
/* Instead of calling a built-in, reuse a previously returned value known to
|
||||
be still valid. This is beneficial especially for the work-item
|
||||
identification related builtins as not having them as calls can lead to
|
||||
more easily vectorizable parallel loops for multi work-item work-groups.
|
||||
BRIG_OPCODE identifies the builtin and OPERANDS store the operands. */
|
||||
|
||||
tree
|
||||
brig_code_entry_handler::expand_builtin (BrigOpcode16_t brig_opcode,
|
||||
tree_stl_vec &operands)
|
||||
{
|
||||
tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
|
||||
|
||||
tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
|
||||
|
||||
tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
|
||||
|
||||
if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
|
||||
{
|
||||
tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
|
||||
id0 = convert (uint64_type_node, id0);
|
||||
|
||||
tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
|
||||
id1 = convert (uint64_type_node, id1);
|
||||
|
||||
tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
|
||||
id2 = convert (uint64_type_node, id2);
|
||||
|
||||
tree max0 = convert (uint64_type_node,
|
||||
m_parent.m_cf->m_grid_size_vars[0]);
|
||||
tree max1 = convert (uint64_type_node,
|
||||
m_parent.m_cf->m_grid_size_vars[1]);
|
||||
|
||||
tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
|
||||
id2_x_max0_x_max1
|
||||
= build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
|
||||
|
||||
tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
|
||||
|
||||
tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
|
||||
sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
|
||||
|
||||
return add_temp_var ("workitemflatabsid", sum);
|
||||
}
|
||||
else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
|
||||
{
|
||||
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||||
|
||||
tree local_id_var = m_parent.m_cf->m_local_id_vars[dim];
|
||||
tree wg_id_var = m_parent.m_cf->m_wg_id_vars[dim];
|
||||
tree wg_size_var = m_parent.m_cf->m_wg_size_vars[dim];
|
||||
|
||||
tree wg_id_x_wg_size = build2 (MULT_EXPR, uint32_type_node,
|
||||
convert (uint32_type_node, wg_id_var),
|
||||
convert (uint32_type_node, wg_size_var));
|
||||
tree sum
|
||||
= build2 (PLUS_EXPR, uint32_type_node, wg_id_x_wg_size, local_id_var);
|
||||
|
||||
return add_temp_var (std::string ("workitemabsid_")
|
||||
+ (char) ((int) 'x' + dim), sum);
|
||||
}
|
||||
else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
|
||||
{
|
||||
tree z_x_wgsx_wgsy
|
||||
= build2 (MULT_EXPR, uint32_type_node,
|
||||
m_parent.m_cf->m_local_id_vars[2],
|
||||
m_parent.m_cf->m_wg_size_vars[0]);
|
||||
z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
|
||||
m_parent.m_cf->m_wg_size_vars[1]);
|
||||
|
||||
tree y_x_wgsx
|
||||
= build2 (MULT_EXPR, uint32_type_node,
|
||||
m_parent.m_cf->m_local_id_vars[1],
|
||||
m_parent.m_cf->m_wg_size_vars[0]);
|
||||
|
||||
tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
|
||||
sum = build2 (PLUS_EXPR, uint32_type_node,
|
||||
m_parent.m_cf->m_local_id_vars[0],
|
||||
sum);
|
||||
return add_temp_var ("workitemflatid", sum);
|
||||
}
|
||||
else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
|
||||
{
|
||||
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||||
return m_parent.m_cf->m_wg_size_vars[dim];
|
||||
}
|
||||
else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
|
||||
{
|
||||
HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
||||
return m_parent.m_cf->m_cur_wg_size_vars[dim];
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Appends and returns a new temp variable and an accompanying assignment
|
||||
statement that stores the value of the given EXPR and has the given NAME. */
|
||||
|
||||
tree
|
||||
brig_code_entry_handler::add_temp_var (std::string name, tree expr)
|
||||
{
|
||||
tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
|
||||
tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
|
||||
m_parent.m_cf->append_statement (assign);
|
||||
return temp_var;
|
||||
}
|
||||
|
||||
/* Creates a FP32 to FP16 conversion call, assuming the source and destination
|
||||
are FP32 type variables. */
|
||||
|
||||
|
@ -1387,7 +983,6 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
|
|||
variable type (can be any type; see get_m_var_declfor_reg @
|
||||
brig-function.cc). */
|
||||
tree output_type = TREE_TYPE (output);
|
||||
tree input_type = TREE_TYPE (inst_expr);
|
||||
bool is_fp16 = (brig_inst.type & BRIG_TYPE_BASE_MASK) == BRIG_TYPE_F16
|
||||
&& brig_inst.base.kind != BRIG_KIND_INST_MEM
|
||||
&& !gccbrig_is_bit_operation (brig_inst.opcode);
|
||||
|
@ -1396,6 +991,13 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
|
|||
bool ftz = false;
|
||||
const BrigBase *base = &brig_inst.base;
|
||||
|
||||
if (m_parent.m_cf->is_id_val (inst_expr))
|
||||
inst_expr = m_parent.m_cf->id_val (inst_expr);
|
||||
|
||||
tree input_type = TREE_TYPE (inst_expr);
|
||||
|
||||
m_parent.m_cf->add_reg_var_update (output, inst_expr);
|
||||
|
||||
if (base->kind == BRIG_KIND_INST_MOD)
|
||||
{
|
||||
const BrigInstMod *mod = (const BrigInstMod *) base;
|
||||
|
@ -1418,13 +1020,13 @@ brig_code_entry_handler::build_output_assignment (const BrigInstBase &brig_inst,
|
|||
{
|
||||
/* Ensure we don't duplicate the arithmetics to the arguments of the bit
|
||||
field reference operators. */
|
||||
inst_expr = add_temp_var ("before_ftz", inst_expr);
|
||||
inst_expr = m_parent.m_cf->add_temp_var ("before_ftz", inst_expr);
|
||||
inst_expr = flush_to_zero (is_fp16) (*this, inst_expr);
|
||||
}
|
||||
|
||||
if (is_fp16)
|
||||
{
|
||||
inst_expr = add_temp_var ("before_f2h", inst_expr);
|
||||
inst_expr = m_parent.m_cf->add_temp_var ("before_f2h", inst_expr);
|
||||
tree f2h_output = build_f2h_conversion (inst_expr);
|
||||
tree conv = build_resize_convert_view (output_type, f2h_output);
|
||||
tree assign = build2 (MODIFY_EXPR, output_type, output, conv);
|
||||
|
@ -1486,62 +1088,6 @@ brig_code_entry_handler::append_statement (tree stmt)
|
|||
m_parent.m_cf->append_statement (stmt);
|
||||
}
|
||||
|
||||
/* Unpacks the elements of the vector in VALUE to scalars (bit field
|
||||
references) in ELEMENTS. */
|
||||
|
||||
void
|
||||
brig_code_entry_handler::unpack (tree value, tree_stl_vec &elements)
|
||||
{
|
||||
size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
|
||||
size_t element_size
|
||||
= int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
|
||||
size_t element_count
|
||||
= vec_size * BITS_PER_UNIT / element_size;
|
||||
|
||||
tree input_element_type = TREE_TYPE (TREE_TYPE (value));
|
||||
|
||||
value = add_temp_var ("unpack_input", value);
|
||||
|
||||
for (size_t i = 0; i < element_count; ++i)
|
||||
{
|
||||
tree element
|
||||
= build3 (BIT_FIELD_REF, input_element_type, value,
|
||||
TYPE_SIZE (input_element_type),
|
||||
bitsize_int(i * element_size));
|
||||
|
||||
element = add_temp_var ("scalar", element);
|
||||
elements.push_back (element);
|
||||
}
|
||||
}
|
||||
|
||||
/* Pack the elements of the scalars in ELEMENTS to the returned vector. */
|
||||
|
||||
tree
|
||||
brig_code_entry_handler::pack (tree_stl_vec &elements)
|
||||
{
|
||||
size_t element_count = elements.size ();
|
||||
|
||||
gcc_assert (element_count > 1);
|
||||
|
||||
tree output_element_type = TREE_TYPE (elements.at (0));
|
||||
|
||||
vec<constructor_elt, va_gc> *constructor_vals = NULL;
|
||||
for (size_t i = 0; i < element_count; ++i)
|
||||
CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
|
||||
|
||||
tree vec_type = build_vector_type (output_element_type, element_count);
|
||||
|
||||
/* build_constructor creates a vector type which is not a vector_cst
|
||||
that requires compile time constant elements. */
|
||||
tree vec = build_constructor (vec_type, constructor_vals);
|
||||
|
||||
/* Add a temp variable for readability. */
|
||||
tree tmp_var = create_tmp_var (vec_type, "vec_out");
|
||||
tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
|
||||
m_parent.m_cf->append_statement (vec_tmp_assign);
|
||||
return tmp_var;
|
||||
}
|
||||
|
||||
/* Visits the element(s) in the OPERAND, calling HANDLER to each of them. */
|
||||
|
||||
tree
|
||||
|
@ -1757,4 +1303,3 @@ brig_code_entry_handler::int_constant_value (tree node)
|
|||
n = TREE_OPERAND (n, 0);
|
||||
return int_cst_value (n);
|
||||
}
|
||||
|
||||
|
|
|
@ -35,8 +35,6 @@ class tree_element_unary_visitor;
|
|||
class brig_code_entry_handler : public brig_entry_handler
|
||||
{
|
||||
public:
|
||||
typedef std::map<std::pair<BrigOpcode16_t, BrigType16_t>, tree> builtin_map;
|
||||
|
||||
brig_code_entry_handler (brig_to_generic &parent);
|
||||
|
||||
/* Handles the brig_code data at the given pointer and adds it to the
|
||||
|
@ -51,8 +49,6 @@ protected:
|
|||
tree get_tree_expr_type_for_hsa_type (BrigType16_t brig_type) const;
|
||||
tree get_tree_cst_for_hsa_operand (const BrigOperandConstantBytes *brigConst,
|
||||
tree type) const;
|
||||
tree get_builtin_for_hsa_opcode (tree type, BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type) const;
|
||||
tree get_comparison_result_type (tree source_type);
|
||||
|
||||
tree build_code_ref (const BrigBase &ref);
|
||||
|
@ -73,16 +69,6 @@ protected:
|
|||
|
||||
bool needs_workitem_context_data (BrigOpcode16_t brig_opcode) const;
|
||||
|
||||
void unpack (tree value, tree_stl_vec &elements);
|
||||
tree pack (tree_stl_vec &elements);
|
||||
|
||||
bool can_expand_builtin (BrigOpcode16_t brig_opcode) const;
|
||||
tree expand_builtin (BrigOpcode16_t brig_opcode, tree_stl_vec &operands);
|
||||
|
||||
tree expand_or_call_builtin (BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type, tree arith_type,
|
||||
tree_stl_vec &operands);
|
||||
|
||||
tree add_temp_var (std::string name, tree expr);
|
||||
|
||||
tree build_f2h_conversion (tree source);
|
||||
|
@ -100,10 +86,6 @@ protected:
|
|||
|
||||
tree extend_int (tree input, tree dest_type, tree src_type);
|
||||
|
||||
/* HSAIL-specific builtin functions not yet integrated to gcc. */
|
||||
|
||||
static builtin_map s_custom_builtins;
|
||||
|
||||
private:
|
||||
|
||||
tree_stl_vec build_or_analyze_operands (const BrigInstBase &brig_inst,
|
||||
|
@ -299,9 +281,6 @@ private:
|
|||
|
||||
tree build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode, tree arith_type,
|
||||
tree_stl_vec &operands);
|
||||
|
||||
tree_code get_tree_code_for_hsa_opcode (BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type) const;
|
||||
};
|
||||
|
||||
class brig_cvt_inst_handler : public brig_inst_mod_handler
|
||||
|
|
|
@ -53,45 +53,45 @@ brig_directive_control_handler::operator () (const BrigBase *base)
|
|||
case BRIG_CONTROL_MAXDYNAMICGROUPSIZE:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.max_dynamic_group_size
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_MAXFLATGRIDSIZE:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.max_flat_grid_size
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_MAXFLATWORKGROUPSIZE:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.max_flat_workgroup_size
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_REQUIREDDIM:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.required_dim
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_REQUIREDGRIDSIZE:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.required_grid_size[0]
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
m_parent.m_cf->m_descriptor.required_grid_size[1]
|
||||
= int_constant_value (operands.at (1));
|
||||
= brig_function::int_constant_value (operands.at (1));
|
||||
m_parent.m_cf->m_descriptor.required_grid_size[2]
|
||||
= int_constant_value (operands.at (2));
|
||||
= brig_function::int_constant_value (operands.at (2));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_REQUIREDWORKGROUPSIZE:
|
||||
{
|
||||
m_parent.m_cf->m_descriptor.required_workgroup_size[0]
|
||||
= int_constant_value (operands.at (0));
|
||||
= brig_function::int_constant_value (operands.at (0));
|
||||
m_parent.m_cf->m_descriptor.required_workgroup_size[1]
|
||||
= int_constant_value (operands.at (1));
|
||||
= brig_function::int_constant_value (operands.at (1));
|
||||
m_parent.m_cf->m_descriptor.required_workgroup_size[2]
|
||||
= int_constant_value (operands.at (2));
|
||||
= brig_function::int_constant_value (operands.at (2));
|
||||
break;
|
||||
}
|
||||
case BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS:
|
||||
|
|
|
@ -83,6 +83,12 @@ brig_cvt_inst_handler::generate (const BrigBase *base)
|
|||
tree &input = operands.at (1);
|
||||
tree &output = operands.at (0);
|
||||
|
||||
if (m_parent.m_cf->is_id_val (input))
|
||||
{
|
||||
input = m_parent.m_cf->id_val (input);
|
||||
src_type = TREE_TYPE (input);
|
||||
}
|
||||
|
||||
size_t conv_src_size = int_size_in_bytes (src_type);
|
||||
size_t conv_dst_size = int_size_in_bytes (dest_type);
|
||||
size_t src_reg_size = int_size_in_bytes (TREE_TYPE (input));
|
||||
|
|
|
@ -93,6 +93,25 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
represent HSAIL registers. */
|
||||
tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
|
||||
|
||||
tree restrict_char_ptr
|
||||
= build_qualified_type (build_pointer_type (char_type_node),
|
||||
TYPE_QUAL_RESTRICT);
|
||||
tree restrict_void_ptr
|
||||
= build_qualified_type (build_pointer_type (void_type_node),
|
||||
TYPE_QUAL_RESTRICT);
|
||||
|
||||
tree restrict_const_char_ptr
|
||||
= build_qualified_type (build_pointer_type
|
||||
(build_qualified_type (char_type_node,
|
||||
TYPE_QUAL_CONST)),
|
||||
TYPE_QUAL_RESTRICT);
|
||||
|
||||
tree restrict_const_void_ptr
|
||||
= build_qualified_type (build_pointer_type
|
||||
(build_qualified_type (void_type_node,
|
||||
TYPE_QUAL_CONST)),
|
||||
TYPE_QUAL_RESTRICT);
|
||||
|
||||
if (is_kernel)
|
||||
{
|
||||
tree name_identifier
|
||||
|
@ -107,12 +126,11 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
3) a void* parameter that contains the first flat address of the group
|
||||
region allocated to the current work-group. */
|
||||
|
||||
tree char_ptr_type_node = build_pointer_type (char_type_node);
|
||||
fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
|
||||
build_function_type_list (void_type_node,
|
||||
char_ptr_type_node,
|
||||
ptr_type_node,
|
||||
ptr_type_node, NULL_TREE));
|
||||
restrict_const_char_ptr,
|
||||
restrict_void_ptr,
|
||||
restrict_char_ptr, NULL_TREE));
|
||||
|
||||
SET_DECL_ASSEMBLER_NAME (fndecl, name_identifier);
|
||||
|
||||
|
@ -125,9 +143,10 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
= gccbrig_get_target_addr_space_id (BRIG_SEGMENT_KERNARG);
|
||||
|
||||
tree arg_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||||
get_identifier ("__args"), char_ptr_type_node);
|
||||
get_identifier ("__args"),
|
||||
restrict_const_char_ptr);
|
||||
DECL_ARGUMENTS (fndecl) = arg_arg;
|
||||
DECL_ARG_TYPE (arg_arg) = char_ptr_type_node;
|
||||
DECL_ARG_TYPE (arg_arg) = restrict_const_char_ptr;
|
||||
DECL_CONTEXT (arg_arg) = fndecl;
|
||||
DECL_ARTIFICIAL (arg_arg) = 1;
|
||||
TREE_READONLY (arg_arg) = 1;
|
||||
|
@ -189,7 +208,7 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
if (arg_decls == NULL_TREE)
|
||||
arg_decls = arg_var;
|
||||
else
|
||||
chainon (arg_decls, arg_var);
|
||||
arg_decls = chainon (arg_decls, arg_var);
|
||||
|
||||
m_parent.m_cf->add_arg_variable (brigVar, arg_var);
|
||||
|
||||
|
@ -230,18 +249,13 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
vec_safe_push (args, TREE_TYPE (arg_var));
|
||||
|
||||
m_parent.m_cf->add_arg_variable (brigVar, arg_var);
|
||||
|
||||
if (arg_decls == NULL_TREE)
|
||||
arg_decls = arg_var;
|
||||
else
|
||||
chainon (arg_decls, arg_var);
|
||||
arg_decls = chainon (arg_decls, arg_var);
|
||||
}
|
||||
}
|
||||
|
||||
vec_safe_push (args, ptr_type_node);
|
||||
vec_safe_push (args, ptr_type_node);
|
||||
vec_safe_push (args, ptr_type_node);
|
||||
vec_safe_push (args, ptr_type_node);
|
||||
vec_safe_push (args, restrict_void_ptr);
|
||||
vec_safe_push (args, restrict_char_ptr);
|
||||
vec_safe_push (args, uint32_type_node);
|
||||
vec_safe_push (args, restrict_char_ptr);
|
||||
|
||||
fndecl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
|
||||
build_function_type_vec (ret_type, args));
|
||||
|
@ -254,26 +268,30 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
|
||||
/* All functions need the hidden __context argument passed on
|
||||
because they might call WI-specific functions which need
|
||||
the context info. */
|
||||
the context info. Only kernels can write it, if they need
|
||||
to update the local ids in the work-item loop. */
|
||||
|
||||
tree context_arg_type
|
||||
= true ? restrict_void_ptr : restrict_const_void_ptr;
|
||||
tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||||
get_identifier ("__context"), ptr_type_node);
|
||||
if (DECL_ARGUMENTS (fndecl) == NULL_TREE)
|
||||
DECL_ARGUMENTS (fndecl) = context_arg;
|
||||
else
|
||||
chainon (DECL_ARGUMENTS (fndecl), context_arg);
|
||||
get_identifier ("__context"),
|
||||
context_arg_type);
|
||||
DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), context_arg);
|
||||
DECL_CONTEXT (context_arg) = fndecl;
|
||||
DECL_ARG_TYPE (context_arg) = ptr_type_node;
|
||||
DECL_ARG_TYPE (context_arg) = context_arg_type;
|
||||
DECL_ARTIFICIAL (context_arg) = 1;
|
||||
TREE_READONLY (context_arg) = 1;
|
||||
TREE_USED (context_arg) = 1;
|
||||
m_parent.m_cf->m_context_arg = context_arg;
|
||||
|
||||
/* They can also access group memory, so we need to pass the
|
||||
group pointer along too. */
|
||||
tree group_base_arg
|
||||
= build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||||
get_identifier ("__group_base_addr"), ptr_type_node);
|
||||
chainon (DECL_ARGUMENTS (fndecl), group_base_arg);
|
||||
DECL_ARG_TYPE (group_base_arg) = ptr_type_node;
|
||||
get_identifier ("__group_base_addr"),
|
||||
restrict_char_ptr);
|
||||
DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), group_base_arg);
|
||||
DECL_ARG_TYPE (group_base_arg) = restrict_char_ptr;
|
||||
DECL_CONTEXT (group_base_arg) = fndecl;
|
||||
DECL_ARTIFICIAL (group_base_arg) = 1;
|
||||
TREE_READONLY (group_base_arg) = 1;
|
||||
|
@ -288,7 +306,7 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
tree group_local_offset_arg
|
||||
= build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||||
get_identifier ("__group_local_offset"), uint32_type_node);
|
||||
chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg);
|
||||
DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), group_local_offset_arg);
|
||||
DECL_ARG_TYPE (group_local_offset_arg) = uint32_type_node;
|
||||
DECL_CONTEXT (group_local_offset_arg) = fndecl;
|
||||
DECL_ARTIFICIAL (group_local_offset_arg) = 1;
|
||||
|
@ -299,24 +317,25 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
/* Same for private. */
|
||||
tree private_base_arg
|
||||
= build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
||||
get_identifier ("__private_base_addr"), ptr_type_node);
|
||||
chainon (DECL_ARGUMENTS (fndecl), private_base_arg);
|
||||
DECL_ARG_TYPE (private_base_arg) = ptr_type_node;
|
||||
get_identifier ("__private_base_addr"), restrict_char_ptr);
|
||||
DECL_ARGUMENTS (fndecl) = chainon (DECL_ARGUMENTS (fndecl), private_base_arg);
|
||||
DECL_ARG_TYPE (private_base_arg) = restrict_char_ptr;
|
||||
DECL_CONTEXT (private_base_arg) = fndecl;
|
||||
DECL_ARTIFICIAL (private_base_arg) = 1;
|
||||
TREE_READONLY (private_base_arg) = 1;
|
||||
TREE_USED (private_base_arg) = 1;
|
||||
m_parent.m_cf->m_private_base_arg = private_base_arg;
|
||||
|
||||
DECL_SAVED_TREE (fndecl) = bind_expr;
|
||||
|
||||
set_externally_visible (fndecl);
|
||||
|
||||
if (base->kind == BRIG_KIND_DIRECTIVE_FUNCTION)
|
||||
{
|
||||
TREE_STATIC (fndecl) = 0;
|
||||
TREE_PUBLIC (fndecl) = 1;
|
||||
DECL_EXTERNAL (fndecl) = 0;
|
||||
DECL_DECLARED_INLINE_P (fndecl) = 1;
|
||||
set_inline (fndecl);
|
||||
set_externally_visible (fndecl);
|
||||
}
|
||||
else if (base->kind == BRIG_KIND_DIRECTIVE_KERNEL)
|
||||
{
|
||||
|
@ -330,6 +349,7 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
TREE_STATIC (fndecl) = 0;
|
||||
TREE_PUBLIC (fndecl) = 1;
|
||||
DECL_EXTERNAL (fndecl) = 1;
|
||||
set_inline (fndecl);
|
||||
}
|
||||
else if (base->kind == BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION)
|
||||
{
|
||||
|
@ -371,11 +391,8 @@ brig_directive_function_handler::operator () (const BrigBase *base)
|
|||
}
|
||||
|
||||
m_parent.start_function (fndecl);
|
||||
|
||||
m_parent.m_cf->m_func_decl = fndecl;
|
||||
m_parent.m_cf->m_current_bind_expr = bind_expr;
|
||||
m_parent.m_cf->m_context_arg = context_arg;
|
||||
m_parent.m_cf->m_private_base_arg = private_base_arg;
|
||||
|
||||
if (ret_value != NULL_TREE && TREE_TYPE (ret_value) != void_type_node)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -105,6 +105,30 @@ public:
|
|||
|
||||
void analyze_calls ();
|
||||
|
||||
tree expand_builtin (BrigOpcode16_t brig_opcode, tree_stl_vec &operands);
|
||||
|
||||
tree expand_or_call_builtin (BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type, tree arith_type,
|
||||
tree_stl_vec &operands);
|
||||
bool can_expand_builtin (BrigOpcode16_t brig_opcode) const;
|
||||
|
||||
tree get_builtin_for_hsa_opcode (tree type, BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type) const;
|
||||
|
||||
void unpack (tree value, tree_stl_vec &elements);
|
||||
tree pack (tree_stl_vec &elements);
|
||||
tree add_temp_var (std::string name, tree expr);
|
||||
|
||||
static bool needs_workitem_context_data (BrigOpcode16_t brig_opcode);
|
||||
static HOST_WIDE_INT int_constant_value (tree node);
|
||||
static tree_code get_tree_code_for_hsa_opcode (BrigOpcode16_t brig_opcode,
|
||||
BrigType16_t brig_type);
|
||||
|
||||
void start_new_bb ();
|
||||
void add_reg_var_update (tree reg_var, tree val);
|
||||
bool is_id_val (tree reg_var);
|
||||
tree id_val (tree reg_var);
|
||||
|
||||
const BrigDirectiveExecutable *m_brig_def;
|
||||
|
||||
bool m_is_kernel;
|
||||
|
@ -183,6 +207,11 @@ public:
|
|||
tree m_wg_id_vars[3];
|
||||
tree m_wg_size_vars[3];
|
||||
tree m_grid_size_vars[3];
|
||||
/* Explicitly computed WG base for the absolute IDs which is used
|
||||
as the initial value when looping that dimension. We update
|
||||
the abs id with ++ to make it easy for the vectorizer. */
|
||||
tree m_abs_id_base_vars[3];
|
||||
tree m_abs_id_vars[3];
|
||||
|
||||
/* Set to true in case the kernel contains at least one dispatch packet
|
||||
(work-item ID-related) builtin call that could not be expanded to
|
||||
|
@ -219,6 +248,20 @@ private:
|
|||
/* Bookkeeping for the different HSA registers and their tree declarations
|
||||
for the currently generated function. */
|
||||
reg_decl_index_entry *m_regs[BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT];
|
||||
|
||||
/* Map for keeping book reads of ID variables, which can be propagated
|
||||
to uses in address expressions to produce cleaner indexing functions
|
||||
with unnecessary casts stripped off, etc. */
|
||||
typedef std::map<tree, tree> id_val_map;
|
||||
|
||||
/* Keeps track of ID values alive in registers in the currently
|
||||
processed BB. */
|
||||
id_val_map m_id_val_defs;
|
||||
|
||||
/* HSAIL-specific builtin functions not yet integrated to gcc. */
|
||||
typedef std::map<std::pair<BrigOpcode16_t, BrigType16_t>, tree> builtin_map;
|
||||
|
||||
static builtin_map s_custom_builtins;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,7 +31,10 @@ brig_directive_label_handler::operator () (const BrigBase *base)
|
|||
std::string label_str ((const char *) (label_name->bytes),
|
||||
label_name->byteCount);
|
||||
|
||||
m_parent.m_cf->start_new_bb ();
|
||||
|
||||
tree stmt = build_stmt (LABEL_EXPR, m_parent.m_cf->label (label_str));
|
||||
m_parent.m_cf->append_statement (stmt);
|
||||
|
||||
return base->byteCount;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ brig_lane_inst_handler::operator () (const BrigBase *base)
|
|||
elements.push_back (zero_cst);
|
||||
elements.push_back (zero_cst);
|
||||
|
||||
expr = pack (elements);
|
||||
expr = m_parent.m_cf->pack (elements);
|
||||
}
|
||||
else if (inst.base.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE)
|
||||
{
|
||||
|
|
|
@ -63,7 +63,7 @@ brig_mem_inst_handler::build_mem_access (const BrigInstBase *brig_inst,
|
|||
{
|
||||
/* Add a temporary variable so there won't be multiple
|
||||
reads in case of vector unpack. */
|
||||
mem_ref = add_temp_var ("mem_read", mem_ref);
|
||||
mem_ref = m_parent.m_cf->add_temp_var ("mem_read", mem_ref);
|
||||
return build_output_assignment (*brig_inst, data, mem_ref);
|
||||
}
|
||||
else
|
||||
|
@ -95,8 +95,9 @@ brig_mem_inst_handler::operator () (const BrigBase *base)
|
|||
inputs.push_back (operands[1]);
|
||||
inputs.push_back (align_opr);
|
||||
tree builtin_call
|
||||
= expand_or_call_builtin (BRIG_OPCODE_ALLOCA, BRIG_TYPE_U32,
|
||||
uint32_type_node, inputs);
|
||||
= m_parent.m_cf->expand_or_call_builtin (BRIG_OPCODE_ALLOCA,
|
||||
BRIG_TYPE_U32,
|
||||
uint32_type_node, inputs);
|
||||
build_output_assignment (*brig_inst, operands[0], builtin_call);
|
||||
m_parent.m_cf->m_has_allocas = true;
|
||||
return base->byteCount;
|
||||
|
|
|
@ -58,13 +58,22 @@ typedef struct __attribute__((__packed__))
|
|||
|
||||
/* The prefix to use in the ELF section containing descriptor for
|
||||
a function. */
|
||||
|
||||
#define PHSA_DESC_SECTION_PREFIX "phsa.desc."
|
||||
#define PHSA_HOST_DEF_PTR_PREFIX "__phsa.host_def."
|
||||
|
||||
/* The frontend error messages are parsed by the host runtime. Known
|
||||
prefix strings are used to separate the different runtime error
|
||||
codes. */
|
||||
|
||||
#define PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE "Incompatible module: "
|
||||
#define PHSA_ERROR_PREFIX_CORRUPTED_MODULE "Corrupted module: "
|
||||
|
||||
/* Offsets of attributes in the PHSA context structs.
|
||||
Used by -fphsa-wi-context-opt. */
|
||||
#define PHSA_CONTEXT_OFFS_WI_IDS 0
|
||||
#define PHSA_CONTEXT_OFFS_WG_IDS (PHSA_CONTEXT_OFFS_WI_IDS + 3 * 4)
|
||||
#define PHSA_CONTEXT_WG_SIZES (PHSA_CONTEXT_OFFS_WG_IDS + 3 * 4)
|
||||
#define PHSA_CONTEXT_CURRENT_WG_SIZES (PHSA_CONTEXT_WG_SIZES + 3 * 4)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,11 @@ BRIG Separate Alias(d)
|
|||
-dump=
|
||||
BRIG Joined Alias(d)
|
||||
|
||||
fassume-phsa
|
||||
BRIG Report Var(flag_assume_phsa) Init(1) Optimization
|
||||
Assume we are finalizing for phsa and its libhsail-rt. Enables additional
|
||||
phsa-specific optimizations (default).
|
||||
|
||||
L
|
||||
BRIG Joined Separate
|
||||
; Not documented
|
||||
|
|
|
@ -283,7 +283,9 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT_INT, BT_UINT, BT_INT)
|
|||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_ULONG, BT_UINT, BT_ULONG)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_LONG, BT_UINT, BT_LONG)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_PTR, BT_UINT, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_CONST_PTR, BT_UINT, BT_CONST_PTR)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_PTR, BT_ULONG, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_CONST_PTR, BT_ULONG, BT_CONST_PTR)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_ULONG_ULONG, BT_ULONG, BT_ULONG)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_ULONGLONG_ULONGLONG, BT_ULONGLONG, BT_ULONGLONG)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_INT8_FLOAT, BT_INT8, BT_FLOAT)
|
||||
|
@ -480,6 +482,7 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
|
|||
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_INT_BOOL, BT_BOOL, BT_INT, BT_BOOL)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_UINT_UINT, BT_VOID, BT_UINT, BT_UINT)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_PTR, BT_UINT, BT_UINT, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_UINT_UINT_CONST_PTR, BT_UINT, BT_UINT, BT_CONST_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_SIZE, BT_PTR, BT_CONST_PTR, BT_SIZE)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_PTR_CONST_PTR_CONST_PTR, BT_PTR, BT_CONST_PTR, BT_CONST_PTR)
|
||||
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTRPTR_CONST_PTR, BT_VOID, BT_PTR_PTR, BT_CONST_PTR)
|
||||
|
@ -569,6 +572,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_DOUBLE_DOUBLEPTR_DOUBLEPTR,
|
|||
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_LONGDOUBLE_LONGDOUBLEPTR_LONGDOUBLEPTR,
|
||||
BT_VOID, BT_LONGDOUBLE, BT_LONGDOUBLE_PTR, BT_LONGDOUBLE_PTR)
|
||||
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_PTR_PTR, BT_VOID, BT_PTR, BT_PTR, BT_PTR)
|
||||
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_PTR_UINT32, BT_VOID, BT_PTR, BT_PTR, BT_UINT32)
|
||||
DEF_FUNCTION_TYPE_3 (BT_FN_INT_CONST_STRING_PTR_CONST_STRING_PTR_CONST_STRING,
|
||||
BT_INT, BT_CONST_STRING, BT_PTR_CONST_STRING, BT_PTR_CONST_STRING)
|
||||
DEF_FUNCTION_TYPE_3 (BT_FN_INT_INT_CONST_STRING_VALIST_ARG,
|
||||
|
|
|
@ -1,4 +1,9 @@
|
|||
2018-05-04 Carl Love <cel@us.ibm.com>
|
||||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
* testsuite/brig.dg/test/gimple/smoke_test.hsail: Fix the test
|
||||
to match the currently produced gimple.
|
||||
|
||||
2018-05-04 Carl Love <cel@us.ibm.com>
|
||||
* gcc.target/powerpc/vsx-vector-6.h (foo): Add test for vec_max,
|
||||
vec_trunc.
|
||||
* gcc.target/powerpc/vsx-vector-6-le.c (dg-final): Update xvcmpeqdp,
|
||||
|
|
|
@ -41,15 +41,15 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
|
|||
};
|
||||
|
||||
/* The kernel function itself should have a fingerprint as follows */
|
||||
/* _Kernel (unsigned char * __args, void * __context, void * __group_base_addr, void * __private_base_addr) */
|
||||
/* { dg-final { scan-tree-dump "_Kernel \\\(unsigned char \\\* __args, void \\\* __context, void \\\* __group_base_addr, unsigned int __group_local_offset, void \\\* __private_base_addr\\\)" "gimple"} } */
|
||||
/* _Kernel (const unsigned char * restrict __args, void * restrict __context, unsigned char * restrict __group_base_addr, unsigned int __group_local_offset, unsigned char * restrict __private_base_addr) */
|
||||
/* { dg-final { scan-tree-dump "_Kernel \\\(const unsigned char \\\* restrict __args, void \\\* restrict __context, unsigned char \\\* restrict __group_base_addr, unsigned int __group_local_offset, unsigned char \\\* restrict __private_base_addr\\\)" "gimple"} } */
|
||||
|
||||
/* ld_kernarg: mem_read.0 = MEM[(unsigned long *)__args]; */
|
||||
/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = MEM\\\[\\\(unsigned long \\\*\\\)__args\\\];" "gimple"} } */
|
||||
|
||||
/* The latter ld_global_u32 should be visible as a pointer dereference (after pointer arithmetics on a temporary var): */
|
||||
/* mem_read.2 = *D.1691; */
|
||||
/* { dg-final { scan-tree-dump "mem_read.\[0-9\] = \\\*\[_0-9\]+;" "gimple"} } */
|
||||
/* { dg-final { scan-tree-dump "mem_read.\[0-9\]+ = \\\*\[_0-9\]+;" "gimple"} } */
|
||||
|
||||
/* add_u32s should generate +operators */
|
||||
/* { dg-final { scan-tree-dump "s2 = s0 \\\+ s1;" "gimple"} } */
|
||||
|
@ -71,8 +71,8 @@ prog kernel &KernelWithBarrier(kernarg_u64 %input_ptr, kernarg_u64 %output_ptr)
|
|||
/* { dg-final { scan-tree-dump "if \\\(__local_z < __cur_wg_size_z\\\) goto __wi_loop_z; else goto" "gimple"} } */
|
||||
|
||||
/* The launcher should call __hsail_launch_wg_function in this case: */
|
||||
/* Kernel (void * __context, void * __group_base_addr) */
|
||||
/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* __context, void \\\* __group_base_addr\\\)" "gimple"} } */
|
||||
/* Kernel (void * restrict __context, unsigned char * restrict __group_base_addr) */
|
||||
/* { dg-final { scan-tree-dump "Kernel \\\(void \\\* restrict __context, unsigned char \\\* restrict __group_base_addr\\\)" "gimple"} } */
|
||||
/* { dg-final { scan-tree-dump "__hsail_launch_wg_function \\\(_Kernel, __context, __group_base_addr, group_local_offset.*\\\);" "gimple"} }*/
|
||||
|
||||
/* The kernel should have the magic metadata section injected to the ELF. */
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
* include/internal/phsa-rt.h: Whitespace cleanup.
|
||||
* include/internal/workitems.h: Store work item ID data to easily
|
||||
accessible locations.
|
||||
* rt/workitems.c: Same.
|
||||
|
||||
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
|
||||
|
||||
* rt/workitems.c: Fix an alloca stack underflow.
|
||||
|
|
|
@ -54,7 +54,6 @@ typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t,
|
|||
*/
|
||||
typedef struct
|
||||
{
|
||||
|
||||
/* Data set by the HSA Runtime's kernel launcher. */
|
||||
hsa_kernel_dispatch_packet_t *dp;
|
||||
|
||||
|
|
|
@ -45,11 +45,6 @@
|
|||
|
||||
typedef struct
|
||||
{
|
||||
/* The group id of the currently executed WG. */
|
||||
size_t x;
|
||||
size_t y;
|
||||
size_t z;
|
||||
|
||||
/* This is 1 in case there are more work groups to execute.
|
||||
If 0, the work-item threads should finish themselves. */
|
||||
int more_wgs;
|
||||
|
@ -89,6 +84,16 @@ typedef struct
|
|||
stack frame. Initialized to point outside the private segment. */
|
||||
uint32_t alloca_frame_p;
|
||||
|
||||
/* The group id of the currently executed WG. This is for fiber based
|
||||
execution. The group ids are duplicated also to the per WI context
|
||||
struct for simplified single pointer access in the GCCBRIG produced
|
||||
code.
|
||||
*/
|
||||
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint32_t z;
|
||||
|
||||
} PHSAWorkGroup;
|
||||
|
||||
/* Data identifying a single work-item, passed to the work-item thread in case
|
||||
|
@ -96,17 +101,42 @@ typedef struct
|
|||
|
||||
typedef struct
|
||||
{
|
||||
/* NOTE: These members STARTing here should not be moved as they are
|
||||
accessed directly by code emitted by BRIG FE. */
|
||||
|
||||
/* The local id of the current WI. */
|
||||
|
||||
uint32_t x;
|
||||
uint32_t y;
|
||||
uint32_t z;
|
||||
|
||||
/* The group id of the currently executed WG. */
|
||||
|
||||
uint32_t group_x;
|
||||
uint32_t group_y;
|
||||
uint32_t group_z;
|
||||
|
||||
/* The local size of a complete WG. */
|
||||
|
||||
uint32_t wg_size_x;
|
||||
uint32_t wg_size_y;
|
||||
uint32_t wg_size_z;
|
||||
|
||||
/* The local size of the current WG. */
|
||||
|
||||
uint32_t cur_wg_size_x;
|
||||
uint32_t cur_wg_size_y;
|
||||
uint32_t cur_wg_size_z;
|
||||
|
||||
/* NOTE: Fixed members END here. */
|
||||
|
||||
PHSAKernelLaunchData *launch_data;
|
||||
/* Identifies and keeps book of the currently executed WG of the WI swarm. */
|
||||
volatile PHSAWorkGroup *wg;
|
||||
/* The local id of the current WI. */
|
||||
size_t x;
|
||||
size_t y;
|
||||
size_t z;
|
||||
#ifdef HAVE_FIBERS
|
||||
fiber_t fiber;
|
||||
#endif
|
||||
} PHSAWorkItem;
|
||||
} __attribute__((packed)) PHSAWorkItem;
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -107,11 +107,20 @@ phsa_work_item_thread (int arg0, int arg1)
|
|||
the current_work_group_* is set to point to the WG executed next. */
|
||||
if (!wi->wg->more_wgs)
|
||||
break;
|
||||
|
||||
wi->group_x = wg->x;
|
||||
wi->group_y = wg->y;
|
||||
wi->group_z = wg->z;
|
||||
|
||||
wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
|
||||
wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
|
||||
wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
|
||||
|
||||
#ifdef DEBUG_PHSA_RT
|
||||
printf (
|
||||
"Running work-item %lu/%lu/%lu for wg %lu/%lu/%lu / %lu/%lu/%lu...\n",
|
||||
wi->x, wi->y, wi->z, wg->x, wg->y, wg->z, l_data->wg_max_x,
|
||||
l_data->wg_max_y, l_data->wg_max_z);
|
||||
wi->x, wi->y, wi->z, wi->group_x, wi->group_y, wi->group_z,
|
||||
l_data->wg_max_x, l_data->wg_max_y, l_data->wg_max_z);
|
||||
#endif
|
||||
|
||||
if (wi->x < __hsail_currentworkgroupsize (0, wi)
|
||||
|
@ -180,6 +189,13 @@ phsa_work_item_thread (int arg0, int arg1)
|
|||
else
|
||||
wg->x++;
|
||||
#endif
|
||||
wi->group_x = wg->x;
|
||||
wi->group_y = wg->y;
|
||||
wi->group_z = wg->z;
|
||||
|
||||
wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
|
||||
wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
|
||||
wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
|
||||
|
||||
/* Reinitialize the work-group barrier according to the new WG's
|
||||
size, which might not be the same as the previous ones, due
|
||||
|
@ -233,6 +249,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
|
|||
PHSAWorkItem *wi_threads = NULL;
|
||||
PHSAWorkGroup wg;
|
||||
size_t flat_wi_id = 0, x, y, z, max_x, max_y, max_z;
|
||||
uint32_t group_x, group_y, group_z;
|
||||
fiber_barrier_t wg_start_barrier;
|
||||
fiber_barrier_t wg_completion_barrier;
|
||||
fiber_barrier_t wg_sync_barrier;
|
||||
|
@ -257,13 +274,13 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
|
|||
wg.initial_group_offset = group_local_offset;
|
||||
|
||||
#ifdef EXECUTE_WGS_BACKWARDS
|
||||
wg.x = context->wg_max_x - 1;
|
||||
wg.y = context->wg_max_y - 1;
|
||||
wg.z = context->wg_max_z - 1;
|
||||
group_x = context->wg_max_x - 1;
|
||||
group_y = context->wg_max_y - 1;
|
||||
group_z = context->wg_max_z - 1;
|
||||
#else
|
||||
wg.x = context->wg_min_x;
|
||||
wg.y = context->wg_min_y;
|
||||
wg.z = context->wg_min_z;
|
||||
group_x = context->wg_min_x;
|
||||
group_y = context->wg_min_y;
|
||||
group_z = context->wg_min_z;
|
||||
#endif
|
||||
|
||||
fiber_barrier_init (&wg_sync_barrier, wg_size);
|
||||
|
@ -290,6 +307,19 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
|
|||
PHSAWorkItem *wi = &wi_threads[flat_wi_id];
|
||||
wi->launch_data = context;
|
||||
wi->wg = &wg;
|
||||
|
||||
wg.x = wi->group_x = group_x;
|
||||
wg.y = wi->group_y = group_y;
|
||||
wg.z = wi->group_z = group_z;
|
||||
|
||||
wi->wg_size_x = context->dp->workgroup_size_x;
|
||||
wi->wg_size_y = context->dp->workgroup_size_y;
|
||||
wi->wg_size_z = context->dp->workgroup_size_z;
|
||||
|
||||
wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
|
||||
wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
|
||||
wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
|
||||
|
||||
wi->x = x;
|
||||
wi->y = y;
|
||||
wi->z = z;
|
||||
|
@ -467,9 +497,17 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
|
|||
for (wg_y = context->wg_min_y; wg_y < context->wg_max_y; ++wg_y)
|
||||
for (wg_x = context->wg_min_x; wg_x < context->wg_max_x; ++wg_x)
|
||||
{
|
||||
wi.wg->x = wg_x;
|
||||
wi.wg->y = wg_y;
|
||||
wi.wg->z = wg_z;
|
||||
wi.group_x = wg_x;
|
||||
wi.group_y = wg_y;
|
||||
wi.group_z = wg_z;
|
||||
|
||||
wi.wg_size_x = context->dp->workgroup_size_x;
|
||||
wi.wg_size_y = context->dp->workgroup_size_y;
|
||||
wi.wg_size_z = context->dp->workgroup_size_z;
|
||||
|
||||
wi.cur_wg_size_x = __hsail_currentworkgroupsize (0, &wi);
|
||||
wi.cur_wg_size_y = __hsail_currentworkgroupsize (1, &wi);
|
||||
wi.cur_wg_size_z = __hsail_currentworkgroupsize (2, &wi);
|
||||
|
||||
context->kernel (context->kernarg_addr, &wi, group_base_ptr,
|
||||
group_local_offset, private_base_ptr);
|
||||
|
@ -564,15 +602,15 @@ __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context)
|
|||
default:
|
||||
case 0:
|
||||
/* Overflow semantics in the case of WG dim > grid dim. */
|
||||
id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
|
||||
id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
|
||||
% dp->grid_size_x;
|
||||
break;
|
||||
case 1:
|
||||
id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
|
||||
id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
|
||||
% dp->grid_size_y;
|
||||
break;
|
||||
case 2:
|
||||
id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
|
||||
id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
|
||||
% dp->grid_size_z;
|
||||
break;
|
||||
}
|
||||
|
@ -590,15 +628,15 @@ __hsail_workitemabsid_u64 (uint32_t dim, PHSAWorkItem *context)
|
|||
default:
|
||||
case 0:
|
||||
/* Overflow semantics in the case of WG dim > grid dim. */
|
||||
id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
|
||||
id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
|
||||
% dp->grid_size_x;
|
||||
break;
|
||||
case 1:
|
||||
id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
|
||||
id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
|
||||
% dp->grid_size_y;
|
||||
break;
|
||||
case 2:
|
||||
id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
|
||||
id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
|
||||
% dp->grid_size_z;
|
||||
break;
|
||||
}
|
||||
|
@ -738,19 +776,19 @@ __hsail_currentworkgroupsize (uint32_t dim, PHSAWorkItem *wi)
|
|||
{
|
||||
default:
|
||||
case 0:
|
||||
if ((uint64_t) wi->wg->x < dp->grid_size_x / dp->workgroup_size_x)
|
||||
if ((uint64_t) wi->group_x < dp->grid_size_x / dp->workgroup_size_x)
|
||||
wg_size = dp->workgroup_size_x; /* Full WG. */
|
||||
else
|
||||
wg_size = dp->grid_size_x % dp->workgroup_size_x; /* Partial WG. */
|
||||
break;
|
||||
case 1:
|
||||
if ((uint64_t) wi->wg->y < dp->grid_size_y / dp->workgroup_size_y)
|
||||
if ((uint64_t) wi->group_y < dp->grid_size_y / dp->workgroup_size_y)
|
||||
wg_size = dp->workgroup_size_y; /* Full WG. */
|
||||
else
|
||||
wg_size = dp->grid_size_y % dp->workgroup_size_y; /* Partial WG. */
|
||||
break;
|
||||
case 2:
|
||||
if ((uint64_t) wi->wg->z < dp->grid_size_z / dp->workgroup_size_z)
|
||||
if ((uint64_t) wi->group_z < dp->grid_size_z / dp->workgroup_size_z)
|
||||
wg_size = dp->workgroup_size_z; /* Full WG. */
|
||||
else
|
||||
wg_size = dp->grid_size_z % dp->workgroup_size_z; /* Partial WG. */
|
||||
|
@ -798,11 +836,11 @@ __hsail_workgroupid (uint32_t dim, PHSAWorkItem *wi)
|
|||
{
|
||||
default:
|
||||
case 0:
|
||||
return wi->wg->x;
|
||||
return wi->group_x;
|
||||
case 1:
|
||||
return wi->wg->y;
|
||||
return wi->group_y;
|
||||
case 2:
|
||||
return wi->wg->z;
|
||||
return wi->group_z;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue