Middle-end: Change order of LEN_MASK_LOAD/LEN_MASK_STORE arguments
Hi, Richard. I fixed the order as you suggested. Before this patch the order was {len,mask,bias}; after this patch it becomes {len,bias,mask}. Since you said we should not need 'internal_fn_bias_index', the bias index is always the len index + 1. I noticed the LEN_STORE order is {len,vector,bias}, so to keep them consistent I reordered LEN_STORE into {len,bias,vector}, just like MASK_STORE {mask,vector}. OK for trunk?

gcc/ChangeLog:

	* config/riscv/autovec.md: Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
	* config/riscv/riscv-v.cc (expand_load_store): Ditto.
	* doc/md.texi: Ditto.
	* gimple-fold.cc (gimple_fold_partial_load_store_mem_ref): Ditto.
	* internal-fn.cc (len_maskload_direct): Ditto.
	(len_maskstore_direct): Ditto.
	(add_len_and_mask_args): New function.
	(expand_partial_load_optab_fn): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
	(expand_partial_store_optab_fn): Ditto.
	(internal_fn_len_index): New function.
	(internal_fn_mask_index): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
	(internal_fn_stored_value_index): Ditto.
	(internal_len_load_store_bias): Ditto.
	* internal-fn.h (internal_fn_len_index): New function.
	* tree-ssa-dse.cc (initialize_ao_ref_for_dse): Change order of LEN_MASK_LOAD/LEN_MASK_STORE/LEN_LOAD/LEN_STORE arguments.
	* tree-vect-stmts.cc (vectorizable_store): Ditto.
	(vectorizable_load): Ditto.
This commit is contained in: parent 37449332dd, commit b8806f6ffb.
8 changed files with 107 additions and 106 deletions.
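As a quick orientation before the diff: a minimal, standalone sketch of the new argument layout. This is not GCC code; the call shapes and indices are taken from the internal_fn_*_index and vectorizable_store/vectorizable_load hunks below, and the struct is invented purely for illustration.

// Toy model of the argument layout after this patch.  The indices mirror
// internal_fn_len_index / internal_fn_mask_index / internal_fn_stored_value_index
// in the diff below; the bias has no query function of its own because it
// always sits at len_index + 1.
//
// New gimple call shapes (the first two arguments are the pointer and the
// alignment/alias hint):
//   LEN_LOAD       (ptr, align, len, bias)
//   LEN_STORE      (ptr, align, len, bias, value)
//   LEN_MASK_LOAD  (ptr, align, len, bias, mask)
//   LEN_MASK_STORE (ptr, align, len, bias, mask, value)
#include <cassert>

struct partial_fn_layout
{
  int len_index;    // 2 for all of the functions above
  int mask_index;   // -1 when the function takes no mask
  int value_index;  // -1 for loads
};

constexpr partial_fn_layout len_load       = {2, -1, -1};
constexpr partial_fn_layout len_store      = {2, -1,  4};
constexpr partial_fn_layout len_mask_load  = {2,  4, -1};
constexpr partial_fn_layout len_mask_store = {2,  4,  5};

int main ()
{
  // bias = len_index + 1, so the mask (when present) lands at len_index + 2
  // and the stored value always comes last.
  assert (len_mask_load.mask_index == len_mask_load.len_index + 2);
  assert (len_mask_store.value_index == len_mask_store.mask_index + 1);
  assert (len_store.value_index == len_store.len_index + 2);
  return 0;
}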
gcc/config/riscv/autovec.md
@@ -26,8 +26,8 @@
   [(match_operand:V 0 "register_operand")
    (match_operand:V 1 "memory_operand")
    (match_operand 2 "autovec_length_operand")
-   (match_operand:<VM> 3 "vector_mask_operand")
-   (match_operand 4 "const_0_operand")]
+   (match_operand 3 "const_0_operand")
+   (match_operand:<VM> 4 "vector_mask_operand")]
   "TARGET_VECTOR"
 {
   riscv_vector::expand_load_store (operands, true);
@@ -38,8 +38,8 @@
   [(match_operand:V 0 "memory_operand")
    (match_operand:V 1 "register_operand")
    (match_operand 2 "autovec_length_operand")
-   (match_operand:<VM> 3 "vector_mask_operand")
-   (match_operand 4 "const_0_operand")]
+   (match_operand 3 "const_0_operand")
+   (match_operand:<VM> 4 "vector_mask_operand")]
   "TARGET_VECTOR"
 {
   riscv_vector::expand_load_store (operands, false);
gcc/config/riscv/riscv-v.cc
@@ -2777,7 +2777,7 @@ expand_load_store (rtx *ops, bool is_load)
 {
   poly_int64 value;
   rtx len = ops[2];
-  rtx mask = ops[3];
+  rtx mask = ops[4];
   machine_mode mode = GET_MODE (ops[0]);
 
   if (poly_int_rtx_p (len, &value) && known_eq (value, GET_MODE_NUNITS (mode)))
gcc/doc/md.texi
@@ -5302,15 +5302,15 @@ This pattern is not allowed to @code{FAIL}.
 @cindex @code{len_maskload@var{m}@var{n}} instruction pattern
 @item @samp{len_maskload@var{m}@var{n}}
 Perform a masked load from the memory location pointed to by operand 1
-into register operand 0. (operand 2 + operand 4) elements are loaded from
+into register operand 0. (operand 2 + operand 3) elements are loaded from
 memory and other elements in operand 0 are set to undefined values.
 This is a combination of len_load and maskload.
 Operands 0 and 1 have mode @var{m}, which must be a vector mode. Operand 2
 has whichever integer mode the target prefers. A mask is specified in
-operand 3 which must be of type @var{n}. The mask has lower precedence than
+operand 4 which must be of type @var{n}. The mask has lower precedence than
 the length and is itself subject to length masking,
-i.e. only mask indices < (operand 2 + operand 4) are used.
-Operand 4 conceptually has mode @code{QI}.
+i.e. only mask indices < (operand 2 + operand 3) are used.
+Operand 3 conceptually has mode @code{QI}.
 
 Operand 2 can be a variable or a constant amount. Operand 4 specifies a
 constant bias: it is either a constant 0 or a constant -1. The predicate on
@@ -5329,14 +5329,14 @@ This pattern is not allowed to @code{FAIL}.
 @cindex @code{len_maskstore@var{m}@var{n}} instruction pattern
 @item @samp{len_maskstore@var{m}@var{n}}
 Perform a masked store from vector register operand 1 into memory operand 0.
-(operand 2 + operand 4) elements are stored to memory
+(operand 2 + operand 3) elements are stored to memory
 and leave the other elements of operand 0 unchanged.
 This is a combination of len_store and maskstore.
 Operands 0 and 1 have mode @var{m}, which must be a vector mode. Operand 2 has whichever
-integer mode the target prefers. A mask is specified in operand 3 which must be
+integer mode the target prefers. A mask is specified in operand 4 which must be
 of type @var{n}. The mask has lower precedence than the length and is itself subject to
-length masking, i.e. only mask indices < (operand 2 + operand 4) are used.
-Operand 4 conceptually has mode @code{QI}.
+length masking, i.e. only mask indices < (operand 2 + operand 3) are used.
+Operand 3 conceptually has mode @code{QI}.
 
 Operand 2 can be a variable or a constant amount. Operand 3 specifies a
 constant bias: it is either a constant 0 or a constant -1. The predicate on
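To make the documented semantics concrete, here is a small scalar model of len_maskstore (my own sketch, not the GCC implementation): an element is written only when its index is below len + bias and its mask bit is set; everything else is left untouched.

#include <vector>

// Scalar model of the len_maskstore semantics documented above:
// (len + bias) elements are candidates, the mask filters within that range,
// and all other elements of memory are left unchanged.  bias is 0 or -1.
void len_maskstore_model (std::vector<int> &mem, const std::vector<int> &reg,
                          const std::vector<bool> &mask, long len, long bias)
{
  long limit = len + bias;                     // bias is either 0 or -1
  for (long i = 0; i < limit && i < (long) mem.size (); ++i)
    if (mask[i])                               // mask has lower precedence than the length
      mem[i] = reg[i];
}

For example, with len = 4, bias = -1 and mask = {1, 0, 1, 1}, only elements 0 and 2 are written.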
gcc/gimple-fold.cc
@@ -5391,11 +5391,12 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
     }
   else
     {
-      tree basic_len = gimple_call_arg (call, 2);
+      internal_fn ifn = gimple_call_internal_fn (call);
+      int len_index = internal_fn_len_index (ifn);
+      tree basic_len = gimple_call_arg (call, len_index);
       if (!poly_int_tree_p (basic_len))
         return NULL_TREE;
-      unsigned int nargs = gimple_call_num_args (call);
-      tree bias = gimple_call_arg (call, nargs - 1);
+      tree bias = gimple_call_arg (call, len_index + 1);
       gcc_assert (TREE_CODE (bias) == INTEGER_CST);
       /* For LEN_LOAD/LEN_STORE/LEN_MASK_LOAD/LEN_MASK_STORE,
          we don't fold when (bias + len) != VF.  */
@@ -5405,7 +5406,6 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
 
   /* For LEN_MASK_{LOAD,STORE}, we should also check whether
      the mask is all ones mask.  */
-  internal_fn ifn = gimple_call_internal_fn (call);
   if (ifn == IFN_LEN_MASK_LOAD || ifn == IFN_LEN_MASK_STORE)
     {
       tree mask = gimple_call_arg (call, internal_fn_mask_index (ifn));
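The comments above spell out when the fold is allowed: only when the length plus the bias covers the whole vector and, for LEN_MASK_{LOAD,STORE}, the mask is all ones. A standalone sketch of that eligibility test, with trees replaced by plain integers and a bit vector (hypothetical helper, not the GCC function):

#include <vector>

// Sketch of the conditions gimple_fold_partial_load_store_mem_ref checks
// before treating a partial access as a full-vector access.
bool can_fold_to_full_vector_access (long len, long bias, long nunits,
                                     const std::vector<bool> &mask)
{
  if (len + bias != nunits)
    return false;            // (bias + len) != VF: keep the partial IFN
  for (bool bit : mask)
    if (!bit)
      return false;          // not an all-ones mask
  return true;
}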
gcc/internal-fn.cc
@@ -165,7 +165,7 @@ init_internal_fns ()
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { 3, 1, false }
 #define len_load_direct { -1, -1, false }
-#define len_maskload_direct { -1, 3, false }
+#define len_maskload_direct { -1, 4, false }
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
@@ -173,7 +173,7 @@ init_internal_fns ()
 #define vec_cond_direct { 2, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define len_store_direct { 3, 3, false }
-#define len_maskstore_direct { 4, 3, false }
+#define len_maskstore_direct { 4, 5, false }
 #define vec_set_direct { 3, 3, false }
 #define unary_direct { 0, 0, true }
 #define unary_convert_direct { -1, 0, true }
@@ -293,6 +293,38 @@ get_multi_vector_move (tree array_type, convert_optab optab)
   return convert_optab_handler (optab, imode, vmode);
 }
 
+/* Add len and mask arguments according to the STMT.  */
+
+static unsigned int
+add_len_and_mask_args (expand_operand *ops, unsigned int opno, gcall *stmt)
+{
+  internal_fn ifn = gimple_call_internal_fn (stmt);
+  int len_index = internal_fn_len_index (ifn);
+  /* BIAS is always consecutive next of LEN.  */
+  int bias_index = len_index + 1;
+  int mask_index = internal_fn_mask_index (ifn);
+  /* The order of arguments are always {len,bias,mask}.  */
+  if (len_index >= 0)
+    {
+      tree len = gimple_call_arg (stmt, len_index);
+      rtx len_rtx = expand_normal (len);
+      create_convert_operand_from (&ops[opno++], len_rtx,
+                                   TYPE_MODE (TREE_TYPE (len)),
+                                   TYPE_UNSIGNED (TREE_TYPE (len)));
+      tree biast = gimple_call_arg (stmt, bias_index);
+      rtx bias = expand_normal (biast);
+      create_input_operand (&ops[opno++], bias, QImode);
+    }
+  if (mask_index >= 0)
+    {
+      tree mask = gimple_call_arg (stmt, mask_index);
+      rtx mask_rtx = expand_normal (mask);
+      create_input_operand (&ops[opno++], mask_rtx,
+                            TYPE_MODE (TREE_TYPE (mask)));
+    }
+  return opno;
+}
+
 /* Expand LOAD_LANES call STMT using optab OPTAB.  */
 
 static void
@@ -2879,14 +2911,15 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
  * OPTAB.  */
 
 static void
-expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab)
 {
+  int i = 0;
   class expand_operand ops[5];
-  tree type, lhs, rhs, maskt, biast;
-  rtx mem, target, mask, bias;
+  tree type, lhs, rhs, maskt;
+  rtx mem, target;
   insn_code icode;
 
-  maskt = gimple_call_arg (stmt, 2);
+  maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn));
   lhs = gimple_call_lhs (stmt);
   if (lhs == NULL_TREE)
     return;
@@ -2903,38 +2936,11 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
   mem = expand_expr (rhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   gcc_assert (MEM_P (mem));
-  mask = expand_normal (maskt);
   target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
-  create_output_operand (&ops[0], target, TYPE_MODE (type));
-  create_fixed_operand (&ops[1], mem);
-  if (optab == len_load_optab)
-    {
-      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-                                   TYPE_UNSIGNED (TREE_TYPE (maskt)));
-      biast = gimple_call_arg (stmt, 3);
-      bias = expand_normal (biast);
-      create_input_operand (&ops[3], bias, QImode);
-      expand_insn (icode, 4, ops);
-    }
-  else if (optab == len_maskload_optab)
-    {
-      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-                                   TYPE_UNSIGNED (TREE_TYPE (maskt)));
-      maskt = gimple_call_arg (stmt, 3);
-      mask = expand_normal (maskt);
-      create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
-      icode = convert_optab_handler (optab, TYPE_MODE (type),
-                                     TYPE_MODE (TREE_TYPE (maskt)));
-      biast = gimple_call_arg (stmt, 4);
-      bias = expand_normal (biast);
-      create_input_operand (&ops[4], bias, QImode);
-      expand_insn (icode, 5, ops);
-    }
-  else
-    {
-      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-      expand_insn (icode, 3, ops);
-    }
+  create_output_operand (&ops[i++], target, TYPE_MODE (type));
+  create_fixed_operand (&ops[i++], mem);
+  i = add_len_and_mask_args (ops, i, stmt);
+  expand_insn (icode, i, ops);
 
   if (!rtx_equal_p (target, ops[0].value))
     emit_move_insn (target, ops[0].value);
@@ -2951,12 +2957,13 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 static void
 expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab)
 {
+  int i = 0;
   class expand_operand ops[5];
-  tree type, lhs, rhs, maskt, biast;
-  rtx mem, reg, mask, bias;
+  tree type, lhs, rhs, maskt;
+  rtx mem, reg;
   insn_code icode;
 
-  maskt = gimple_call_arg (stmt, 2);
+  maskt = gimple_call_arg (stmt, internal_fn_mask_index (ifn));
   rhs = gimple_call_arg (stmt, internal_fn_stored_value_index (ifn));
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
@@ -2971,37 +2978,11 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall *stmt, convert_optab optab)
 
   mem = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   gcc_assert (MEM_P (mem));
-  mask = expand_normal (maskt);
   reg = expand_normal (rhs);
-  create_fixed_operand (&ops[0], mem);
-  create_input_operand (&ops[1], reg, TYPE_MODE (type));
-  if (optab == len_store_optab)
-    {
-      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-                                   TYPE_UNSIGNED (TREE_TYPE (maskt)));
-      biast = gimple_call_arg (stmt, 4);
-      bias = expand_normal (biast);
-      create_input_operand (&ops[3], bias, QImode);
-      expand_insn (icode, 4, ops);
-    }
-  else if (optab == len_maskstore_optab)
-    {
-      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
-                                   TYPE_UNSIGNED (TREE_TYPE (maskt)));
-      maskt = gimple_call_arg (stmt, 3);
-      mask = expand_normal (maskt);
-      create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
-      biast = gimple_call_arg (stmt, 5);
-      bias = expand_normal (biast);
-      create_input_operand (&ops[4], bias, QImode);
-      icode = convert_optab_handler (optab, TYPE_MODE (type), GET_MODE (mask));
-      expand_insn (icode, 5, ops);
-    }
-  else
-    {
-      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-      expand_insn (icode, 3, ops);
-    }
+  create_fixed_operand (&ops[i++], mem);
+  create_input_operand (&ops[i++], reg, TYPE_MODE (type));
+  i = add_len_and_mask_args (ops, i, stmt);
+  expand_insn (icode, i, ops);
 }
 
 #define expand_mask_store_optab_fn expand_partial_store_optab_fn
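With add_len_and_mask_args in place, both expanders build their operand list in one fixed order: the fixed operands first, then len and bias together, then the mask. A toy rendering of that order (my own sketch; the real code creates expand_operand entries rather than strings):

#include <string>
#include <vector>

// Operand order used by expand_partial_load_optab_fn /
// expand_partial_store_optab_fn after this patch: fixed operands first,
// then len and bias (bias always directly follows len), then the mask.
std::vector<std::string> operand_order (bool is_store, bool has_len, bool has_mask)
{
  std::vector<std::string> ops;
  if (is_store)
    { ops.push_back ("mem"); ops.push_back ("value"); }
  else
    { ops.push_back ("dest"); ops.push_back ("mem"); }
  if (has_len)
    { ops.push_back ("len"); ops.push_back ("bias"); }
  if (has_mask)
    ops.push_back ("mask");
  return ops;
}

So a len_maskstore expands with {mem, value, len, bias, mask} and a plain maskload with {dest, mem, mask}, matching the operand numbering the autovec.md patterns above now use.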
@@ -4482,6 +4463,25 @@ internal_gather_scatter_fn_p (internal_fn fn)
     }
 }
 
+/* If FN takes a vector len argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_len_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_LEN_LOAD:
+    case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 2;
+
+    default:
+      return -1;
+    }
+}
+
 /* If FN takes a vector mask argument, return the index of that argument,
    otherwise return -1.  */
@@ -4498,11 +4498,9 @@ internal_fn_mask_index (internal_fn fn)
 
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_SCATTER_STORE:
-      return 4;
-
     case IFN_LEN_MASK_LOAD:
     case IFN_LEN_MASK_STORE:
-      return 3;
+      return 4;
 
     default:
       return (conditional_internal_fn_code (fn) != ERROR_MARK
@@ -4522,12 +4520,14 @@ internal_fn_stored_value_index (internal_fn fn)
     case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
-    case IFN_LEN_STORE:
       return 3;
 
-    case IFN_LEN_MASK_STORE:
+    case IFN_LEN_STORE:
       return 4;
 
+    case IFN_LEN_MASK_STORE:
+      return 5;
+
     default:
       return -1;
     }
@@ -4592,7 +4592,6 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode)
 {
   optab optab = direct_internal_fn_optab (ifn);
   insn_code icode = direct_optab_handler (optab, mode);
-  int bias_opno = 3;
 
   if (icode == CODE_FOR_nothing)
     {
@@ -4610,15 +4609,14 @@ internal_len_load_store_bias (internal_fn ifn, machine_mode mode)
           optab = direct_internal_fn_optab (IFN_LEN_MASK_STORE);
         }
       icode = convert_optab_handler (optab, mode, mask_mode);
-      bias_opno = 4;
     }
 
   if (icode != CODE_FOR_nothing)
     {
      /* For now we only support biases of 0 or -1.  Try both of them.  */
-      if (insn_operand_matches (icode, bias_opno, GEN_INT (0)))
+      if (insn_operand_matches (icode, 3, GEN_INT (0)))
        return 0;
-      if (insn_operand_matches (icode, bias_opno, GEN_INT (-1)))
+      if (insn_operand_matches (icode, 3, GEN_INT (-1)))
        return -1;
     }
 
gcc/internal-fn.h
@@ -234,6 +234,7 @@ extern bool internal_load_fn_p (internal_fn);
 extern bool internal_store_fn_p (internal_fn);
 extern bool internal_gather_scatter_fn_p (internal_fn);
 extern int internal_fn_mask_index (internal_fn);
+extern int internal_fn_len_index (internal_fn);
 extern int internal_fn_stored_value_index (internal_fn);
 extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
                                                     tree, tree, int);
gcc/tree-ssa-dse.cc
@@ -161,12 +161,13 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, bool may_def_ok = false)
       case IFN_MASK_STORE:
       case IFN_LEN_MASK_STORE:
         {
-          int stored_value_index
-            = internal_fn_stored_value_index (gimple_call_internal_fn (stmt));
-          if (gimple_call_internal_fn (stmt) == IFN_LEN_STORE)
+          internal_fn ifn = gimple_call_internal_fn (stmt);
+          int stored_value_index = internal_fn_stored_value_index (ifn);
+          int len_index = internal_fn_len_index (ifn);
+          if (ifn == IFN_LEN_STORE)
            {
-             tree len = gimple_call_arg (stmt, 2);
-             tree bias = gimple_call_arg (stmt, 4);
+             tree len = gimple_call_arg (stmt, len_index);
+             tree bias = gimple_call_arg (stmt, len_index + 1);
              if (tree_fits_uhwi_p (len))
                {
                  ao_ref_init_from_ptr_and_size (write,
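The DSE change above fetches the length at len_index and the bias at len_index + 1; per the documented semantics, the store then covers len + bias elements. A tiny standalone check of that arithmetic (not GCC code):

#include <cassert>

// Per the md.texi text earlier, a length-controlled access touches
// (len + bias) elements; bias is either 0 or -1.
long accessed_elements (long len, long bias)
{
  return len + bias;
}

int main ()
{
  assert (accessed_elements (16, -1) == 15);
  assert (accessed_elements (8, 0) == 8);
  return 0;
}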
gcc/tree-vect-stmts.cc
@@ -9122,13 +9122,14 @@ vectorizable_store (vec_info *vinfo,
          if (partial_ifn == IFN_LEN_MASK_STORE)
            call = gimple_build_call_internal (IFN_LEN_MASK_STORE, 6,
                                               dataref_ptr, ptr,
-                                              final_len, final_mask,
-                                              vec_oprnd, bias);
+                                              final_len, bias,
+                                              final_mask, vec_oprnd);
          else
            call
              = gimple_build_call_internal (IFN_LEN_STORE, 5,
-                                           dataref_ptr, ptr, final_len,
-                                           vec_oprnd, bias);
+                                           dataref_ptr, ptr,
+                                           final_len, bias,
+                                           vec_oprnd);
          gimple_call_set_nothrow (call, true);
          vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
          new_stmt = call;
@@ -10523,7 +10524,7 @@ vectorizable_load (vec_info *vinfo,
            call = gimple_build_call_internal (IFN_LEN_MASK_LOAD,
                                               5, dataref_ptr,
                                               ptr, final_len,
-                                              final_mask, bias);
+                                              bias, final_mask);
          else
            call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
                                               dataref_ptr, ptr,