[PATCH v2 1/2] RISC-V: Make vectorized memset handle more cases
`expand_vec_setmem` only generated a vectorized memset if it fitted into a single vector store of at least (TARGET_MIN_VLEN / 8) bytes. Also, without dynamic LMUL the operation always used TARGET_MAX_LMUL even if it would have fitted a smaller LMUL. Allow vectorized memset to be generated for smaller lengths and smaller LMUL by switching to using use_vector_stringop_p. Smaller LMUL can be seen in setmem-3.c:f3. Smaller lengths will be seen after the second patch in this series, which selectively disables by-pieces expansion. gcc/ChangeLog: * config/riscv/riscv-string.cc (use_vector_stringop_p): Add comment. (expand_vec_setmem): Use use_vector_stringop_p instead of check_vectorise_memory_operation. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/setmem-3.c: Expect smaller lmul.
This commit is contained in:
parent
fe97ac43e0
commit
b30c6a5eab
2 changed files with 22 additions and 21 deletions
|
@ -1062,6 +1062,9 @@ struct stringop_info {
|
|||
|
||||
MAX_EW is the maximum element width that the caller wants to use and
|
||||
LENGTH_IN is the length of the stringop in bytes.
|
||||
|
||||
This is currently used for cpymem and setmem. If expand_vec_cmpmem switches
|
||||
to using it too then check_vectorise_memory_operation can be removed.
|
||||
*/
|
||||
|
||||
static bool
|
||||
|
@ -1600,41 +1603,39 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out)
|
|||
bool
|
||||
expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
|
||||
{
|
||||
HOST_WIDE_INT lmul;
|
||||
stringop_info info;
|
||||
|
||||
/* Check we are able and allowed to vectorise this operation;
|
||||
bail if not. */
|
||||
if (!check_vectorise_memory_operation (length_in, lmul))
|
||||
if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop)
|
||||
return false;
|
||||
|
||||
machine_mode vmode
|
||||
= riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
|
||||
.require ();
|
||||
rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
|
||||
rtx dst = change_address (dst_in, vmode, dst_addr);
|
||||
rtx dst = change_address (dst_in, info.vmode, dst_addr);
|
||||
|
||||
rtx fill_value = gen_reg_rtx (vmode);
|
||||
rtx fill_value = gen_reg_rtx (info.vmode);
|
||||
rtx broadcast_ops[] = { fill_value, fill_value_in };
|
||||
|
||||
/* If the length is exactly vlmax for the selected mode, do that.
|
||||
Otherwise, use a predicated store. */
|
||||
if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
|
||||
if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
|
||||
{
|
||||
emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
|
||||
broadcast_ops);
|
||||
emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
|
||||
broadcast_ops);
|
||||
emit_move_insn (dst, fill_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!satisfies_constraint_K (length_in))
|
||||
length_in = force_reg (Pmode, length_in);
|
||||
emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
|
||||
broadcast_ops, length_in);
|
||||
if (!satisfies_constraint_K (info.avl))
|
||||
info.avl = force_reg (Pmode, info.avl);
|
||||
emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
|
||||
riscv_vector::UNARY_OP, broadcast_ops, info.avl);
|
||||
machine_mode mask_mode
|
||||
= riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode))
|
||||
.require ();
|
||||
= riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
|
||||
.require ();
|
||||
rtx mask = CONSTM1_RTX (mask_mode);
|
||||
emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in,
|
||||
get_avl_type_rtx (riscv_vector::NONVLMAX)));
|
||||
emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl,
|
||||
get_avl_type_rtx (riscv_vector::NONVLMAX)));
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -21,13 +21,13 @@ f1 (void *a, int const b)
|
|||
return __builtin_memset (a, b, MIN_VECTOR_BYTES - 1);
|
||||
}
|
||||
|
||||
/* Vectorise+inline minimum vector register width using requested lmul.
|
||||
/* Vectorised code should use smallest lmul known to fit length.
|
||||
** f2:
|
||||
** (
|
||||
** vsetivli\s+zero,\d+,e8,m8,ta,ma
|
||||
** vsetivli\s+zero,\d+,e8,m1,ta,ma
|
||||
** |
|
||||
** li\s+a\d+,\d+
|
||||
** vsetvli\s+zero,a\d+,e8,m8,ta,ma
|
||||
** vsetvli\s+zero,a\d+,e8,m1,ta,ma
|
||||
** )
|
||||
** vmv\.v\.x\s+v\d+,a1
|
||||
** vse8\.v\s+v\d+,0\(a0\)
|
||||
|
|
Loading…
Add table
Reference in a new issue