[NDS32] Optimize movmem and setmem operations.
gcc/ * config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns implementation. (unaligned_store_dw): Ditto. * config/nds32/nds32-memory-manipulation.c (nds32_expand_movmemsi_loop_known_size): Refactoring implementation. (nds32_gen_dup_4_byte_to_word_value): Rename to ... (nds32_gen_dup_4_byte_to_word_value_aux): ... this. (emit_setmem_word_loop): Rename to ... (emit_setmem_doubleword_loop): ... this. (nds32_gen_dup_4_byte_to_word_value): New function. (nds32_gen_dup_8_byte_to_double_word_value): New function. (nds32_expand_setmem_loop): Refine implementation. (nds32_expand_setmem_loop_v3m): Ditto. * config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New pattern. Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com> From-SVN: r260805
This commit is contained in:
parent
0be3bad705
commit
8889fbe542
4 changed files with 205 additions and 34 deletions
|
@ -1,3 +1,22 @@
|
|||
2018-05-27 Monk Chiang <sh.chiang04@gmail.com>
|
||||
Chung-Ju Wu <jasonwucj@gmail.com>
|
||||
|
||||
* config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns
|
||||
implementation.
|
||||
(unaligned_store_dw): Ditto.
|
||||
* config/nds32/nds32-memory-manipulation.c
|
||||
(nds32_expand_movmemsi_loop_known_size): Refactoring implementation.
|
||||
(nds32_gen_dup_4_byte_to_word_value): Rename to ...
|
||||
(nds32_gen_dup_4_byte_to_word_value_aux): ... this.
|
||||
(emit_setmem_word_loop): Rename to ...
|
||||
(emit_setmem_doubleword_loop): ... this.
|
||||
(nds32_gen_dup_4_byte_to_word_value): New function.
|
||||
(nds32_gen_dup_8_byte_to_double_word_value): New function.
|
||||
(nds32_expand_setmem_loop): Refine implementation.
|
||||
(nds32_expand_setmem_loop_v3m): Ditto.
|
||||
* config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New
|
||||
pattern.
|
||||
|
||||
2018-05-27 Chung-Ju Wu <jasonwucj@gmail.com>
|
||||
|
||||
* config/nds32/nds32.md (bswapsi2, bswaphi2): New patterns.
|
||||
|
|
|
@ -1596,22 +1596,17 @@
|
|||
if (TARGET_ISA_V3M)
|
||||
nds32_expand_unaligned_store (operands, DImode);
|
||||
else
|
||||
emit_insn (gen_unaligned_store_dw (operands[0], operands[1]));
|
||||
emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
|
||||
operands[1]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "unaligned_store_dw"
|
||||
[(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
|
||||
(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
|
||||
[(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw")
|
||||
(unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))]
|
||||
""
|
||||
{
|
||||
rtx otherops[3];
|
||||
otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
|
||||
otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
|
||||
otherops[2] = operands[0];
|
||||
|
||||
output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
|
||||
return "";
|
||||
return nds32_output_smw_double_word (operands);
|
||||
}
|
||||
[(set_attr "type" "store")
|
||||
(set_attr "length" "4")]
|
||||
|
|
|
@ -257,8 +257,124 @@ static bool
|
|||
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
|
||||
rtx size, rtx alignment)
|
||||
{
|
||||
return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
|
||||
size, alignment);
|
||||
rtx dst_base_reg, src_base_reg;
|
||||
rtx dst_itr, src_itr;
|
||||
rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
|
||||
rtx dst_end;
|
||||
rtx double_word_mode_loop, byte_mode_loop;
|
||||
rtx tmp;
|
||||
int start_regno;
|
||||
bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
|
||||
unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
|
||||
|
||||
if (TARGET_ISA_V3M && !align_to_4_bytes)
|
||||
return 0;
|
||||
|
||||
if (TARGET_REDUCED_REGS)
|
||||
start_regno = 2;
|
||||
else
|
||||
start_regno = 16;
|
||||
|
||||
dst_itr = gen_reg_rtx (Pmode);
|
||||
src_itr = gen_reg_rtx (Pmode);
|
||||
dst_end = gen_reg_rtx (Pmode);
|
||||
tmp = gen_reg_rtx (QImode);
|
||||
|
||||
double_word_mode_loop = gen_label_rtx ();
|
||||
byte_mode_loop = gen_label_rtx ();
|
||||
|
||||
dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
|
||||
src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
|
||||
|
||||
if (total_bytes < 8)
|
||||
{
|
||||
/* Emit the loop version of movmem for total_bytes less than 8.
|
||||
add $dst_end, $dst, $size
|
||||
move $dst_itr, $dst
|
||||
.Lbyte_mode_loop:
|
||||
lbi.bi $tmp, [$src_itr], #1
|
||||
sbi.bi $tmp, [$dst_itr], #1
|
||||
! Upper bound not reached yet. Loop.
|
||||
bne $dst_itr, $dst_end, .Lbyte_mode_loop */
|
||||
|
||||
/* add $dst_end, $dst, $size */
|
||||
dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
|
||||
NULL_RTX, 0, OPTAB_WIDEN);
|
||||
/* move $dst_itr, $dst
|
||||
move $src_itr, $src */
|
||||
emit_move_insn (dst_itr, dst_base_reg);
|
||||
emit_move_insn (src_itr, src_base_reg);
|
||||
|
||||
/* .Lbyte_mode_loop: */
|
||||
emit_label (byte_mode_loop);
|
||||
|
||||
/* lbi.bi $tmp, [$src_itr], #1 */
|
||||
nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
|
||||
|
||||
/* sbi.bi $tmp, [$dst_itr], #1 */
|
||||
nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
|
||||
/* ! Upper bound not reached yet. Loop.
|
||||
bne $dst_itr, $dst_end, .Lbyte_mode_loop */
|
||||
emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
|
||||
SImode, 1, byte_mode_loop);
|
||||
return true;
|
||||
}
|
||||
else if (total_bytes % 8 == 0)
|
||||
{
|
||||
/* Emit multiple of 8 loop version of movmem.
|
||||
|
||||
add $dst_end, $dst, $size
|
||||
move $dst_itr, $dst
|
||||
move $src_itr, $src
|
||||
|
||||
.Ldouble_word_mode_loop:
|
||||
lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
||||
smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
|
||||
! These moves will be deleted after register allocation.
|
||||
move $src_itr, $src_itr'
|
||||
move $dst_itr, $dst_itr'
|
||||
! Upper bound not reached yet. Loop.
|
||||
bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
|
||||
|
||||
/* add $dst_end, $dst, $size */
|
||||
dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
|
||||
NULL_RTX, 0, OPTAB_WIDEN);
|
||||
|
||||
/* move $dst_itr, $dst
|
||||
move $src_itr, $src */
|
||||
emit_move_insn (dst_itr, dst_base_reg);
|
||||
emit_move_insn (src_itr, src_base_reg);
|
||||
|
||||
/* .Ldouble_word_mode_loop: */
|
||||
emit_label (double_word_mode_loop);
|
||||
/* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
|
||||
smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
|
||||
src_itr_m = src_itr;
|
||||
dst_itr_m = dst_itr;
|
||||
srcmem_m = srcmem;
|
||||
dstmem_m = dstmem;
|
||||
nds32_emit_mem_move_block (start_regno, 2,
|
||||
&dst_itr_m, &dstmem_m,
|
||||
&src_itr_m, &srcmem_m,
|
||||
true);
|
||||
/* move $src_itr, $src_itr'
|
||||
move $dst_itr, $dst_itr' */
|
||||
emit_move_insn (dst_itr, dst_itr_m);
|
||||
emit_move_insn (src_itr, src_itr_m);
|
||||
|
||||
/* ! Upper bound not reached yet. Loop.
|
||||
bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
|
||||
emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
|
||||
Pmode, 1, double_word_mode_loop);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Handle size greater than 8, and not a multiple of 8. */
|
||||
return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
|
||||
size, alignment);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
|
|||
/* Auxiliary function for expand setmem pattern. */
|
||||
|
||||
static rtx
|
||||
nds32_gen_dup_4_byte_to_word_value (rtx value)
|
||||
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
|
||||
{
|
||||
rtx value4word = gen_reg_rtx (SImode);
|
||||
|
||||
gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
|
||||
|
||||
if (CONST_INT_P (value))
|
||||
|
@ -493,7 +607,30 @@ nds32_gen_dup_4_byte_to_word_value (rtx value)
|
|||
}
|
||||
|
||||
static rtx
|
||||
emit_setmem_word_loop (rtx itr, rtx size, rtx value)
|
||||
nds32_gen_dup_4_byte_to_word_value (rtx value)
|
||||
{
|
||||
rtx value4word = gen_reg_rtx (SImode);
|
||||
nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);
|
||||
|
||||
return value4word;
|
||||
}
|
||||
|
||||
static rtx
|
||||
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
|
||||
{
|
||||
rtx value4doubleword = gen_reg_rtx (DImode);
|
||||
|
||||
nds32_gen_dup_4_byte_to_word_value_aux (
|
||||
value, nds32_di_low_part_subreg(value4doubleword));
|
||||
|
||||
emit_move_insn (nds32_di_high_part_subreg(value4doubleword),
|
||||
nds32_di_low_part_subreg(value4doubleword));
|
||||
return value4doubleword;
|
||||
}
|
||||
|
||||
|
||||
static rtx
|
||||
emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
|
||||
{
|
||||
rtx word_mode_label = gen_label_rtx ();
|
||||
rtx word_mode_end_label = gen_label_rtx ();
|
||||
|
@ -502,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
|
|||
rtx word_mode_end = gen_reg_rtx (SImode);
|
||||
rtx size_for_word = gen_reg_rtx (SImode);
|
||||
|
||||
/* and $size_for_word, $size, #~3 */
|
||||
/* and $size_for_word, $size, #~0x7 */
|
||||
size_for_word = expand_binop (SImode, and_optab, size,
|
||||
gen_int_mode (~3, SImode),
|
||||
gen_int_mode (~0x7, SImode),
|
||||
NULL_RTX, 0, OPTAB_WIDEN);
|
||||
|
||||
emit_move_insn (byte_mode_size, size);
|
||||
|
@ -516,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
|
|||
word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
|
||||
NULL_RTX, 0, OPTAB_WIDEN);
|
||||
|
||||
/* andi $byte_mode_size, $size, 3 */
|
||||
byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3),
|
||||
/* andi $byte_mode_size, $size, 0x7 */
|
||||
byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
|
||||
NULL_RTX, 0, OPTAB_WIDEN);
|
||||
|
||||
emit_move_insn (byte_mode_size, byte_mode_size_tmp);
|
||||
|
@ -527,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
|
|||
/* ! word-mode set loop
|
||||
smw.bim $value4word, [$dst_itr], $value4word, 0
|
||||
bne $word_mode_end, $dst_itr, .Lword_mode */
|
||||
emit_insn (gen_unaligned_store_update_base_w (itr,
|
||||
itr,
|
||||
value));
|
||||
emit_insn (gen_unaligned_store_update_base_dw (itr,
|
||||
itr,
|
||||
value));
|
||||
emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
|
||||
Pmode, 1, word_mode_label);
|
||||
|
||||
|
@ -581,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
|
|||
static bool
|
||||
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
|
||||
{
|
||||
rtx value4word;
|
||||
rtx value4doubleword;
|
||||
rtx value4byte;
|
||||
rtx dst;
|
||||
rtx byte_mode_size;
|
||||
|
@ -624,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
|
|||
or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab
|
||||
slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000
|
||||
or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */
|
||||
value4word = nds32_gen_dup_4_byte_to_word_value (value);
|
||||
value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
|
||||
|
||||
/* and $size_for_word, $size, #-4
|
||||
beqz $size_for_word, .Lword_mode_end
|
||||
|
@ -637,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
|
|||
smw.bim $value4word, [$dst], $value4word, 0
|
||||
bne $word_mode_end, $dst, .Lword_mode
|
||||
.Lword_mode_end: */
|
||||
byte_mode_size = emit_setmem_word_loop (dst, size, value4word);
|
||||
byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);
|
||||
|
||||
/* beqz $byte_mode_size, .Lend
|
||||
add $byte_mode_end, $dst, $byte_mode_size
|
||||
|
@ -648,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
|
|||
bne $byte_mode_end, $dst, .Lbyte_mode
|
||||
.Lend: */
|
||||
|
||||
value4byte = simplify_gen_subreg (QImode, value4word, SImode,
|
||||
subreg_lowpart_offset (QImode, SImode));
|
||||
value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
|
||||
subreg_lowpart_offset (QImode, DImode));
|
||||
|
||||
emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
|
||||
|
||||
|
@ -666,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
|
|||
rtx byte_loop_size = gen_reg_rtx (SImode);
|
||||
rtx remain_size = gen_reg_rtx (SImode);
|
||||
rtx new_base_reg;
|
||||
rtx value4byte, value4word;
|
||||
rtx value4byte, value4doubleword;
|
||||
rtx byte_mode_size;
|
||||
rtx last_byte_loop_label = gen_label_rtx ();
|
||||
|
||||
size = force_reg (SImode, size);
|
||||
|
||||
value4word = nds32_gen_dup_4_byte_to_word_value (value);
|
||||
value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0);
|
||||
value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
|
||||
value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
|
||||
subreg_lowpart_offset (QImode, DImode));
|
||||
|
||||
emit_move_insn (byte_loop_size, size);
|
||||
emit_move_insn (byte_loop_base, base_reg);
|
||||
|
@ -701,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
|
|||
emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
|
||||
|
||||
/* Set memory double word by double word. */
|
||||
byte_mode_size = emit_setmem_word_loop (new_base_reg,
|
||||
remain_size,
|
||||
value4word);
|
||||
byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
|
||||
remain_size,
|
||||
value4doubleword);
|
||||
|
||||
emit_move_insn (byte_loop_base, new_base_reg);
|
||||
emit_move_insn (byte_loop_size, byte_mode_size);
|
||||
|
|
|
@ -2854,6 +2854,25 @@
|
|||
(set_attr "length" "4")]
|
||||
)
|
||||
|
||||
(define_expand "unaligned_store_update_base_dw"
|
||||
[(parallel [(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8)))
|
||||
(set (mem:DI (match_dup 1))
|
||||
(unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])]
|
||||
""
|
||||
{
|
||||
/* DO NOT emit unaligned_store_w_m immediately since the web pass doesn't
|
||||
recognize post_inc, try it again after GCC 5.0.
|
||||
REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */
|
||||
emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2]));
|
||||
emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode)));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "store_multiple")
|
||||
(set_attr "combo" "2")
|
||||
(set_attr "length" "4")]
|
||||
)
|
||||
|
||||
(define_insn "*stmsi25"
|
||||
[(match_parallel 0 "nds32_store_multiple_operation"
|
||||
[(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
|
||||
|
|
Loading…
Add table
Reference in a new issue