arc: Improve/add instruction patterns to better use MAC instructions.
ARC MYP7+ instructions add MAC instructions for both vector and scalar data types. This patch adds a madd pattern for 16-bit data using the 32-bit MAC instruction, and dot_prod patterns for v4hi vector types. The 64-bit moves are also upgraded by using the vadd2 instruction. 2020-11-09 Claudiu Zissulescu <claziss@synopsys.com> gcc/ * config/arc/arc.c (arc_split_move): Recognize vadd2 instructions. * config/arc/arc.md (movdi_insn): Update pattern to use vadd2 instructions. (movdf_insn): Likewise. (maddhisi4): New pattern. (umaddhisi4): Likewise. * config/arc/simdext.md (mov<mode>_int): Update pattern to use vadd2. (sdot_prodv4hi): New pattern. (udot_prodv4hi): Likewise. (arc_vec_<V_US>mac_hi_v4hi): Update/renamed to arc_vec_<V_US>mac_v2hiv2si. (arc_vec_<V_US>mac_v2hiv2si_zero): New pattern. * config/arc/constraints.md (Ral): Accumulator register constraint. Signed-off-by: Claudiu Zissulescu <claziss@synopsys.com>
This commit is contained in:
parent
b5cff0db6e
commit
c5395d88df
4 changed files with 147 additions and 27 deletions
|
@ -10154,6 +10154,14 @@ arc_split_move (rtx *operands)
|
|||
return;
|
||||
}
|
||||
|
||||
if (TARGET_PLUS_QMACW
|
||||
&& even_register_operand (operands[0], mode)
|
||||
&& even_register_operand (operands[1], mode))
|
||||
{
|
||||
emit_move_insn (operands[0], operands[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (TARGET_PLUS_QMACW
|
||||
&& GET_CODE (operands[1]) == CONST_VECTOR)
|
||||
{
|
||||
|
|
|
@ -1322,8 +1322,8 @@ core_3, archs4x, archs4xd, archs4xd_slow"
|
|||
")
|
||||
|
||||
(define_insn_and_split "*movdi_insn"
|
||||
[(set (match_operand:DI 0 "move_dest_operand" "=w, w,r, m")
|
||||
(match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))]
|
||||
[(set (match_operand:DI 0 "move_dest_operand" "=r, r,r, m")
|
||||
(match_operand:DI 1 "move_double_src_operand" "r,Hi,m,rCm3"))]
|
||||
"register_operand (operands[0], DImode)
|
||||
|| register_operand (operands[1], DImode)
|
||||
|| (satisfies_constraint_Cm3 (operands[1])
|
||||
|
@ -1335,6 +1335,13 @@ core_3, archs4x, archs4xd, archs4xd_slow"
|
|||
default:
|
||||
return \"#\";
|
||||
|
||||
case 0:
|
||||
if (TARGET_PLUS_QMACW
|
||||
&& even_register_operand (operands[0], DImode)
|
||||
&& even_register_operand (operands[1], DImode))
|
||||
return \"vadd2\\t%0,%1,0\";
|
||||
return \"#\";
|
||||
|
||||
case 2:
|
||||
if (TARGET_LL64
|
||||
&& memory_operand (operands[1], DImode)
|
||||
|
@ -1351,7 +1358,7 @@ core_3, archs4x, archs4xd, archs4xd_slow"
|
|||
return \"#\";
|
||||
}
|
||||
}"
|
||||
"reload_completed"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
{
|
||||
arc_split_move (operands);
|
||||
|
@ -1397,15 +1404,24 @@ core_3, archs4x, archs4xd, archs4xd_slow"
|
|||
"if (prepare_move_operands (operands, DFmode)) DONE;")
|
||||
|
||||
(define_insn_and_split "*movdf_insn"
|
||||
[(set (match_operand:DF 0 "move_dest_operand" "=D,r,c,c,r,m")
|
||||
(match_operand:DF 1 "move_double_src_operand" "r,D,c,E,m,c"))]
|
||||
"register_operand (operands[0], DFmode) || register_operand (operands[1], DFmode)"
|
||||
[(set (match_operand:DF 0 "move_dest_operand" "=D,r,r,r,r,m")
|
||||
(match_operand:DF 1 "move_double_src_operand" "r,D,r,E,m,r"))]
|
||||
"register_operand (operands[0], DFmode)
|
||||
|| register_operand (operands[1], DFmode)"
|
||||
"*
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
default:
|
||||
return \"#\";
|
||||
|
||||
case 2:
|
||||
if (TARGET_PLUS_QMACW
|
||||
&& even_register_operand (operands[0], DFmode)
|
||||
&& even_register_operand (operands[1], DFmode))
|
||||
return \"vadd2\\t%0,%1,0\";
|
||||
return \"#\";
|
||||
|
||||
case 4:
|
||||
if (TARGET_LL64
|
||||
&& ((even_register_operand (operands[0], DFmode)
|
||||
|
@ -6126,6 +6142,49 @@ core_3, archs4x, archs4xd, archs4xd_slow"
|
|||
[(set_attr "length" "0")])
|
||||
|
||||
;; MAC and DMPY instructions
|
||||
|
||||
; Use MAC instruction to emulate 16bit mac.
|
||||
;; Expander for a signed HImode x HImode + SImode multiply-accumulate,
;; enabled when TARGET_PLUS_DMPY holds.
;;   operand 0: SImode result register.
;;   operand 1: HImode register multiplicand.
;;   operand 2: HImode multiplicand accepted by "extend_operand".
;;   operand 3: SImode register addend.
;; Expansion sequence: operand 3 is copied into the SImode low part of
;; the DImode accumulator register (ACC_REG_FIRST), operands 1 and 2 are
;; sign-extended into fresh SImode pseudos, gen_mac is emitted on those
;; pseudos, and the accumulator low part is then copied to operand 0.
;; NOTE(review): only the low part of the accumulator is initialised
;; here; presumably gen_mac's upper-half result is simply unused --
;; confirm against the "mac" pattern.
(define_expand "maddhisi4"
|
||||
[(match_operand:SI 0 "register_operand" "")
|
||||
(match_operand:HI 1 "register_operand" "")
|
||||
(match_operand:HI 2 "extend_operand" "")
|
||||
(match_operand:SI 3 "register_operand" "")]
|
||||
"TARGET_PLUS_DMPY"
|
||||
"{
|
||||
rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
|
||||
rtx tmp1 = gen_reg_rtx (SImode);
|
||||
rtx tmp2 = gen_reg_rtx (SImode);
|
||||
rtx accl = gen_lowpart (SImode, acc_reg);
|
||||
|
||||
emit_move_insn (accl, operands[3]);
|
||||
emit_insn (gen_rtx_SET (tmp1, gen_rtx_SIGN_EXTEND (SImode, operands[1])));
|
||||
emit_insn (gen_rtx_SET (tmp2, gen_rtx_SIGN_EXTEND (SImode, operands[2])));
|
||||
emit_insn (gen_mac (tmp1, tmp2));
|
||||
emit_move_insn (operands[0], accl);
|
||||
DONE;
|
||||
}")
|
||||
|
||||
; The same for the unsigned variant, but using MACU instruction.
|
||||
;; Expander for an unsigned HImode x HImode + SImode multiply-accumulate,
;; enabled when TARGET_PLUS_DMPY holds.
;;   operand 0: SImode result register.
;;   operand 1: HImode register multiplicand.
;;   operand 2: HImode multiplicand accepted by "extend_operand".
;;   operand 3: SImode register addend.
;; Expansion sequence: operand 3 is copied into the SImode low part of
;; the DImode accumulator register (ACC_REG_FIRST), operands 1 and 2 are
;; zero-extended into fresh SImode pseudos, gen_macu is emitted on those
;; pseudos, and the accumulator low part is then copied to operand 0.
(define_expand "umaddhisi4"
|
||||
[(match_operand:SI 0 "register_operand" "")
|
||||
(match_operand:HI 1 "register_operand" "")
|
||||
(match_operand:HI 2 "extend_operand" "")
|
||||
(match_operand:SI 3 "register_operand" "")]
|
||||
"TARGET_PLUS_DMPY"
|
||||
"{
|
||||
rtx acc_reg = gen_rtx_REG (DImode, ACC_REG_FIRST);
|
||||
rtx tmp1 = gen_reg_rtx (SImode);
|
||||
rtx tmp2 = gen_reg_rtx (SImode);
|
||||
rtx accl = gen_lowpart (SImode, acc_reg);
|
||||
|
||||
emit_move_insn (accl, operands[3]);
|
||||
emit_insn (gen_rtx_SET (tmp1, gen_rtx_ZERO_EXTEND (SImode, operands[1])));
|
||||
emit_insn (gen_rtx_SET (tmp2, gen_rtx_ZERO_EXTEND (SImode, operands[2])));
|
||||
emit_insn (gen_macu (tmp1, tmp2));
|
||||
emit_move_insn (operands[0], accl);
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "maddsidi4"
|
||||
[(match_operand:DI 0 "register_operand" "")
|
||||
(match_operand:SI 1 "register_operand" "")
|
||||
|
|
|
@ -493,6 +493,11 @@
|
|||
Condition Codes"
|
||||
(and (match_code "reg") (match_test "cc_register (op, VOIDmode)")))
|
||||
|
||||
;; "Ral" matches only a REG rtx whose register number is ACCL_REGNO.
;; It is a plain constraint (not a register-class constraint), which is
;; what the docstring's "do not reload into its class" refers to.
(define_constraint "Ral"
|
||||
"@internal
|
||||
Accumulator register @code{ACCL} - do not reload into its class"
|
||||
(and (match_code "reg")
|
||||
(match_test "REGNO (op) == ACCL_REGNO")))
|
||||
|
||||
(define_constraint "Q"
|
||||
"@internal
|
||||
|
|
|
@ -1400,8 +1400,7 @@
|
|||
(define_insn_and_split "*mov<mode>_insn"
|
||||
[(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
|
||||
(match_operand:VWH 1 "general_operand" "i,r,m,r"))]
|
||||
"TARGET_PLUS_QMACW
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
"(register_operand (operands[0], <MODE>mode)
|
||||
|| register_operand (operands[1], <MODE>mode))"
|
||||
"*
|
||||
{
|
||||
|
@ -1411,7 +1410,11 @@
|
|||
return \"#\";
|
||||
|
||||
case 1:
|
||||
return \"vadd2 %0, %1, 0\";
|
||||
if (TARGET_PLUS_QMACW
|
||||
&& even_register_operand (operands[0], <MODE>mode)
|
||||
&& even_register_operand (operands[1], <MODE>mode))
|
||||
return \"vadd2\\t%0,%1,0\";
|
||||
return \"#\";
|
||||
|
||||
case 2:
|
||||
if (TARGET_LL64)
|
||||
|
@ -1430,7 +1433,7 @@
|
|||
arc_split_move (operands);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "move,move,load,store")
|
||||
[(set_attr "type" "move,multi,load,store")
|
||||
(set_attr "predicable" "yes,no,no,no")
|
||||
(set_attr "iscompact" "false,false,false,false")
|
||||
])
|
||||
|
@ -1612,6 +1615,44 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for the signed V4HI dot-product pattern, enabled when
;; TARGET_PLUS_MACD holds.
;;   operand 0: V2SI result register.
;;   operands 1, 2: V4HI register inputs.
;;   operand 3: V2SI register addend.
;; Expansion sequence: operand 3 is copied into the V2SI accumulator
;; register (ACC_REG_FIRST); the low V2HI halves of operands 1 and 2 are
;; fed to arc_vec_smac_v2hiv2si_zero, which has no destination operand;
;; the high V2HI halves are then fed to arc_vec_smac_v2hiv2si, which
;; writes operand 0.
(define_expand "sdot_prodv4hi"
|
||||
[(match_operand:V2SI 0 "register_operand" "")
|
||||
(match_operand:V4HI 1 "register_operand" "")
|
||||
(match_operand:V4HI 2 "register_operand" "")
|
||||
(match_operand:V2SI 3 "register_operand" "")]
|
||||
"TARGET_PLUS_MACD"
|
||||
{
|
||||
rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST);
|
||||
rtx op1_low = gen_lowpart (V2HImode, operands[1]);
|
||||
rtx op1_high = gen_highpart (V2HImode, operands[1]);
|
||||
rtx op2_low = gen_lowpart (V2HImode, operands[2]);
|
||||
rtx op2_high = gen_highpart (V2HImode, operands[2]);
|
||||
|
||||
emit_move_insn (acc_reg, operands[3]);
|
||||
emit_insn (gen_arc_vec_smac_v2hiv2si_zero (op1_low, op2_low));
|
||||
emit_insn (gen_arc_vec_smac_v2hiv2si (operands[0], op1_high, op2_high));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for the unsigned V4HI dot-product pattern, enabled when
;; TARGET_PLUS_MACD holds.
;;   operand 0: V2SI result register.
;;   operands 1, 2: V4HI register inputs.
;;   operand 3: V2SI register addend.
;; Expansion sequence: operand 3 is copied into the V2SI accumulator
;; register (ACC_REG_FIRST); the low V2HI halves of operands 1 and 2 are
;; fed to arc_vec_umac_v2hiv2si_zero, which has no destination operand;
;; the high V2HI halves are then fed to arc_vec_umac_v2hiv2si, which
;; writes operand 0.
(define_expand "udot_prodv4hi"
|
||||
[(match_operand:V2SI 0 "register_operand" "")
|
||||
(match_operand:V4HI 1 "register_operand" "")
|
||||
(match_operand:V4HI 2 "register_operand" "")
|
||||
(match_operand:V2SI 3 "register_operand" "")]
|
||||
"TARGET_PLUS_MACD"
|
||||
{
|
||||
rtx acc_reg = gen_rtx_REG (V2SImode, ACC_REG_FIRST);
|
||||
rtx op1_low = gen_lowpart (V2HImode, operands[1]);
|
||||
rtx op1_high = gen_highpart (V2HImode, operands[1]);
|
||||
rtx op2_low = gen_lowpart (V2HImode, operands[2]);
|
||||
rtx op2_high = gen_highpart (V2HImode, operands[2]);
|
||||
|
||||
emit_move_insn (acc_reg, operands[3]);
|
||||
emit_insn (gen_arc_vec_umac_v2hiv2si_zero (op1_low, op2_low));
|
||||
emit_insn (gen_arc_vec_umac_v2hiv2si (operands[0], op1_high, op2_high));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "arc_vec_<V_US>mult_lo_v4hi"
|
||||
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
|
||||
(mult:V2SI (SE:V2SI (vec_select:V2HI
|
||||
|
@ -1704,30 +1745,37 @@
|
|||
}
|
||||
)
|
||||
|
||||
(define_insn "arc_vec_<V_US>mac_hi_v4hi"
|
||||
[(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
|
||||
(define_insn "arc_vec_<V_US>mac_v2hiv2si"
|
||||
[(set (match_operand:V2SI 0 "even_register_operand" "=r,Ral,r")
|
||||
(plus:V2SI
|
||||
(reg:V2SI ARCV2_ACC)
|
||||
(mult:V2SI (SE:V2SI (vec_select:V2HI
|
||||
(match_operand:V4HI 1 "even_register_operand" "0,r")
|
||||
(parallel [(const_int 2) (const_int 3)])))
|
||||
(SE:V2SI (vec_select:V2HI
|
||||
(match_operand:V4HI 2 "even_register_operand" "r,r")
|
||||
(parallel [(const_int 2) (const_int 3)]))))))
|
||||
(mult:V2SI (SE:V2SI (match_operand:V2HI 1 "register_operand" "0, r,r"))
|
||||
(SE:V2SI (match_operand:V2HI 2 "register_operand" "r, r,r")))
|
||||
(reg:V2SI ARCV2_ACC)))
|
||||
(set (reg:V2SI ARCV2_ACC)
|
||||
(plus:V2SI
|
||||
(reg:V2SI ARCV2_ACC)
|
||||
(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
|
||||
(parallel [(const_int 2) (const_int 3)])))
|
||||
(SE:V2SI (vec_select:V2HI (match_dup 2)
|
||||
(parallel [(const_int 2) (const_int 3)]))))))
|
||||
(mult:V2SI (SE:V2SI (match_dup 1))
|
||||
(SE:V2SI (match_dup 2)))
|
||||
(reg:V2SI ARCV2_ACC)))
|
||||
]
|
||||
"TARGET_PLUS_MACD"
|
||||
"vmac2h<V_US_suffix>%? %0, %R1, %R2"
|
||||
"@
|
||||
vmac2h<V_US_suffix>%?\\t%0,%1,%2
|
||||
vmac2h<V_US_suffix>%?\\t0,%1,%2
|
||||
vmac2h<V_US_suffix>%?\\t%0,%1,%2"
|
||||
[(set_attr "length" "4")
|
||||
(set_attr "type" "multi")
|
||||
(set_attr "predicable" "yes,no")
|
||||
(set_attr "cond" "canuse,nocond")])
|
||||
(set_attr "predicable" "yes,no,no")])
|
||||
|
||||
;; Accumulator-only form of the V2HI multiply-accumulate, enabled when
;; TARGET_PLUS_MACD holds.  The only SET destination is the ARCV2_ACC
;; register: ARCV2_ACC = SE (operand 0) * SE (operand 1) + ARCV2_ACC,
;; in V2SI mode.  Both inputs are V2HI registers.  The output template
;; emits a literal 0 in the destination slot of vmac2h.
;; NOTE(review): SE and <V_US>/<V_US_suffix> are iterators/attributes
;; defined outside this view -- presumably sign/zero extension and the
;; matching signed/unsigned name and mnemonic suffix; confirm.
(define_insn "arc_vec_<V_US>mac_v2hiv2si_zero"
|
||||
[(set (reg:V2SI ARCV2_ACC)
|
||||
(plus:V2SI
|
||||
(mult:V2SI (SE:V2SI (match_operand:V2HI 0 "register_operand" "r"))
|
||||
(SE:V2SI (match_operand:V2HI 1 "register_operand" "r")))
|
||||
(reg:V2SI ARCV2_ACC)))]
|
||||
"TARGET_PLUS_MACD"
|
||||
"vmac2h<V_US_suffix>%?\\t0,%0,%1"
|
||||
[(set_attr "length" "4")
|
||||
(set_attr "type" "multi")])
|
||||
|
||||
;; Builtins
|
||||
(define_insn "dmach"
|
||||
|
|
Loading…
Add table
Reference in a new issue