amdgcn: switch mov insns to compact syntax

The move instructions typically have many alternatives (and I'm about to add
more), so they are good candidates for the new compact syntax.

This patch only converts the patterns where there are no significant changes to
the generated files. The other patterns can be converted another time.

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (*mov<mode>): Convert to compact syntax.
	(mov<mode>_exec): Likewise.
	(mov<mode>_sgprbase): Likewise.
	* config/gcn/gcn.md (*mov<mode>_insn): Likewise.
	(*movti_insn): Likewise.
This commit is contained in:
Andrew Stubbs 2023-10-02 11:42:03 +01:00
parent eb239c7f22
commit ddfa43933e
2 changed files with 106 additions and 128 deletions

View file

@ -457,23 +457,21 @@
(set_attr "length" "4,8")])
; Masked vector move: operand 0 receives a vec_merge of operand 1 and
; operand 2 under the DImode mask in operand 3.  The insn condition only
; admits a memory destination when the source is a register.
; Alternatives involving "m" output "#" instead of assembly (in GCC that
; means the insn must be split later) and are the only ones that use the
; earlyclobber "&v" scratch in operand 4; the rest use "X" there.
(define_insn "mov<mode>_exec"
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(vec_merge:V_1REG
(match_operand:V_1REG 1 "general_operand" "vA, B, v,vA, m, v")
(match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
"U0,U0,vA,vA,U0,U0")
(match_operand:DI 3 "register_operand" " e, e,cV,Sv, e, e")))
(clobber (match_scratch:<VnDI> 4 "=X, X, X, X,&v,&v"))]
(match_operand:V_1REG 1 "general_operand")
(match_operand:V_1REG 2 "gcn_alu_or_unspec_operand")
(match_operand:DI 3 "register_operand")))
(clobber (match_scratch:<VnDI> 4))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
"@
v_mov_b32\t%0, %1
v_mov_b32\t%0, %1
v_cndmask_b32\t%0, %2, %1, vcc
v_cndmask_b32\t%0, %2, %1, %3
#
#"
[(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
(set_attr "length" "4,8,4,8,16,16")])
{@ [cons: =0, 1, 2, 3, =4; attrs: type, length]
[v,vA,U0,e ,X ;vop1 ,4 ] v_mov_b32\t%0, %1
[v,B ,U0,e ,X ;vop1 ,8 ] v_mov_b32\t%0, %1
[v,v ,vA,cV,X ;vop2 ,4 ] v_cndmask_b32\t%0, %2, %1, vcc
[v,vA,vA,Sv,X ;vop3a,8 ] v_cndmask_b32\t%0, %2, %1, %3
[v,m ,U0,e ,&v;* ,16] #
[m,v ,U0,e ,&v;* ,16] #
})
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
@ -644,19 +642,18 @@
; flat_load v, vT
(define_insn "mov<mode>_sgprbase"
; V_1REG move wrapped in UNSPEC_SGPRBASE.  Only matches while
; lra_in_progress or after reload_completed.  Operand 2 is a <VnDI>
; register clobbered (earlyclobber "&v") by every alternative; the two
; alternatives involving "m" output "#" rather than an instruction.
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(unspec:V_1REG
[(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
[(match_operand:V_1REG 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
(clobber (match_operand:<VnDI> 2 "register_operand"))]
"lra_in_progress || reload_completed"
"@
v_mov_b32\t%0, %1
v_mov_b32\t%0, %1
#
#"
[(set_attr "type" "vop1,vop1,*,*")
(set_attr "length" "4,8,12,12")])
{@ [cons: =0, 1, =2; attrs: type, length]
[v,vA,&v;vop1,4 ] v_mov_b32\t%0, %1
[v,vB,&v;vop1,8 ] ^
[v,m ,&v;* ,12] #
[m,v ,&v;* ,12] #
})
(define_insn "mov<mode>_sgprbase"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
@ -676,17 +673,17 @@
(set_attr "length" "8,12,12")])
; V_4REG form of the UNSPEC_SGPRBASE move.  The non-memory alternative emits
; four v_mov_b32 instructions using the %L, %H, %J and %K operand modifiers
; (presumably one per 32-bit part -- confirm against the print_operand code);
; the two alternatives involving "m" output "#".
(define_insn "mov<mode>_sgprbase"
[(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, m")
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
(unspec:V_4REG
[(match_operand:V_4REG 1 "general_operand" "vDB, m, v")]
[(match_operand:V_4REG 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
(clobber (match_operand:<VnDI> 2 "register_operand"))]
"lra_in_progress || reload_completed"
"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
#
#"
[(set_attr "type" "vmult,*,*")
(set_attr "length" "8,12,12")])
{@ [cons: =0, 1, =2; attrs: type, length]
[v,vDB,&v;vmult,8 ] v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
[v,m ,&v;* ,12] #
[m,v ,&v;* ,12] #
})
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.

View file

@ -542,87 +542,76 @@
; 32bit move pattern
; Alternatives cover s_mov/s_movk, s_buffer and s_load/s_store accesses,
; v_mov, v_readlane/v_writelane, and flat, ds and global loads and stores.
; Every load template, and ds_write, is followed by an s_waitcnt in the same
; output string; the other stores are not.  Only the readlane/writelane
; alternatives set the "exec" attribute (to "none").
(define_insn "*mov<mode>_insn"
[(set (match_operand:SISF 0 "nonimmediate_operand"
"=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
(match_operand:SISF 1 "gcn_load_operand"
"SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
[(set (match_operand:SISF 0 "nonimmediate_operand")
(match_operand:SISF 1 "gcn_load_operand"))]
""
"@
s_mov_b32\t%0, %1
s_movk_i32\t%0, %1
s_mov_b32\t%0, %1
s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
s_buffer_store%s1\t%1, s[0:3], %0
s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
s_store_dword\t%1, %A0
v_mov_b32\t%0, %1
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dword\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dword\t%A0, %1%O0%g0"
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
flat,vop1,ds,ds,sop1,flat,flat")
(set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
{@ [cons: =0, 1; attrs: type, exec, length]
[SD ,SSA ;sop1 ,* ,4 ] s_mov_b32\t%0, %1
[SD ,J ;sopk ,* ,4 ] s_movk_i32\t%0, %1
[SD ,B ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
[SD ,RB ;smem ,* ,12] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
[RB ,Sm ;smem ,* ,12] s_buffer_store%s1\t%1, s[0:3], %0
[Sm ,RS ;smem ,* ,12] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
[RS ,Sm ;smem ,* ,12] s_store_dword\t%1, %A0
[v ,v ;vop1 ,* ,4 ] v_mov_b32\t%0, %1
[Sg ,v ;vop3a,none,8 ] v_readlane_b32\t%0, %1, 0
[v ,Sv ;vop3a,none,8 ] v_writelane_b32\t%0, %1, 0
[v ,RF ;flat ,* ,12] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
[RF ,v ;flat ,* ,12] flat_store_dword\t%A0, %1%O0%g0
[v ,B ;vop1 ,* ,8 ] v_mov_b32\t%0, %1
[RLRG,v ;ds ,* ,12] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
[v ,RLRG;ds ,* ,12] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
[SD ,Y ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
[v ,RM ;flat ,* ,12] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
[RM ,v ;flat ,* ,12] global_store_dword\t%A0, %1%O0%g0
})
; 8/16bit move pattern
; Matches only when gcn_valid_move_p accepts the mode and operands.  Unlike
; the 32bit pattern there are no smem alternatives here.  Register moves
; still use the b32 instructions; the flat/ds/global templates use the
; %o, %s, %b and %u operand modifiers (presumably to select the access
; width -- confirm against the print_operand code).
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
(define_insn "*mov<mode>_insn"
[(set (match_operand:QIHI 0 "nonimmediate_operand"
"=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
(match_operand:QIHI 1 "gcn_load_operand"
"SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
[(set (match_operand:QIHI 0 "nonimmediate_operand")
(match_operand:QIHI 1 "gcn_load_operand"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
"@
s_mov_b32\t%0, %1
s_movk_i32\t%0, %1
s_mov_b32\t%0, %1
v_mov_b32\t%0, %1
v_readlane_b32\t%0, %1, 0
v_writelane_b32\t%0, %1, 0
flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store%s0\t%A0, %1%O0%g0
v_mov_b32\t%0, %1
ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store%s0\t%A0, %1%O0%g0"
[(set_attr "type"
"sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
(set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
(set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
{@ [cons: =0, 1; attrs: type, exec, length]
[SD ,SSA ;sop1 ,* ,4 ] s_mov_b32\t%0, %1
[SD ,J ;sopk ,* ,4 ] s_movk_i32\t%0, %1
[SD ,B ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
[v ,v ;vop1 ,* ,4 ] v_mov_b32\t%0, %1
[Sg ,v ;vop3a,none,4 ] v_readlane_b32\t%0, %1, 0
[v ,Sv ;vop3a,none,4 ] v_writelane_b32\t%0, %1, 0
[v ,RF ;flat ,* ,12] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
[RF ,v ;flat ,* ,12] flat_store%s0\t%A0, %1%O0%g0
[v ,B ;vop1 ,* ,8 ] v_mov_b32\t%0, %1
[RLRG,v ;ds ,* ,12] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
[v ,RLRG;ds ,* ,12] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
[v ,RM ;flat ,* ,12] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
[RM ,v ;flat ,* ,12] global_store%s0\t%A0, %1%O0%g0
})
; 64bit move pattern
(define_insn_and_split "*mov<mode>_insn"
[(set (match_operand:DIDF 0 "nonimmediate_operand"
"=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
(match_operand:DIDF 1 "general_operand"
"SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
[(set (match_operand:DIDF 0 "nonimmediate_operand")
(match_operand:DIDF 1 "general_operand"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
"@
s_mov_b64\t%0, %1
s_mov_b64\t%0, %1
#
s_store_dwordx2\t%1, %A0
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
#
#
#
#
flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
flat_store_dwordx2\t%A0, %1%O0%g0
ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
global_store_dwordx2\t%A0, %1%O0%g0"
{@ [cons: =0, 1; attrs: type, length]
[SD ,SSA ;sop1 ,4 ] s_mov_b64\t%0, %1
[SD ,C ;sop1 ,8 ] ^
[SD ,DB ;mult ,* ] #
[RS ,Sm ;smem ,12] s_store_dwordx2\t%1, %A0
[Sm ,RS ;smem ,12] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
[v ,v ;vmult,* ] #
[v ,DB ;vmult,* ] #
[Sg ,v ;vmult,* ] #
[v ,Sv ;vmult,* ] #
[v ,RF ;flat ,12] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
[RF ,v ;flat ,12] flat_store_dwordx2\t%A0, %1%O0%g0
[RLRG,v ;ds ,12] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
[v ,RLRG;ds ,12] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
[v ,RM ;flat ,12] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
[RM ,v ;flat ,12] global_store_dwordx2\t%A0, %1%O0%g0
}
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
&& !gcn_sgpr_move_p (operands[0], operands[1]))
@ -651,32 +640,28 @@
operands[2] = outhi;
operands[3] = inhi;
}
}
[(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
flat,ds,ds,flat,flat")
(set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
})
; 128-bit move.
(define_insn_and_split "*movti_insn"
[(set (match_operand:TI 0 "nonimmediate_operand"
"=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
(match_operand:TI 1 "general_operand"
"SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
[(set (match_operand:TI 0 "nonimmediate_operand")
(match_operand:TI 1 "general_operand" ))]
""
"@
#
s_store_dwordx4\t%1, %A0
s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
flat_store_dwordx4\t%A0, %1%O0%g0
flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
#
#
#
global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
{@ [cons: =0, 1; attrs: type, delayeduse, length]
[SD,SSB;mult ,* ,* ] #
[RS,Sm ;smem ,* ,12] s_store_dwordx4\t%1, %A0
[Sm,RS ;smem ,yes,12] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
[RF,v ;flat ,* ,12] flat_store_dwordx4\t%A0, %1%O0%g0
[v ,RF ;flat ,* ,12] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
[v ,v ;vmult,* ,* ] #
[v ,Sv ;vmult,* ,* ] #
[SD,v ;vmult,* ,* ] #
[RM,v ;flat ,yes,12] global_store_dwordx4\t%A0, %1%O0%g0
[v ,RM ;flat ,* ,12] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
[RL,v ;ds ,* ,12] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
[v ,RL ;ds ,* ,12] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
}
"reload_completed
&& REG_P (operands[0])
&& (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
@ -695,11 +680,7 @@
operands[3] = gcn_operand_part (TImode, operands[1], 1);
operands[0] = gcn_operand_part (TImode, operands[0], 0);
operands[1] = gcn_operand_part (TImode, operands[1], 0);
}
[(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
ds,ds")
(set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
(set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
})
;; }}}
;; {{{ Prologue/Epilogue