amdgcn: Align VGPR pairs

Aligning the registers is not needed by the architecture, but doing so
allows us to remove the requirement for bug-prone early-clobber
constraints from many split patterns (and avoid adding more in future).

2020-02-21  Andrew Stubbs  <ams@codesourcery.com>

	gcc/
	* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
	* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
	(addv64di3_exec): Likewise.
	(subv64di3): Likewise.
	(subv64di3_exec): Likewise.
	(addv64di3_zext): Likewise.
	(addv64di3_zext_exec): Likewise.
	(addv64di3_zext_dup): Likewise.
	(addv64di3_zext_dup_exec): Likewise.
	(addv64di3_zext_dup2): Likewise.
	(addv64di3_zext_dup2_exec): Likewise.
	(addv64di3_sext_dup2): Likewise.
	(addv64di3_sext_dup2_exec): Likewise.
	(<expander>v64di3): Likewise.
	(<expander>v64di3_exec): Likewise.
	(*<reduc_op>_dpp_shr_v64di): Likewise.
	(*plus_carry_dpp_shr_v64di): Likewise.
	* config/gcn/gcn.md (adddi3): Likewise.
	(addptrdi3): Likewise.
	(<expander>di3): Likewise.
This commit is contained in:
Andrew Stubbs 2020-02-21 11:07:55 +00:00
parent 2291d1fd85
commit 3abfd4f341
4 changed files with 98 additions and 74 deletions

View file

@ -1,3 +1,26 @@
2020-02-21 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn.c (gcn_hard_regno_mode_ok): Align VGPR pairs.
* config/gcn/gcn-valu.md (addv64di3): Remove early-clobber.
(addv64di3_exec): Likewise.
(subv64di3): Likewise.
(subv64di3_exec): Likewise.
(addv64di3_zext): Likewise.
(addv64di3_zext_exec): Likewise.
(addv64di3_zext_dup): Likewise.
(addv64di3_zext_dup_exec): Likewise.
(addv64di3_zext_dup2): Likewise.
(addv64di3_zext_dup2_exec): Likewise.
(addv64di3_sext_dup2): Likewise.
(addv64di3_sext_dup2_exec): Likewise.
(<expander>v64di3): Likewise.
(<expander>v64di3_exec): Likewise.
(*<reduc_op>_dpp_shr_v64di): Likewise.
(*plus_carry_dpp_shr_v64di): Likewise.
* config/gcn/gcn.md (adddi3): Likewise.
(addptrdi3): Likewise.
(<expander>di3): Likewise.
2020-02-21 Andrew Stubbs <ams@codesourcery.com>
* config/gcn/gcn-valu.md (vec_seriesv64di): Use gen_vec_duplicatev64di.

View file

@ -1171,10 +1171,10 @@
(set_attr "length" "4,8,4,8")])
(define_insn_and_split "addv64di3"
[(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
(match_operand:V64DI 1 "register_operand" "%vDb,vDb0")
(match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb")))
(match_operand:V64DI 1 "register_operand" "%vDb")
(match_operand:V64DI 2 "gcn_alu_operand" " vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1200,14 +1200,13 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_exec"
[(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
(match_operand:V64DI 1 "register_operand" "%vDb,vDb0,vDb")
(match_operand:V64DI 2 "gcn_alu_operand" "vDb0, vDb,vDb"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand"
" U, U, 0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e")))
(match_operand:V64DI 1 "register_operand" "%vDb")
(match_operand:V64DI 2 "gcn_alu_operand" " vDb"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1238,10 +1237,10 @@
(set_attr "length" "8")])
(define_insn_and_split "subv64di3"
[(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
(minus:V64DI
(match_operand:V64DI 1 "gcn_alu_operand" "vDb,vDb0, v, v0")
(match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vDb0,vDb")))
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(minus:V64DI
(match_operand:V64DI 1 "gcn_alu_operand" "vDb, v")
(match_operand:V64DI 2 "gcn_alu_operand" " v,vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1267,14 +1266,13 @@
(set_attr "length" "8")])
(define_insn_and_split "subv64di3_exec"
[(set (match_operand:V64DI 0 "register_operand" "= &v, &v, &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(minus:V64DI
(match_operand:V64DI 1 "gcn_alu_operand" "vSvB,vSvB0, v, v0")
(match_operand:V64DI 2 "gcn_alu_operand" " v0, v,vSvB0,vSvB"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand"
" U0, U0, U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
(match_operand:V64DI 1 "gcn_alu_operand" "vSvB, v")
(match_operand:V64DI 2 "gcn_alu_operand" " v,vSvB"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
"register_operand (operands[1], VOIDmode)
|| register_operand (operands[2], VOIDmode)"
@ -1306,11 +1304,11 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext"
[(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(plus:V64DI
(zero_extend:V64DI
(match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB, vA, vB"))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA,0vDb,0vDA")))
(match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1334,15 +1332,14 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_exec"
[(set (match_operand:V64DI 0 "register_operand" "=&v, &v, &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI
(match_operand:V64SI 1 "gcn_alu_operand" "0vA, vA,0vB, vB"))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb,0vDb,vDA,0vDA"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand"
" U0, U0, U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e")))
(match_operand:V64SI 1 "gcn_alu_operand" " vA, vB"))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1371,12 +1368,12 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup"
[(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(plus:V64DI
(zero_extend:V64DI
(vec_duplicate:V64SI
(match_operand:SI 1 "gcn_alu_operand" " BSv, ASv")))
(match_operand:V64DI 2 "gcn_alu_operand" "vDA0,vDb0")))
(match_operand:SI 1 "gcn_alu_operand" "BSv,ASv")))
(match_operand:V64DI 2 "gcn_alu_operand" "vDA,vDb")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1400,15 +1397,15 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup_exec"
[(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
[(set (match_operand:V64DI 0 "register_operand" "= v, v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI
(vec_duplicate:V64SI
(match_operand:SI 1 "gcn_alu_operand" " ASv, BSv")))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb0,vDA0"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(match_operand:SI 1 "gcn_alu_operand" "ASv,BSv")))
(match_operand:V64DI 2 "gcn_alu_operand" "vDb,vDA"))
(match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
(clobber (reg:DI VCC_REG))]
""
"#"
@ -1437,7 +1434,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup2"
[(set (match_operand:V64DI 0 "register_operand" "= &v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
(zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
(vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "DbSv"))))
@ -1463,7 +1460,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_zext_dup2_exec"
[(set (match_operand:V64DI 0 "register_operand" "=&v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
(zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@ -1500,7 +1497,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_sext_dup2"
[(set (match_operand:V64DI 0 "register_operand" "=&v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(plus:V64DI
(sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
(vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
@ -1528,7 +1525,7 @@
(set_attr "length" "8")])
(define_insn_and_split "addv64di3_sext_dup2_exec"
[(set (match_operand:V64DI 0 "register_operand" "=&v")
[(set (match_operand:V64DI 0 "register_operand" "= v")
(vec_merge:V64DI
(plus:V64DI
(sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
@ -1935,10 +1932,10 @@
(set_attr "length" "8,8")])
(define_insn_and_split "<expander>v64di3"
[(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
[(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
(bitop:V64DI
(match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
(match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
(match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
(match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
""
"@
#
@ -1960,7 +1957,7 @@
(set_attr "length" "16,8")])
(define_insn_and_split "<expander>v64di3_exec"
[(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
[(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "= v,RD")
(vec_merge:V64DI
(bitop:V64DI
(match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
@ -2999,11 +2996,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
[(set (match_operand:V64DI 0 "register_operand" "=&v")
[(set (match_operand:V64DI 0 "register_operand" "=v")
(unspec:V64DI
[(match_operand:V64DI 1 "register_operand" "v0")
(match_operand:V64DI 2 "register_operand" "v0")
(match_operand:SI 3 "const_int_operand" "n")]
[(match_operand:V64DI 1 "register_operand" "v")
(match_operand:V64DI 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@ -3065,11 +3062,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_v64di"
[(set (match_operand:V64DI 0 "register_operand" "=&v")
[(set (match_operand:V64DI 0 "register_operand" "=v")
(unspec:V64DI
[(match_operand:V64DI 1 "register_operand" "v0")
(match_operand:V64DI 2 "register_operand" "v0")
(match_operand:SI 3 "const_int_operand" "n")]
[(match_operand:V64DI 1 "register_operand" "v")
(match_operand:V64DI 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""

View file

@ -458,7 +458,15 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|| (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
|| (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
if (VGPR_REGNO_P (regno))
return (vgpr_1reg_mode_p (mode) || vgpr_2reg_mode_p (mode)
/* Vector instructions do not care about the alignment of register
pairs, but where there is no 64-bit instruction, many of the
define_split do not work if the input and output registers partially
overlap. We tried to fix this with early clobber and match
constraints, but it was bug prone, added complexity, and conflicts
with the 'U0' constraints on vec_merge.
Therefore, we restrict ourselves to aligned registers. */
return (vgpr_1reg_mode_p (mode)
|| (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
/* TImode is used by DImode compare_and_swap. */
|| mode == TImode);
return false;

View file

@ -1065,22 +1065,16 @@
; through some RTL optimisation passes, and means the CC reg we set isn't
; dependent on the constraint alternative (which doesn't seem to work well).
; There's an early clobber in the case where "v[0:1]=v[1:2]+?" but
; "v[0:1]=v[0:1]+?" is fine (as is "v[1:2]=v[0:1]+?", but that's trickier).
; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
; used as an operand due to the read of VCC, so we restrict constants to the
; inlinable range for that alternative.
(define_insn_and_split "adddi3"
[(set (match_operand:DI 0 "register_operand"
"=&Sg,&Sg,&Sg,&Sg,&v,&v,&v,&v")
(plus:DI (match_operand:DI 1 "register_operand"
" Sg, 0, 0, Sg, v, 0, 0, v")
(match_operand:DI 2 "nonmemory_operand"
" 0,SgB, 0,SgB, 0,vA, 0,vA")))
(clobber (match_scratch:BI 3 "= cs, cs, cs, cs, X, X, X, X"))
(clobber (match_scratch:DI 4 "= X, X, X, X,cV,cV,cV,cV"))]
[(set (match_operand:DI 0 "register_operand" "=Sg, v")
(plus:DI (match_operand:DI 1 "register_operand" " Sg, v")
(match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
(clobber (match_scratch:BI 3 "=cs, X"))
(clobber (match_scratch:DI 4 "= X,cV"))]
""
"#"
"&& reload_completed"
@ -1109,7 +1103,7 @@
cc));
DONE;
}
[(set_attr "type" "mult,mult,mult,mult,vmult,vmult,vmult,vmult")
[(set_attr "type" "mult,vmult")
(set_attr "length" "8")])
(define_expand "adddi3_scc"
@ -1196,11 +1190,14 @@
; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
; Note that it is not safe to save/clobber/restore SCC because doing so will
; break data-flow analysis, so this must use vector registers.
;
; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway.
(define_insn "addptrdi3"
[(set (match_operand:DI 0 "register_operand" "= &v")
(plus:DI (match_operand:DI 1 "register_operand" " v0")
(match_operand:DI 2 "nonmemory_operand" "vDA0")))]
[(set (match_operand:DI 0 "register_operand" "= v")
(plus:DI (match_operand:DI 1 "register_operand" " v0")
(match_operand:DI 2 "nonmemory_operand" "vDA")))]
""
{
rtx new_operands[4] = { operands[0], operands[1], operands[2],
@ -1470,15 +1467,14 @@
(define_code_iterator vec_and_scalar64_com [and ior xor])
(define_insn_and_split "<expander>di3"
[(set (match_operand:DI 0 "register_operand" "= Sg, &v, &v")
[(set (match_operand:DI 0 "register_operand" "= Sg, v")
(vec_and_scalar64_com:DI
(match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB,vSvDB")
(match_operand:DI 2 "gcn_alu_operand" " SgC, v, 0")))
(clobber (match_scratch:BI 3 "= cs, X, X"))]
(match_operand:DI 1 "gcn_alu_operand" "%SgA,vSvDB")
(match_operand:DI 2 "gcn_alu_operand" " SgC, v")))
(clobber (match_scratch:BI 3 "= cs, X"))]
""
"@
s_<mnemonic>0\t%0, %1, %2
#
#"
"reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
[(parallel [(set (match_dup 4)
@ -1495,7 +1491,7 @@
operands[8] = gcn_operand_part (DImode, operands[1], 1);
operands[9] = gcn_operand_part (DImode, operands[2], 1);
}
[(set_attr "type" "sop2,vop2,vop2")
[(set_attr "type" "sop2,vop2")
(set_attr "length" "8")])
(define_insn "<expander>di3"