amdgcn: Add Accumulator VGPR registers
Add the new CDNA register file. We don't support any of the specialized instructions that use these registers, but they're useful to relieve register pressure without spilling to stack. Co-authored-by: Andrew Jenner <andrew@codesourcery.com> gcc/ChangeLog: * config/gcn/constraints.md: Add "a" AVGPR constraint. * config/gcn/gcn-valu.md (*mov<mode>): Add AVGPR alternatives. (*mov<mode>_4reg): Likewise. (@mov<mode>_sgprbase): Likewise. (gather<mode>_insn_1offset<exec>): Likewise. (gather<mode>_insn_1offset_ds<exec>): Likewise. (gather<mode>_insn_2offsets<exec>): Likewise. (scatter<mode>_insn_1offset<exec_scatter>): Likewise. (scatter<mode>_insn_1offset_ds<exec_scatter>): Likewise. (scatter<mode>_insn_2offsets<exec_scatter>): Likewise. * config/gcn/gcn.cc (MAX_NORMAL_AVGPR_COUNT): Define. (gcn_class_max_nregs): Handle AVGPR_REGS and ALL_VGPR_REGS. (gcn_hard_regno_mode_ok): Likewise. (gcn_regno_reg_class): Likewise. (gcn_spill_class): Allow spilling to AVGPRs on TARGET_CDNA1_PLUS. (gcn_sgpr_move_p): Handle AVGPRs. (gcn_secondary_reload): Reload AVGPRs via VGPRs. (gcn_conditional_register_usage): Handle AVGPRs. (gcn_vgpr_equivalent_register_operand): New function. (gcn_valid_move_p): Check for validity of AVGPR moves. (gcn_compute_frame_offsets): Handle AVGPRs. (gcn_memory_move_cost): Likewise. (gcn_register_move_cost): Likewise. (gcn_vmem_insn_p): Handle TYPE_VOP3P_MAI. (gcn_md_reorg): Handle AVGPRs. (gcn_hsa_declare_function_name): Likewise. (print_reg): Likewise. (gcn_dwarf_register_number): Likewise. * config/gcn/gcn.h (FIRST_AVGPR_REG): Define. (AVGPR_REGNO): Define. (LAST_AVGPR_REG): Define. (SOFT_ARG_REG): Update. (FRAME_POINTER_REGNUM): Update. (DWARF_LINK_REGISTER): Update. (FIRST_PSEUDO_REGISTER): Update. (AVGPR_REGNO_P): Define. (enum reg_class): Add AVGPR_REGS and ALL_VGPR_REGS. (REG_CLASS_CONTENTS): Add new register classes and add entries for AVGPRs to all classes. (REGISTER_NAMES): Add AVGPRs. * config/gcn/gcn.md (FIRST_AVGPR_REG, LAST_AVGPR_REG): Define. 
(AP_REGNUM, FP_REGNUM): Update. (define_attr "type"): Add vop3p_mai. (define_attr "unit"): Handle vop3p_mai. (define_attr "gcn_version"): Add "cdna2". (define_attr "enabled"): Handle cdna2. (*mov<mode>_insn): Add AVGPR alternatives. (*movti_insn): Likewise. * config/gcn/mkoffload.cc (isa_has_combined_avgprs): New. (process_asm): Process avgpr_count. * config/gcn/predicates.md (gcn_avgpr_register_operand): New. (gcn_avgpr_hard_register_operand): New. * doc/md.texi: Document the "a" constraint. gcc/testsuite/ChangeLog: * gcc.target/gcn/avgpr-mem-double.c: New test. * gcc.target/gcn/avgpr-mem-int.c: New test. * gcc.target/gcn/avgpr-mem-long.c: New test. * gcc.target/gcn/avgpr-mem-short.c: New test. * gcc.target/gcn/avgpr-spill-double.c: New test. * gcc.target/gcn/avgpr-spill-int.c: New test. * gcc.target/gcn/avgpr-spill-long.c: New test. * gcc.target/gcn/avgpr-spill-short.c: New test. libgomp/ChangeLog: * plugin/plugin-gcn.c (max_isa_vgprs): New. (run_kernel): CDNA2 devices have more VGPRs.
This commit is contained in:
parent
a0e6306b7e
commit
ae0d2c2402
17 changed files with 839 additions and 192 deletions
|
@ -77,6 +77,9 @@
|
|||
(define_register_constraint "v" "VGPR_REGS"
|
||||
"VGPR registers")
|
||||
|
||||
(define_register_constraint "a" "TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS"
|
||||
"Accumulator VGPR registers")
|
||||
|
||||
(define_register_constraint "Sg" "SGPR_REGS"
|
||||
"SGPR registers")
|
||||
|
||||
|
|
|
@ -449,12 +449,16 @@
|
|||
(set_attr "length" "0")])
|
||||
|
||||
(define_insn "*mov<mode>"
|
||||
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
|
||||
(match_operand:V_1REG 1 "general_operand" "vA,B"))]
|
||||
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
|
||||
(match_operand:V_1REG 1 "general_operand"))]
|
||||
""
|
||||
"v_mov_b32\t%0, %1"
|
||||
[(set_attr "type" "vop1,vop1")
|
||||
(set_attr "length" "4,8")])
|
||||
{@ [cons: =0, 1; attrs: type, length, gcn_version]
|
||||
[v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1
|
||||
[v ,B ;vop1 ,8,* ] ^
|
||||
[v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1
|
||||
[$a ,v ;vop3p_mai,8,* ] v_accvgpr_write_b32\t%0, %1
|
||||
[a ,a ;vop1 ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
|
||||
})
|
||||
|
||||
(define_insn "mov<mode>_exec"
|
||||
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
|
||||
|
@ -493,17 +497,29 @@
|
|||
; (set_attr "length" "4,8,16,16")])
|
||||
|
||||
(define_insn "*mov<mode>"
|
||||
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
|
||||
(match_operand:V_2REG 1 "general_operand" "vDB"))]
|
||||
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
|
||||
(match_operand:V_2REG 1 "general_operand" "vDB,a, v,a"))]
|
||||
""
|
||||
{
|
||||
if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
|
||||
return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
|
||||
else
|
||||
return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
|
||||
}
|
||||
[(set_attr "type" "vmult")
|
||||
(set_attr "length" "16")])
|
||||
"@
|
||||
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
|
||||
return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
|
||||
else \
|
||||
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
|
||||
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
|
||||
return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
|
||||
else \
|
||||
return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
|
||||
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
|
||||
return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
|
||||
else \
|
||||
return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
|
||||
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
|
||||
return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
|
||||
else \
|
||||
return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
|
||||
[(set_attr "type" "vmult,vmult,vmult,vmult")
|
||||
(set_attr "length" "16,16,16,8")
|
||||
(set_attr "gcn_version" "*,*,*,cdna2")])
|
||||
|
||||
(define_insn "mov<mode>_exec"
|
||||
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
|
||||
|
@ -546,17 +562,15 @@
|
|||
(set_attr "length" "16,16,16,16,16")])
|
||||
|
||||
(define_insn "*mov<mode>_4reg"
|
||||
[(set (match_operand:V_4REG 0 "nonimmediate_operand" "=v")
|
||||
(match_operand:V_4REG 1 "general_operand" "vDB"))]
|
||||
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
|
||||
(match_operand:V_4REG 1 "general_operand"))]
|
||||
""
|
||||
{
|
||||
return "v_mov_b32\t%L0, %L1\;"
|
||||
"v_mov_b32\t%H0, %H1\;"
|
||||
"v_mov_b32\t%J0, %J1\;"
|
||||
"v_mov_b32\t%K0, %K1\;";
|
||||
}
|
||||
[(set_attr "type" "vmult")
|
||||
(set_attr "length" "16")])
|
||||
{@ [cons: =0, 1; attrs: type, length, gcn_version]
|
||||
[v,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1
|
||||
[v,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1
|
||||
[a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1
|
||||
[a,a ;vmult,32,cdna2] v_accvgpr_mov_b32\t%L0, %L1\; v_accvgpr_mov_b32\t%H0, %H1\; v_accvgpr_mov_b32\t%J0, %J1\; v_accvgpr_mov_b32\t%K0, %K1
|
||||
})
|
||||
|
||||
(define_insn "mov<mode>_exec"
|
||||
[(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m")
|
||||
|
@ -648,19 +662,21 @@
|
|||
UNSPEC_SGPRBASE))
|
||||
(clobber (match_operand:<VnDI> 2 "register_operand"))]
|
||||
"lra_in_progress || reload_completed"
|
||||
{@ [cons: =0, 1, =2; attrs: type, length]
|
||||
[v,vA,&v;vop1,4 ] v_mov_b32\t%0, %1
|
||||
[v,vB,&v;vop1,8 ] ^
|
||||
[v,m ,&v;* ,12] #
|
||||
[m,v ,&v;* ,12] #
|
||||
{@ [cons: =0, 1, =2; attrs: type, length, gcn_version]
|
||||
[v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1
|
||||
[v,vB,&v;vop1,8 ,* ] ^
|
||||
[v,m ,&v;* ,12,* ] #
|
||||
[m,v ,&v;* ,12,* ] #
|
||||
[a,m ,&v;* ,12,cdna2] #
|
||||
[m,a ,&v;* ,12,cdna2] #
|
||||
})
|
||||
|
||||
(define_insn "@mov<mode>_sgprbase"
|
||||
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
|
||||
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m")
|
||||
(unspec:V_2REG
|
||||
[(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
|
||||
[(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, a")]
|
||||
UNSPEC_SGPRBASE))
|
||||
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
|
||||
(clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))]
|
||||
"lra_in_progress || reload_completed"
|
||||
"@
|
||||
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
|
||||
|
@ -668,9 +684,12 @@
|
|||
else \
|
||||
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
|
||||
#
|
||||
#
|
||||
#
|
||||
#"
|
||||
[(set_attr "type" "vmult,*,*")
|
||||
(set_attr "length" "8,12,12")])
|
||||
[(set_attr "type" "vmult,*,*,*,*")
|
||||
(set_attr "length" "8,12,12,12,12")
|
||||
(set_attr "gcn_version" "*,*,*,cdna2,cdna2")])
|
||||
|
||||
(define_insn "@mov<mode>_sgprbase"
|
||||
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
|
||||
|
@ -1126,13 +1145,13 @@
|
|||
{})
|
||||
|
||||
(define_insn "gather<mode>_insn_1offset<exec>"
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v,a")
|
||||
(unspec:V_MOV
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v")
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand 2 "immediate_operand" " n")))
|
||||
(match_operand 3 "immediate_operand" " n")
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 2 "immediate_operand" " n,n")))
|
||||
(match_operand 3 "immediate_operand" " n,n")
|
||||
(match_operand 4 "immediate_operand" " n,n")
|
||||
(mem:BLK (scratch))]
|
||||
UNSPEC_GATHER))]
|
||||
"(AS_FLAT_P (INTVAL (operands[3]))
|
||||
|
@ -1162,16 +1181,17 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
(define_insn "gather<mode>_insn_1offset_ds<exec>"
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v,a")
|
||||
(unspec:V_MOV
|
||||
[(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
|
||||
[(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v")
|
||||
(vec_duplicate:<VnSI>
|
||||
(match_operand 2 "immediate_operand" " n")))
|
||||
(match_operand 3 "immediate_operand" " n")
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 2 "immediate_operand" " n,n")))
|
||||
(match_operand 3 "immediate_operand" " n,n")
|
||||
(match_operand 4 "immediate_operand" " n,n")
|
||||
(mem:BLK (scratch))]
|
||||
UNSPEC_GATHER))]
|
||||
"(AS_ANY_DS_P (INTVAL (operands[3]))
|
||||
|
@ -1184,20 +1204,22 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "ds")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
(define_insn "gather<mode>_insn_2offsets<exec>"
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v")
|
||||
[(set (match_operand:V_MOV 0 "register_operand" "=v,a")
|
||||
(unspec:V_MOV
|
||||
[(plus:<VnDI>
|
||||
(plus:<VnDI>
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand:DI 1 "register_operand" "Sv"))
|
||||
(match_operand:DI 1 "register_operand" "Sv,Sv"))
|
||||
(sign_extend:<VnDI>
|
||||
(match_operand:<VnSI> 2 "register_operand" " v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 5 "immediate_operand" " n")
|
||||
(match_operand:<VnSI> 2 "register_operand" " v,v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
|
||||
" n,n")))
|
||||
(match_operand 4 "immediate_operand" " n,n")
|
||||
(match_operand 5 "immediate_operand" " n,n")
|
||||
(mem:BLK (scratch))]
|
||||
UNSPEC_GATHER))]
|
||||
"(AS_GLOBAL_P (INTVAL (operands[4]))
|
||||
|
@ -1216,7 +1238,8 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
(define_expand "scatter_store<mode><vnsi>"
|
||||
[(match_operand:DI 0 "register_operand")
|
||||
|
@ -1255,12 +1278,12 @@
|
|||
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
|
||||
[(set (mem:BLK (scratch))
|
||||
(unspec:BLK
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
|
||||
[(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v")
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand 1 "immediate_operand" "n")))
|
||||
(match_operand:V_MOV 2 "register_operand" "v")
|
||||
(match_operand 3 "immediate_operand" "n")
|
||||
(match_operand 4 "immediate_operand" "n")]
|
||||
(match_operand 1 "immediate_operand" "n,n")))
|
||||
(match_operand:V_MOV 2 "register_operand" "v,a")
|
||||
(match_operand 3 "immediate_operand" "n,n")
|
||||
(match_operand 4 "immediate_operand" "n,n")]
|
||||
UNSPEC_SCATTER))]
|
||||
"(AS_FLAT_P (INTVAL (operands[3]))
|
||||
&& (INTVAL(operands[1]) == 0
|
||||
|
@ -1288,17 +1311,18 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
|
||||
[(set (mem:BLK (scratch))
|
||||
(unspec:BLK
|
||||
[(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
|
||||
[(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v")
|
||||
(vec_duplicate:<VnSI>
|
||||
(match_operand 1 "immediate_operand" "n")))
|
||||
(match_operand:V_MOV 2 "register_operand" "v")
|
||||
(match_operand 3 "immediate_operand" "n")
|
||||
(match_operand 4 "immediate_operand" "n")]
|
||||
(match_operand 1 "immediate_operand" "n,n")))
|
||||
(match_operand:V_MOV 2 "register_operand" "v,a")
|
||||
(match_operand 3 "immediate_operand" "n,n")
|
||||
(match_operand 4 "immediate_operand" "n,n")]
|
||||
UNSPEC_SCATTER))]
|
||||
"(AS_ANY_DS_P (INTVAL (operands[3]))
|
||||
&& ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
|
||||
|
@ -1310,7 +1334,8 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "ds")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
|
||||
[(set (mem:BLK (scratch))
|
||||
|
@ -1318,13 +1343,13 @@
|
|||
[(plus:<VnDI>
|
||||
(plus:<VnDI>
|
||||
(vec_duplicate:<VnDI>
|
||||
(match_operand:DI 0 "register_operand" "Sv"))
|
||||
(match_operand:DI 0 "register_operand" "Sv,Sv"))
|
||||
(sign_extend:<VnDI>
|
||||
(match_operand:<VnSI> 1 "register_operand" " v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
|
||||
(match_operand:V_MOV 3 "register_operand" " v")
|
||||
(match_operand 4 "immediate_operand" " n")
|
||||
(match_operand 5 "immediate_operand" " n")]
|
||||
(match_operand:<VnSI> 1 "register_operand" "v,v")))
|
||||
(vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n")))
|
||||
(match_operand:V_MOV 3 "register_operand" "v,a")
|
||||
(match_operand 4 "immediate_operand" "n,n")
|
||||
(match_operand 5 "immediate_operand" "n,n")]
|
||||
UNSPEC_SCATTER))]
|
||||
"(AS_GLOBAL_P (INTVAL (operands[4]))
|
||||
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
|
||||
|
@ -1341,7 +1366,8 @@
|
|||
return buf;
|
||||
}
|
||||
[(set_attr "type" "flat")
|
||||
(set_attr "length" "12")])
|
||||
(set_attr "length" "12")
|
||||
(set_attr "gcn_version" "*,cdna2")])
|
||||
|
||||
;; }}}
|
||||
;; {{{ Permutations
|
||||
|
|
|
@ -96,6 +96,7 @@ static hash_map<tree, int> lds_allocs;
|
|||
|
||||
#define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC
|
||||
#define MAX_NORMAL_VGPR_COUNT 24
|
||||
#define MAX_NORMAL_AVGPR_COUNT 24
|
||||
|
||||
/* }}} */
|
||||
/* {{{ Initialization and options. */
|
||||
|
@ -483,7 +484,8 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
|
|||
{
|
||||
/* Scalar registers are 32bit, vector registers are in fact tuples of
|
||||
64 lanes. */
|
||||
if (rclass == VGPR_REGS)
|
||||
if (rclass == VGPR_REGS || rclass == AVGPR_REGS
|
||||
|| rclass == ALL_VGPR_REGS)
|
||||
{
|
||||
if (vgpr_1reg_mode_p (mode))
|
||||
return 1;
|
||||
|
@ -583,7 +585,7 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
|||
return (sgpr_1reg_mode_p (mode)
|
||||
|| (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
|
||||
|| (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
|
||||
if (VGPR_REGNO_P (regno))
|
||||
if (VGPR_REGNO_P (regno) || (AVGPR_REGNO_P (regno) && TARGET_CDNA1_PLUS))
|
||||
/* Vector instructions do not care about the alignment of register
|
||||
pairs, but where there is no 64-bit instruction, many of the
|
||||
define_split do not work if the input and output registers partially
|
||||
|
@ -623,6 +625,8 @@ gcn_regno_reg_class (int regno)
|
|||
}
|
||||
if (VGPR_REGNO_P (regno))
|
||||
return VGPR_REGS;
|
||||
if (AVGPR_REGNO_P (regno))
|
||||
return AVGPR_REGS;
|
||||
if (SGPR_REGNO_P (regno))
|
||||
return SGPR_REGS;
|
||||
if (regno < FIRST_VGPR_REG)
|
||||
|
@ -813,7 +817,7 @@ gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
|
|||
|| c == VCC_CONDITIONAL_REG || c == EXEC_MASK_REG)
|
||||
return SGPR_REGS;
|
||||
else
|
||||
return NO_REGS;
|
||||
return c == VGPR_REGS && TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS;
|
||||
}
|
||||
|
||||
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
|
||||
|
@ -2348,12 +2352,15 @@ gcn_sgpr_move_p (rtx op0, rtx op1)
|
|||
return true;
|
||||
if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
|
||||
return true;
|
||||
if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
|
||||
|| VGPR_REGNO_P (REGNO (op0)))
|
||||
if (!REG_P (op0)
|
||||
|| REGNO (op0) >= FIRST_PSEUDO_REGISTER
|
||||
|| VGPR_REGNO_P (REGNO (op0))
|
||||
|| AVGPR_REGNO_P (REGNO (op0)))
|
||||
return false;
|
||||
if (REG_P (op1)
|
||||
&& REGNO (op1) < FIRST_PSEUDO_REGISTER
|
||||
&& !VGPR_REGNO_P (REGNO (op1)))
|
||||
&& !VGPR_REGNO_P (REGNO (op1))
|
||||
&& !AVGPR_REGNO_P (REGNO (op1)))
|
||||
return true;
|
||||
return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
|
||||
}
|
||||
|
@ -2424,6 +2431,11 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
|
|||
result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
|
||||
break;
|
||||
}
|
||||
|
||||
/* CDNA1 doesn't have an instruction for going between the accumulator
|
||||
registers and memory. Go via a VGPR in this case. */
|
||||
if (TARGET_CDNA1 && rclass == AVGPR_REGS && result != VGPR_REGS)
|
||||
result = VGPR_REGS;
|
||||
}
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
|
@ -2445,7 +2457,8 @@ gcn_conditional_register_usage (void)
|
|||
|
||||
if (cfun->machine->normal_function)
|
||||
{
|
||||
/* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
|
||||
/* Restrict the set of SGPRs, VGPRs and AVGPRs used by non-kernel
|
||||
functions. */
|
||||
for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
|
||||
i <= LAST_SGPR_REG; i++)
|
||||
fixed_regs[i] = 1, call_used_regs[i] = 1;
|
||||
|
@ -2454,6 +2467,9 @@ gcn_conditional_register_usage (void)
|
|||
i <= LAST_VGPR_REG; i++)
|
||||
fixed_regs[i] = 1, call_used_regs[i] = 1;
|
||||
|
||||
for (int i = AVGPR_REGNO (MAX_NORMAL_AVGPR_COUNT);
|
||||
i <= LAST_AVGPR_REG; i++)
|
||||
fixed_regs[i] = 1, call_used_regs[i] = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2507,6 +2523,16 @@ gcn_conditional_register_usage (void)
|
|||
fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
|
||||
}
|
||||
|
||||
/* Return true if X, in mode MODE, is a VGPR register operand or, when
   TARGET_CDNA2_PLUS holds, an AVGPR register operand.  Otherwise return
   false.  gcn_valid_move_p uses this to decide which registers may be the
   register side of a flat, global or DS memory move.  */
static bool
|
||||
gcn_vgpr_equivalent_register_operand (rtx x, machine_mode mode)
|
||||
{
|
||||
if (gcn_vgpr_register_operand (x, mode))
|
||||
return true;
|
||||
/* AVGPRs only stand in for VGPRs when TARGET_CDNA2_PLUS is set.  */
if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (x, mode))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Determine if a load or store is valid, according to the register classes
|
||||
and address space. Used primarily by the machine description to decide
|
||||
when to split a move into two steps. */
|
||||
|
@ -2515,21 +2541,36 @@ bool
|
|||
gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
|
||||
{
|
||||
if (!MEM_P (dest) && !MEM_P (src))
|
||||
return true;
|
||||
{
|
||||
if (gcn_vgpr_register_operand (src, mode)
|
||||
&& gcn_avgpr_register_operand (dest, mode))
|
||||
return true;
|
||||
if (gcn_avgpr_register_operand (src, mode)
|
||||
&& gcn_vgpr_register_operand (dest, mode))
|
||||
return true;
|
||||
if (TARGET_CDNA2_PLUS
|
||||
&& gcn_avgpr_register_operand (src, mode)
|
||||
&& gcn_avgpr_register_operand (dest, mode))
|
||||
return true;
|
||||
if (gcn_avgpr_hard_register_operand (src, mode)
|
||||
|| gcn_avgpr_hard_register_operand (dest, mode))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (MEM_P (dest)
|
||||
&& AS_FLAT_P (MEM_ADDR_SPACE (dest))
|
||||
&& (gcn_flat_address_p (XEXP (dest, 0), mode)
|
||||
|| GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
|
||||
|| GET_CODE (XEXP (dest, 0)) == LABEL_REF)
|
||||
&& gcn_vgpr_register_operand (src, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (src, mode))
|
||||
return true;
|
||||
else if (MEM_P (src)
|
||||
&& AS_FLAT_P (MEM_ADDR_SPACE (src))
|
||||
&& (gcn_flat_address_p (XEXP (src, 0), mode)
|
||||
|| GET_CODE (XEXP (src, 0)) == SYMBOL_REF
|
||||
|| GET_CODE (XEXP (src, 0)) == LABEL_REF)
|
||||
&& gcn_vgpr_register_operand (dest, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (dest, mode))
|
||||
return true;
|
||||
|
||||
if (MEM_P (dest)
|
||||
|
@ -2537,14 +2578,14 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
|
|||
&& (gcn_global_address_p (XEXP (dest, 0))
|
||||
|| GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
|
||||
|| GET_CODE (XEXP (dest, 0)) == LABEL_REF)
|
||||
&& gcn_vgpr_register_operand (src, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (src, mode))
|
||||
return true;
|
||||
else if (MEM_P (src)
|
||||
&& AS_GLOBAL_P (MEM_ADDR_SPACE (src))
|
||||
&& (gcn_global_address_p (XEXP (src, 0))
|
||||
|| GET_CODE (XEXP (src, 0)) == SYMBOL_REF
|
||||
|| GET_CODE (XEXP (src, 0)) == LABEL_REF)
|
||||
&& gcn_vgpr_register_operand (dest, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (dest, mode))
|
||||
return true;
|
||||
|
||||
if (MEM_P (dest)
|
||||
|
@ -2565,12 +2606,12 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
|
|||
if (MEM_P (dest)
|
||||
&& AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
|
||||
&& gcn_ds_address_p (XEXP (dest, 0))
|
||||
&& gcn_vgpr_register_operand (src, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (src, mode))
|
||||
return true;
|
||||
else if (MEM_P (src)
|
||||
&& AS_ANY_DS_P (MEM_ADDR_SPACE (src))
|
||||
&& gcn_ds_address_p (XEXP (src, 0))
|
||||
&& gcn_vgpr_register_operand (dest, mode))
|
||||
&& gcn_vgpr_equivalent_register_operand (dest, mode))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
@ -3006,7 +3047,8 @@ gcn_compute_frame_offsets (void)
|
|||
if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
|
||||
|| ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
|
||||
&& frame_pointer_needed))
|
||||
offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
|
||||
offsets->callee_saves += (VGPR_REGNO_P (regno)
|
||||
|| AVGPR_REGNO_P (regno) ? 256 : 4);
|
||||
|
||||
/* Round up to 64-bit boundary to maintain stack alignment. */
|
||||
offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
|
||||
|
@ -3949,6 +3991,11 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
|
|||
if (in)
|
||||
return (LOAD_COST + 2) * nregs;
|
||||
return STORE_COST * nregs;
|
||||
case AVGPR_REGS:
|
||||
case ALL_VGPR_REGS:
|
||||
if (in)
|
||||
return (LOAD_COST + (TARGET_CDNA2_PLUS ? 2 : 4)) * nregs;
|
||||
return (STORE_COST + (TARGET_CDNA2_PLUS ? 0 : 2)) * nregs;
|
||||
case ALL_REGS:
|
||||
case ALL_GPR_REGS:
|
||||
case SRCDST_REGS:
|
||||
|
@ -3968,6 +4015,15 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
|
|||
static int
|
||||
gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
|
||||
{
|
||||
if (src == AVGPR_REGS)
|
||||
{
|
||||
if (dst == AVGPR_REGS)
|
||||
return TARGET_CDNA1 ? 6 : 2;
|
||||
if (dst != VGPR_REGS)
|
||||
return 6;
|
||||
}
|
||||
if (dst == AVGPR_REGS && src != VGPR_REGS)
|
||||
return 6;
|
||||
/* Increase cost of moving from and to vector registers. While this is
|
||||
fast in hardware (I think), it has hidden cost of setting up the exec
|
||||
flags. */
|
||||
|
@ -5674,6 +5730,7 @@ gcn_vmem_insn_p (attr_type type)
|
|||
case TYPE_MUBUF:
|
||||
case TYPE_MTBUF:
|
||||
case TYPE_FLAT:
|
||||
case TYPE_VOP3P_MAI:
|
||||
return true;
|
||||
case TYPE_UNKNOWN:
|
||||
case TYPE_SOP1:
|
||||
|
@ -5913,7 +5970,8 @@ gcn_md_reorg (void)
|
|||
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
|
||||
{
|
||||
const_rtx x = *iter;
|
||||
if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
|
||||
if (REG_P (x) && (VGPR_REGNO_P (REGNO (x))
|
||||
|| AVGPR_REGNO_P (REGNO (x))))
|
||||
{
|
||||
if (VECTOR_MODE_P (GET_MODE (x)))
|
||||
{
|
||||
|
@ -6069,17 +6127,16 @@ gcn_md_reorg (void)
|
|||
if (!prev_insn->insn)
|
||||
continue;
|
||||
|
||||
HARD_REG_SET depregs = prev_insn->writes & ireads;
|
||||
|
||||
/* VALU writes SGPR followed by VMEM reading the same SGPR
|
||||
requires 5 wait states. */
|
||||
if ((prev_insn->age + nops_rqd) < 5
|
||||
&& prev_insn->unit == UNIT_VECTOR
|
||||
&& gcn_vmem_insn_p (itype))
|
||||
{
|
||||
HARD_REG_SET regs = prev_insn->writes & ireads;
|
||||
if (hard_reg_set_intersect_p
|
||||
(regs, reg_class_contents[(int) SGPR_REGS]))
|
||||
nops_rqd = 5 - prev_insn->age;
|
||||
}
|
||||
&& gcn_vmem_insn_p (itype)
|
||||
&& hard_reg_set_intersect_p
|
||||
(depregs, reg_class_contents[(int) SGPR_REGS]))
|
||||
nops_rqd = 5 - prev_insn->age;
|
||||
|
||||
/* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
|
||||
requires 5 wait states. */
|
||||
|
@ -6101,15 +6158,12 @@ gcn_md_reorg (void)
|
|||
SGPR/VCC as lane select requires 4 wait states. */
|
||||
if ((prev_insn->age + nops_rqd) < 4
|
||||
&& prev_insn->unit == UNIT_VECTOR
|
||||
&& get_attr_laneselect (insn) == LANESELECT_YES)
|
||||
{
|
||||
HARD_REG_SET regs = prev_insn->writes & ireads;
|
||||
if (hard_reg_set_intersect_p
|
||||
(regs, reg_class_contents[(int) SGPR_REGS])
|
||||
&& get_attr_laneselect (insn) == LANESELECT_YES
|
||||
&& (hard_reg_set_intersect_p
|
||||
(depregs, reg_class_contents[(int) SGPR_REGS])
|
||||
|| hard_reg_set_intersect_p
|
||||
(regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
|
||||
nops_rqd = 4 - prev_insn->age;
|
||||
}
|
||||
(depregs, reg_class_contents[(int) VCC_CONDITIONAL_REG])))
|
||||
nops_rqd = 4 - prev_insn->age;
|
||||
|
||||
/* VALU writes VGPR followed by VALU_DPP reading that VGPR
|
||||
requires 2 wait states. */
|
||||
|
@ -6117,9 +6171,8 @@ gcn_md_reorg (void)
|
|||
&& prev_insn->unit == UNIT_VECTOR
|
||||
&& itype == TYPE_VOP_DPP)
|
||||
{
|
||||
HARD_REG_SET regs = prev_insn->writes & ireads;
|
||||
if (hard_reg_set_intersect_p
|
||||
(regs, reg_class_contents[(int) VGPR_REGS]))
|
||||
(depregs, reg_class_contents[(int) VGPR_REGS]))
|
||||
nops_rqd = 2 - prev_insn->age;
|
||||
}
|
||||
|
||||
|
@ -6138,6 +6191,35 @@ gcn_md_reorg (void)
|
|||
(prev_insn->writes,
|
||||
reg_class_contents[(int)VCC_CONDITIONAL_REG])))
|
||||
nops_rqd = ivccwait - prev_insn->age;
|
||||
|
||||
/* CDNA1: write VGPR before v_accvgpr_write reads it. */
|
||||
if (TARGET_CDNA1
|
||||
&& (prev_insn->age + nops_rqd) < 2
|
||||
&& hard_reg_set_intersect_p
|
||||
(depregs, reg_class_contents[(int) VGPR_REGS])
|
||||
&& hard_reg_set_intersect_p
|
||||
(iwrites, reg_class_contents[(int) AVGPR_REGS]))
|
||||
nops_rqd = 2 - prev_insn->age;
|
||||
|
||||
/* CDNA1: v_accvgpr_write writes AVGPR before v_accvgpr_read. */
|
||||
if (TARGET_CDNA1
|
||||
&& (prev_insn->age + nops_rqd) < 3
|
||||
&& hard_reg_set_intersect_p
|
||||
(depregs, reg_class_contents[(int) AVGPR_REGS])
|
||||
&& hard_reg_set_intersect_p
|
||||
(iwrites, reg_class_contents[(int) VGPR_REGS]))
|
||||
nops_rqd = 3 - prev_insn->age;
|
||||
|
||||
/* CDNA1: Undocumented(?!) read-after-write when restoring values
|
||||
from AVGPRs to VGPRs. Observed problem was for address register
|
||||
of flat_load instruction, but others may be affected? */
|
||||
if (TARGET_CDNA1
|
||||
&& (prev_insn->age + nops_rqd) < 2
|
||||
&& hard_reg_set_intersect_p
|
||||
(prev_insn->reads, reg_class_contents[(int) AVGPR_REGS])
|
||||
&& hard_reg_set_intersect_p
|
||||
(depregs, reg_class_contents[(int) VGPR_REGS]))
|
||||
nops_rqd = 2 - prev_insn->age;
|
||||
}
|
||||
|
||||
/* Insert the required number of NOPs. */
|
||||
|
@ -6429,7 +6511,7 @@ output_file_start (void)
|
|||
void
|
||||
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
||||
{
|
||||
int sgpr, vgpr;
|
||||
int sgpr, vgpr, avgpr;
|
||||
bool xnack_enabled = TARGET_XNACK;
|
||||
|
||||
fputs ("\n\n", file);
|
||||
|
@ -6454,6 +6536,12 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
|
||||
break;
|
||||
vgpr++;
|
||||
for (avgpr = 255; avgpr >= 0; avgpr--)
|
||||
if (df_regs_ever_live_p (FIRST_AVGPR_REG + avgpr))
|
||||
break;
|
||||
avgpr++;
|
||||
vgpr = (vgpr + 3) & ~3;
|
||||
avgpr = (avgpr + 3) & ~3;
|
||||
|
||||
if (!leaf_function_p ())
|
||||
{
|
||||
|
@ -6462,6 +6550,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
vgpr = MAX_NORMAL_VGPR_COUNT;
|
||||
if (sgpr < MAX_NORMAL_SGPR_COUNT)
|
||||
sgpr = MAX_NORMAL_SGPR_COUNT;
|
||||
if (avgpr < MAX_NORMAL_AVGPR_COUNT)
|
||||
avgpr = MAX_NORMAL_AVGPR_COUNT;
|
||||
}
|
||||
|
||||
/* The gfx90a accum_offset field can't represent 0 registers. */
|
||||
|
@ -6519,6 +6609,11 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
? 2
|
||||
: cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
|
||||
? 1 : 0);
|
||||
int next_free_vgpr = vgpr;
|
||||
if (TARGET_CDNA1 && avgpr > vgpr)
|
||||
next_free_vgpr = avgpr;
|
||||
if (TARGET_CDNA2_PLUS)
|
||||
next_free_vgpr += avgpr;
|
||||
fprintf (file,
|
||||
"\t .amdhsa_next_free_vgpr\t%i\n"
|
||||
"\t .amdhsa_next_free_sgpr\t%i\n"
|
||||
|
@ -6529,7 +6624,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
"\t .amdhsa_group_segment_fixed_size\t%u\n"
|
||||
"\t .amdhsa_float_denorm_mode_32\t3\n"
|
||||
"\t .amdhsa_float_denorm_mode_16_64\t3\n",
|
||||
vgpr,
|
||||
next_free_vgpr,
|
||||
sgpr,
|
||||
xnack_enabled,
|
||||
LDS_SIZE);
|
||||
|
@ -6537,7 +6632,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
fprintf (file,
|
||||
"\t .amdhsa_accum_offset\t%i\n"
|
||||
"\t .amdhsa_tg_split\t0\n",
|
||||
(vgpr+3)&~3); // I think this means the AGPRs come after the VGPRs
|
||||
vgpr); /* The AGPRs come after the VGPRs. */
|
||||
fputs ("\t.end_amdhsa_kernel\n", file);
|
||||
|
||||
#if 1
|
||||
|
@ -6564,9 +6659,9 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
|
|||
cfun->machine->kernarg_segment_byte_size,
|
||||
cfun->machine->kernarg_segment_alignment,
|
||||
LDS_SIZE,
|
||||
sgpr, vgpr);
|
||||
if (gcn_arch == PROCESSOR_GFX90a)
|
||||
fprintf (file, " .agpr_count: 0\n"); // AGPRs are not used, yet
|
||||
sgpr, next_free_vgpr);
|
||||
if (gcn_arch == PROCESSOR_GFX90a || gcn_arch == PROCESSOR_GFX908)
|
||||
fprintf (file, " .agpr_count: %i\n", avgpr);
|
||||
fputs (" .end_amdgpu_metadata\n", file);
|
||||
#endif
|
||||
|
||||
|
@ -6662,6 +6757,9 @@ print_reg (FILE *file, rtx x)
|
|||
else if (VGPR_REGNO_P (REGNO (x)))
|
||||
fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
|
||||
REGNO (x) - FIRST_VGPR_REG + 1);
|
||||
else if (AVGPR_REGNO_P (REGNO (x)))
|
||||
fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
|
||||
REGNO (x) - FIRST_AVGPR_REG + 1);
|
||||
else if (REGNO (x) == FLAT_SCRATCH_REG)
|
||||
fprintf (file, "flat_scratch");
|
||||
else if (REGNO (x) == EXEC_REG)
|
||||
|
@ -6680,6 +6778,9 @@ print_reg (FILE *file, rtx x)
|
|||
else if (VGPR_REGNO_P (REGNO (x)))
|
||||
fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
|
||||
REGNO (x) - FIRST_VGPR_REG + 3);
|
||||
else if (AVGPR_REGNO_P (REGNO (x)))
|
||||
fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
|
||||
REGNO (x) - FIRST_AVGPR_REG + 3);
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
@ -7603,6 +7704,8 @@ gcn_dwarf_register_number (unsigned int regno)
|
|||
}
|
||||
else if (VGPR_REGNO_P (regno))
|
||||
return (regno - FIRST_VGPR_REG + 2560);
|
||||
else if (AVGPR_REGNO_P (regno))
|
||||
return (regno - FIRST_AVGPR_REG + 3072);
|
||||
|
||||
/* Otherwise, there's nothing sensible to do. */
|
||||
return regno + 100000;
|
||||
|
|
|
@ -146,6 +146,9 @@
|
|||
#define FIRST_VGPR_REG 160
|
||||
#define VGPR_REGNO(N) ((N)+FIRST_VGPR_REG)
|
||||
#define LAST_VGPR_REG 415
|
||||
#define FIRST_AVGPR_REG 416
|
||||
#define AVGPR_REGNO(N) ((N)+FIRST_AVGPR_REG)
|
||||
#define LAST_AVGPR_REG 671
|
||||
|
||||
/* Frame Registers, and other registers */
|
||||
|
||||
|
@ -157,10 +160,10 @@
|
|||
#define RETURN_VALUE_REG 168 /* Must be divisible by 4. */
|
||||
#define STATIC_CHAIN_REGNUM 30
|
||||
#define WORK_ITEM_ID_Z_REG 162
|
||||
#define SOFT_ARG_REG 416
|
||||
#define FRAME_POINTER_REGNUM 418
|
||||
#define DWARF_LINK_REGISTER 420
|
||||
#define FIRST_PSEUDO_REGISTER 421
|
||||
#define SOFT_ARG_REG 672
|
||||
#define FRAME_POINTER_REGNUM 674
|
||||
#define DWARF_LINK_REGISTER 676
|
||||
#define FIRST_PSEUDO_REGISTER 677
|
||||
|
||||
#define FIRST_PARM_REG (FIRST_SGPR_REG + 24)
|
||||
#define FIRST_VPARM_REG (FIRST_VGPR_REG + 8)
|
||||
|
@ -176,6 +179,7 @@
|
|||
#define SGPR_OR_VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_SGPR_REG)
|
||||
#define SGPR_REGNO_P(N) ((N) <= LAST_SGPR_REG)
|
||||
#define VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_VGPR_REG)
|
||||
#define AVGPR_REGNO_P(N) ((N)>=FIRST_AVGPR_REG && (N) <= LAST_AVGPR_REG)
|
||||
#define SSRC_REGNO_P(N) ((N) <= SCC_REG && (N) != VCCZ_REG)
|
||||
#define SDST_REGNO_P(N) ((N) <= EXEC_HI_REG && (N) != VCCZ_REG)
|
||||
#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
|
||||
|
@ -206,7 +210,7 @@
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/* VGRPs */ \
|
||||
/* VGPRs */ \
|
||||
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
|
@ -223,6 +227,23 @@
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* Accumulation VGPRs */ \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* Other registers. */ \
|
||||
1, 1, 1, 1, 1 \
|
||||
}
|
||||
|
@ -244,7 +265,7 @@
|
|||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/* VGRPs */ \
|
||||
/* VGPRs */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
|
@ -261,6 +282,23 @@
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
|
||||
/* Accumulation VGPRs */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
|
||||
/* Other registers. */ \
|
||||
1, 1, 1, 1, 1 \
|
||||
}
|
||||
|
@ -320,6 +358,8 @@ enum reg_class
|
|||
SGPR_SRC_REGS,
|
||||
GENERAL_REGS,
|
||||
VGPR_REGS,
|
||||
AVGPR_REGS,
|
||||
ALL_VGPR_REGS,
|
||||
ALL_GPR_REGS,
|
||||
SRCDST_REGS,
|
||||
AFP_REGS,
|
||||
|
@ -345,6 +385,8 @@ enum reg_class
|
|||
"SGPR_SRC_REGS", \
|
||||
"GENERAL_REGS", \
|
||||
"VGPR_REGS", \
|
||||
"AVGPR_REGS", \
|
||||
"ALL_VGPR_REGS", \
|
||||
"ALL_GPR_REGS", \
|
||||
"SRCDST_REGS", \
|
||||
"AFP_REGS", \
|
||||
|
@ -357,40 +399,58 @@ enum reg_class
|
|||
#define REG_CLASS_CONTENTS { \
|
||||
/* NO_REGS. */ \
|
||||
{0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SCC_CONDITIONAL_REG. */ \
|
||||
{0, 0, 0, 0, \
|
||||
NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
|
||||
0, 0, 0, 0, 0}, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* VCCZ_CONDITIONAL_REG. */ \
|
||||
{0, 0, 0, NAMED_REG_MASK (VCCZ_REG), \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* VCC_CONDITIONAL_REG. */ \
|
||||
{0, 0, 0, NAMED_REG_MASK (VCC_LO_REG)|NAMED_REG_MASK (VCC_HI_REG), \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* EXECZ_CONDITIONAL_REG. */ \
|
||||
{0, 0, 0, 0, \
|
||||
NAMED_REG_MASK2 (EXECZ_REG), 0, 0, 0, \
|
||||
0, 0, 0, 0, 0}, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* ALL_CONDITIONAL_REGS. */ \
|
||||
{0, 0, 0, NAMED_REG_MASK (VCCZ_REG), \
|
||||
NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* EXEC_MASK_REG. */ \
|
||||
{0, 0, 0, NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG), \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SGPR_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SGPR_EXEC_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xf1 | NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG), \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SGPR_VOP_SRC_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
|
@ -398,12 +458,16 @@ enum reg_class
|
|||
-NAMED_REG_MASK (EXEC_LO_REG) \
|
||||
-NAMED_REG_MASK (EXEC_HI_REG), \
|
||||
NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SGPR_MEM_SRC_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff-NAMED_REG_MASK (VCCZ_REG)-NAMED_REG_MASK (M0_REG) \
|
||||
-NAMED_REG_MASK (EXEC_LO_REG)-NAMED_REG_MASK (EXEC_HI_REG), \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SGPR_DST_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
|
@ -413,30 +477,56 @@ enum reg_class
|
|||
/* SGPR_SRC_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* GENERAL_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* VGPR_REGS. */ \
|
||||
{0, 0, 0, 0, \
|
||||
0, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* AVGPR_REGS. */ \
|
||||
{0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
|
||||
/* ALL_VGPR_REGS. */ \
|
||||
{0, 0, 0, 0, \
|
||||
0, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
|
||||
/* ALL_GPR_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
|
||||
0, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* SRCDST_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff-NAMED_REG_MASK (VCCZ_REG), \
|
||||
0, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0}, \
|
||||
/* AFP_REGS. */ \
|
||||
{0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, \
|
||||
0, 0, 0, 0, 0, 0xf}, \
|
||||
/* ALL_REGS. */ \
|
||||
{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
|
||||
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0 }}
|
||||
|
||||
|
@ -541,6 +631,34 @@ enum gcn_address_spaces
|
|||
"v236", "v237", "v238", "v239", "v240", "v241", "v242", "v243", "v244", \
|
||||
"v245", "v246", "v247", "v248", "v249", "v250", "v251", "v252", "v253", \
|
||||
"v254", "v255", \
|
||||
"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", \
|
||||
"a11", "a12", "a13", "a14", "a15", "a16", "a17", "a18", "a19", "a20", \
|
||||
"a21", "a22", "a23", "a24", "a25", "a26", "a27", "a28", "a29", "a30", \
|
||||
"a31", "a32", "a33", "a34", "a35", "a36", "a37", "a38", "a39", "a40", \
|
||||
"a41", "a42", "a43", "a44", "a45", "a46", "a47", "a48", "a49", "a50", \
|
||||
"a51", "a52", "a53", "a54", "a55", "a56", "a57", "a58", "a59", "a60", \
|
||||
"a61", "a62", "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", \
|
||||
"a71", "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", \
|
||||
"a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", "a90", \
|
||||
"a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", "a99", "a100", \
|
||||
"a101", "a102", "a103", "a104", "a105", "a106", "a107", "a108", "a109", \
|
||||
"a110", "a111", "a112", "a113", "a114", "a115", "a116", "a117", "a118", \
|
||||
"a119", "a120", "a121", "a122", "a123", "a124", "a125", "a126", "a127", \
|
||||
"a128", "a129", "a130", "a131", "a132", "a133", "a134", "a135", "a136", \
|
||||
"a137", "a138", "a139", "a140", "a141", "a142", "a143", "a144", "a145", \
|
||||
"a146", "a147", "a148", "a149", "a150", "a151", "a152", "a153", "a154", \
|
||||
"a155", "a156", "a157", "a158", "a159", "a160", "a161", "a162", "a163", \
|
||||
"a164", "a165", "a166", "a167", "a168", "a169", "a170", "a171", "a172", \
|
||||
"a173", "a174", "a175", "a176", "a177", "a178", "a179", "a180", "a181", \
|
||||
"a182", "a183", "a184", "a185", "a186", "a187", "a188", "a189", "a190", \
|
||||
"a191", "a192", "a193", "a194", "a195", "a196", "a197", "a198", "a199", \
|
||||
"a200", "a201", "a202", "a203", "a204", "a205", "a206", "a207", "a208", \
|
||||
"a209", "a210", "a211", "a212", "a213", "a214", "a215", "a216", "a217", \
|
||||
"a218", "a219", "a220", "a221", "a222", "a223", "a224", "a225", "a226", \
|
||||
"a227", "a228", "a229", "a230", "a231", "a232", "a233", "a234", "a235", \
|
||||
"a236", "a237", "a238", "a239", "a240", "a241", "a242", "a243", "a244", \
|
||||
"a245", "a246", "a247", "a248", "a249", "a250", "a251", "a252", "a253", \
|
||||
"a254", "a255", \
|
||||
"?ap0", "?ap1", "?fp0", "?fp1", "?dwlr" }
|
||||
|
||||
#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE)
|
||||
|
|
|
@ -51,13 +51,15 @@
|
|||
(EXECZ_REG 128)
|
||||
(SCC_REG 129)
|
||||
(FIRST_VGPR_REG 160)
|
||||
(LAST_VGPR_REG 415)])
|
||||
(LAST_VGPR_REG 415)
|
||||
(FIRST_AVGPR_REG 416)
|
||||
(LAST_AVGPR_REG 671)])
|
||||
|
||||
(define_constants
|
||||
[(SP_REGNUM 16)
|
||||
(LR_REGNUM 18)
|
||||
(AP_REGNUM 416)
|
||||
(FP_REGNUM 418)])
|
||||
(AP_REGNUM 672)
|
||||
(FP_REGNUM 674)])
|
||||
|
||||
(define_c_enum "unspecv" [
|
||||
UNSPECV_PROLOGUE_USE
|
||||
|
@ -171,6 +173,11 @@
|
|||
; vdst: vgpr0-255
|
||||
; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
|
||||
;
|
||||
; vop3p_mai - vector, three inputs, one vector output
|
||||
; vsrc0,vsrc1,vsrc2: inline constant -16 to -64, fp inline immediate,
|
||||
; (acc or arch) vgpr0-255
|
||||
; vdst: (acc or arch) vgpr0-255
|
||||
;
|
||||
; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
|
||||
; src0: vgpr0-255
|
||||
; dst_sel: BYTE_0-3, WORD_0-1, DWORD
|
||||
|
@ -229,7 +236,8 @@
|
|||
|
||||
(define_attr "type"
|
||||
"unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
|
||||
vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
|
||||
vop3a,vop3b,vop3p_mai,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,
|
||||
vmult"
|
||||
(const_string "unknown"))
|
||||
|
||||
; Set if instruction is executed in scalar or vector unit
|
||||
|
@ -237,7 +245,7 @@
|
|||
(define_attr "unit" "unknown,scalar,vector"
|
||||
(cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
|
||||
(const_string "scalar")
|
||||
(eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
|
||||
(eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,vop3p_mai,
|
||||
vop_sdwa,vop_dpp,flat,vmult")
|
||||
(const_string "vector")]
|
||||
(const_string "unknown")))
|
||||
|
@ -284,7 +292,7 @@
|
|||
|
||||
; Disable alternatives that only apply to specific ISA variants.
|
||||
|
||||
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
|
||||
(define_attr "gcn_version" "gcn3,gcn5,cdna2" (const_string "gcn3"))
|
||||
(define_attr "rdna" "any,no,yes" (const_string "any"))
|
||||
|
||||
(define_attr "enabled" ""
|
||||
|
@ -297,6 +305,9 @@
|
|||
(eq_attr "gcn_version" "gcn3") (const_int 1)
|
||||
(and (eq_attr "gcn_version" "gcn5")
|
||||
(ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
|
||||
(const_int 1)
|
||||
(and (eq_attr "gcn_version" "cdna2")
|
||||
(ne (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0)))
|
||||
(const_int 1)]
|
||||
(const_int 0)))
|
||||
|
||||
|
@ -552,25 +563,32 @@
|
|||
[(set (match_operand:SISF 0 "nonimmediate_operand")
|
||||
(match_operand:SISF 1 "gcn_load_operand"))]
|
||||
""
|
||||
{@ [cons: =0, 1; attrs: type, exec, length]
|
||||
[SD ,SSA ;sop1 ,* ,4 ] s_mov_b32\t%0, %1
|
||||
[SD ,J ;sopk ,* ,4 ] s_movk_i32\t%0, %1
|
||||
[SD ,B ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
|
||||
[SD ,RB ;smem ,* ,12] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RB ,Sm ;smem ,* ,12] s_buffer_store%s1\t%1, s[0:3], %0
|
||||
[Sm ,RS ;smem ,* ,12] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RS ,Sm ;smem ,* ,12] s_store_dword\t%1, %A0
|
||||
[v ,v ;vop1 ,* ,4 ] v_mov_b32\t%0, %1
|
||||
[Sg ,v ;vop3a,none,8 ] v_readlane_b32\t%0, %1, 0
|
||||
[v ,Sv ;vop3a,none,8 ] v_writelane_b32\t%0, %1, 0
|
||||
[v ,RF ;flat ,* ,12] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[RF ,v ;flat ,* ,12] flat_store_dword\t%A0, %1%O0%g0
|
||||
[v ,B ;vop1 ,* ,8 ] v_mov_b32\t%0, %1
|
||||
[RLRG,v ;ds ,* ,12] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,* ,12] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[SD ,Y ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
|
||||
[v ,RM ;flat ,* ,12] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[RM ,v ;flat ,* ,12] global_store_dword\t%A0, %1%O0%g0
|
||||
{@ [cons: =0, 1; attrs: type, exec, length, gcn_version]
|
||||
[SD ,SSA ;sop1 ,* ,4 ,* ] s_mov_b32\t%0, %1
|
||||
[SD ,J ;sopk ,* ,4 ,* ] s_movk_i32\t%0, %1
|
||||
[SD ,B ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1
|
||||
[SD ,RB ;smem ,* ,12,* ] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RB ,Sm ;smem ,* ,12,* ] s_buffer_store%s1\t%1, s[0:3], %0
|
||||
[Sm ,RS ;smem ,* ,12,* ] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RS ,Sm ;smem ,* ,12,* ] s_store_dword\t%1, %A0
|
||||
[v ,v ;vop1 ,* ,4 ,* ] v_mov_b32\t%0, %1
|
||||
[Sg ,v ;vop3a,none,8 ,* ] v_readlane_b32\t%0, %1, 0
|
||||
[v ,Sv ;vop3a,none,8 ,* ] v_writelane_b32\t%0, %1, 0
|
||||
[v ,^a ;vop3p_mai,*,8,* ] v_accvgpr_read_b32\t%0, %1
|
||||
[a ,v ;vop3p_mai,*,8,* ] v_accvgpr_write_b32\t%0, %1
|
||||
[a ,a ;vop1 ,* ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
|
||||
[v ,RF ;flat ,* ,12,* ] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[^a ,RF ;flat ,* ,12,cdna2] ^
|
||||
[RF ,v ;flat ,* ,12,* ] flat_store_dword\t%A0, %1%O0%g0
|
||||
[RF ,a ;flat ,* ,12,cdna2] ^
|
||||
[v ,B ;vop1 ,* ,8 ,* ] v_mov_b32\t%0, %1
|
||||
[RLRG,v ;ds ,* ,12,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,* ,12,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[SD ,Y ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1
|
||||
[v ,RM ;flat ,* ,12,* ] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[^a ,RM ;flat ,* ,12,cdna2] ^
|
||||
[RM ,v ;flat ,* ,12,* ] global_store_dword\t%A0, %1%O0%g0
|
||||
[RM ,a ;flat ,* ,12,cdna2] ^
|
||||
})
|
||||
|
||||
; 8/16bit move pattern
|
||||
|
@ -580,20 +598,27 @@
|
|||
[(set (match_operand:QIHI 0 "nonimmediate_operand")
|
||||
(match_operand:QIHI 1 "gcn_load_operand"))]
|
||||
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
|
||||
{@ [cons: =0, 1; attrs: type, exec, length]
|
||||
[SD ,SSA ;sop1 ,* ,4 ] s_mov_b32\t%0, %1
|
||||
[SD ,J ;sopk ,* ,4 ] s_movk_i32\t%0, %1
|
||||
[SD ,B ;sop1 ,* ,8 ] s_mov_b32\t%0, %1
|
||||
[v ,v ;vop1 ,* ,4 ] v_mov_b32\t%0, %1
|
||||
[Sg ,v ;vop3a,none,4 ] v_readlane_b32\t%0, %1, 0
|
||||
[v ,Sv ;vop3a,none,4 ] v_writelane_b32\t%0, %1, 0
|
||||
[v ,RF ;flat ,* ,12] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[RF ,v ;flat ,* ,12] flat_store%s0\t%A0, %1%O0%g0
|
||||
[v ,B ;vop1 ,* ,8 ] v_mov_b32\t%0, %1
|
||||
[RLRG,v ;ds ,* ,12] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,* ,12] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RM ;flat ,* ,12] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[RM ,v ;flat ,* ,12] global_store%s0\t%A0, %1%O0%g0
|
||||
{@ [cons: =0, 1; attrs: type, exec, length, gcn_version]
|
||||
[SD ,SSA ;sop1 ,* ,4 ,* ] s_mov_b32\t%0, %1
|
||||
[SD ,J ;sopk ,* ,4 ,* ] s_movk_i32\t%0, %1
|
||||
[SD ,B ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1
|
||||
[v ,v ;vop1 ,* ,4 ,* ] v_mov_b32\t%0, %1
|
||||
[Sg ,v ;vop3a,none,4 ,* ] v_readlane_b32\t%0, %1, 0
|
||||
[v ,Sv ;vop3a,none,4 ,* ] v_writelane_b32\t%0, %1, 0
|
||||
[v ,^a ;vop3p_mai,*,8,* ] v_accvgpr_read_b32\t%0, %1
|
||||
[a ,v ;vop3p_mai,*,8,* ] v_accvgpr_write_b32\t%0, %1
|
||||
[a ,a ;vop1 ,* ,8,cdna2] v_accvgpr_mov_b32\t%0, %1
|
||||
[v ,RF ;flat ,* ,12,* ] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[^a ,RF ;flat ,* ,12,cdna2] ^
|
||||
[RF ,v ;flat ,* ,12,* ] flat_store%s0\t%A0, %1%O0%g0
|
||||
[RF ,a ;flat ,* ,12,cdna2] ^
|
||||
[v ,B ;vop1 ,* ,8 ,* ] v_mov_b32\t%0, %1
|
||||
[RLRG,v ;ds ,* ,12,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,* ,12,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RM ;flat ,* ,12,* ] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[^a ,RM ;flat ,* ,12,cdna2] ^
|
||||
[RM ,v ;flat ,* ,12,* ] global_store%s0\t%A0, %1%O0%g0
|
||||
[RM ,a ;flat ,* ,12,cdna2] ^
|
||||
})
|
||||
|
||||
; 64bit move pattern
|
||||
|
@ -602,22 +627,29 @@
|
|||
[(set (match_operand:DIDF 0 "nonimmediate_operand")
|
||||
(match_operand:DIDF 1 "general_operand"))]
|
||||
"GET_CODE(operands[1]) != SYMBOL_REF"
|
||||
{@ [cons: =0, 1; attrs: type, length]
|
||||
[SD ,SSA ;sop1 ,4 ] s_mov_b64\t%0, %1
|
||||
[SD ,C ;sop1 ,8 ] ^
|
||||
[SD ,DB ;mult ,* ] #
|
||||
[RS ,Sm ;smem ,12] s_store_dwordx2\t%1, %A0
|
||||
[Sm ,RS ;smem ,12] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,v ;vmult,* ] #
|
||||
[v ,DB ;vmult,* ] #
|
||||
[Sg ,v ;vmult,* ] #
|
||||
[v ,Sv ;vmult,* ] #
|
||||
[v ,RF ;flat ,12] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[RF ,v ;flat ,12] flat_store_dwordx2\t%A0, %1%O0%g0
|
||||
[RLRG,v ;ds ,12] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,12] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RM ;flat ,12] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[RM ,v ;flat ,12] global_store_dwordx2\t%A0, %1%O0%g0
|
||||
{@ [cons: =0, 1; attrs: type, length, gcn_version]
|
||||
[SD ,SSA ;sop1 ,4 ,* ] s_mov_b64\t%0, %1
|
||||
[SD ,C ;sop1 ,8 ,* ] ^
|
||||
[SD ,DB ;mult ,* ,* ] #
|
||||
[RS ,Sm ;smem ,12,* ] s_store_dwordx2\t%1, %A0
|
||||
[Sm ,RS ;smem ,12,* ] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,v ;vmult,* ,* ] #
|
||||
[v ,DB ;vmult,* ,* ] #
|
||||
[Sg ,v ;vmult,* ,* ] #
|
||||
[v ,Sv ;vmult,* ,* ] #
|
||||
[v ,^a ;vmult,* ,* ] #
|
||||
[a ,v ;vmult,* ,* ] #
|
||||
[a ,a ;vmult,* ,cdna2] #
|
||||
[v ,RF ;flat ,12,* ] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[^a ,RF ;flat ,12,cdna2] ^
|
||||
[RF ,v ;flat ,12,* ] flat_store_dwordx2\t%A0, %1%O0%g0
|
||||
[RF ,a ;flat ,12,cdna2] ^
|
||||
[RLRG,v ;ds ,12,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RLRG;ds ,12,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RM ;flat ,12,* ] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[^a ,RM ;flat ,12,cdna2] ^
|
||||
[RM ,v ;flat ,12,* ] global_store_dwordx2\t%A0, %1%O0%g0
|
||||
[RM ,a ;flat ,12,cdna2] ^
|
||||
}
|
||||
"reload_completed
|
||||
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
|
||||
|
@ -655,19 +687,26 @@
|
|||
[(set (match_operand:TI 0 "nonimmediate_operand")
|
||||
(match_operand:TI 1 "general_operand" ))]
|
||||
""
|
||||
{@ [cons: =0, 1; attrs: type, delayeduse, length]
|
||||
[SD,SSB;mult ,* ,* ] #
|
||||
[RS,Sm ;smem ,* ,12] s_store_dwordx4\t%1, %A0
|
||||
[Sm,RS ;smem ,yes,12] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RF,v ;flat ,* ,12] flat_store_dwordx4\t%A0, %1%O0%g0
|
||||
[v ,RF ;flat ,* ,12] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[v ,v ;vmult,* ,* ] #
|
||||
[v ,Sv ;vmult,* ,* ] #
|
||||
[SD,v ;vmult,* ,* ] #
|
||||
[RM,v ;flat ,yes,12] global_store_dwordx4\t%A0, %1%O0%g0
|
||||
[v ,RM ;flat ,* ,12] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[RL,v ;ds ,* ,12] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RL ;ds ,* ,12] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
{@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version]
|
||||
[SD,SSB;mult ,* ,* ,* ] #
|
||||
[RS,Sm ;smem ,* ,12,* ] s_store_dwordx4\t%1, %A0
|
||||
[Sm,RS ;smem ,yes,12,* ] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
|
||||
[RF,v ;flat ,* ,12,* ] flat_store_dwordx4\t%A0, %1%O0%g0
|
||||
[RF,a ;flat ,* ,12,cdna2] ^
|
||||
[v ,RF ;flat ,* ,12,* ] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
|
||||
[^a,RF ;flat ,* ,12,cdna2] ^
|
||||
[v ,v ;vmult,* ,* ,* ] #
|
||||
[v ,Sv ;vmult,* ,* ,* ] #
|
||||
[SD,v ;vmult,* ,* ,* ] #
|
||||
[RM,v ;flat ,yes,12,* ] global_store_dwordx4\t%A0, %1%O0%g0
|
||||
[RM,a ;flat ,yes,12,cdna2] ^
|
||||
[v ,RM ;flat ,* ,12,* ] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
|
||||
[^a,RM ;flat ,* ,12,cdna2] ^
|
||||
[RL,v ;ds ,* ,12,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,RL ;ds ,* ,12,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
|
||||
[v ,^a ;vmult,* ,* ,* ] #
|
||||
[a ,v ;vmult,* ,* ,* ] #
|
||||
[a ,a ;vmult,* ,* ,cdna2] #
|
||||
}
|
||||
"reload_completed
|
||||
&& REG_P (operands[0])
|
||||
|
|
|
@ -471,6 +471,26 @@ copy_early_debug_info (const char *infile, const char *outfile)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* CDNA2 devices have twice as many VGPRs compared to older devices,
|
||||
but the AVGPRS are allocated from the same pool. */
|
||||
|
||||
static int
|
||||
isa_has_combined_avgprs (int isa)
|
||||
{
|
||||
switch (isa)
|
||||
{
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX908:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return false;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90a:
|
||||
return true;
|
||||
}
|
||||
fatal_error (input_location, "unhandled ISA in isa_has_combined_avgprs");
|
||||
}
|
||||
|
||||
/* Parse an input assembler file, extract the offload tables etc.,
|
||||
and output (1) the assembler code, minus the tables (which can contain
|
||||
problematic relocations), and (2) a C file with the offload tables
|
||||
|
@ -496,6 +516,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
|||
{
  int sgpr_count;	/* Value parsed from ".sgpr_count:"; -1 until seen.  */
  int vgpr_count;	/* Value parsed from ".vgpr_count:"; -1 until seen.  */
  int avgpr_count;	/* Value parsed from ".agpr_count:"; 0 until seen.  */
  char *kernel_name;	/* Kernel these counts belong to; NULL until seen.  */
  /* One initializer per field, in declaration order.  The previous
     three-element list predated avgpr_count, so its NULL landed on that
     int field and kernel_name was only implicitly zeroed.  The effective
     values are unchanged: avgpr_count starts at 0, so metadata without an
     .agpr_count entry adds nothing when the counts are later summed.  */
} regcount = { -1, -1, 0, NULL };
|
||||
|
||||
|
@ -543,6 +564,12 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
|||
gcc_assert (regcount.kernel_name);
|
||||
break;
|
||||
}
|
||||
else if (sscanf (buf, " .agpr_count: %d\n",
|
||||
&regcount.avgpr_count) == 1)
|
||||
{
|
||||
gcc_assert (regcount.kernel_name);
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -685,6 +712,8 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|
|||
{
|
||||
sgpr_count = regcounts[j].sgpr_count;
|
||||
vgpr_count = regcounts[j].vgpr_count;
|
||||
if (isa_has_combined_avgprs (elf_arch))
|
||||
vgpr_count += regcounts[j].avgpr_count;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,6 +70,30 @@
|
|||
return VGPR_REGNO_P (REGNO (op)) || REGNO (op) >= FIRST_PSEUDO_REGISTER;
|
||||
})
|
||||
|
||||
;; Match a register operand (looking through one SUBREG) whose register is
;; either an accumulator VGPR hard register or any pseudo register.
(define_predicate "gcn_avgpr_register_operand"
  (match_operand 0 "register_operand")
  {
    /* Test the register inside a SUBREG, not the SUBREG itself.  */
    rtx inner = GET_CODE (op) == SUBREG ? SUBREG_REG (op) : op;

    if (!REG_P (inner))
      return false;

    unsigned int regno = REGNO (inner);

    /* Pseudos are accepted as well as AVGPR hard registers.  */
    return regno >= FIRST_PSEUDO_REGISTER || AVGPR_REGNO_P (regno);
  })
|
||||
|
||||
;; Match a register operand (looking through one SUBREG) whose register is
;; an accumulator VGPR hard register.  Pseudo registers are rejected.
(define_predicate "gcn_avgpr_hard_register_operand"
  (match_operand 0 "register_operand")
  {
    /* Test the register inside a SUBREG, not the SUBREG itself.  */
    rtx inner = GET_CODE (op) == SUBREG ? SUBREG_REG (op) : op;

    return REG_P (inner) && AVGPR_REGNO_P (REGNO (inner));
  })
|
||||
|
||||
(define_predicate "gcn_inline_immediate_operand"
|
||||
(match_code "const_int,const_double,const_vector")
|
||||
{
|
||||
|
|
|
@ -2010,6 +2010,9 @@ Any @code{symbol_ref} or @code{label_ref}
|
|||
@item v
|
||||
VGPR register
|
||||
|
||||
@item a
|
||||
Accelerator VGPR register (CDNA1 onwards)
|
||||
|
||||
@item Sg
|
||||
SGPR register
|
||||
|
||||
|
|
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-double.c
Normal file
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-double.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx90a -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[068]" } } */
|
||||
/* { dg-final { scan-assembler {load[^\n]*a[0-9[]} } } */
|
||||
/* { dg-final { scan-assembler {store[^\n]*a[0-9[]} } } */
|
||||
|
||||
#define TYPE double
|
||||
|
||||
#include "avgpr-mem-int.c"
|
116
gcc/testsuite/gcc.target/gcn/avgpr-mem-int.c
Normal file
116
gcc/testsuite/gcc.target/gcn/avgpr-mem-int.c
Normal file
|
@ -0,0 +1,116 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx90a -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[068]" } } */
|
||||
/* { dg-final { scan-assembler {load[^\n]*a[0-9[]} } } */
|
||||
/* { dg-final { scan-assembler {store[^\n]*a[0-9[]} } } */
|
||||
|
||||
#ifndef TYPE
|
||||
#define TYPE int
|
||||
#endif
|
||||
|
||||
TYPE a[50];
|
||||
|
||||
/* Testcase body: touch every element of the 50-entry global array `a'
   through inline asm whose operand may live in either a VGPR ("v") or
   an accelerator VGPR ("a").  The asm templates only echo the operand;
   the file's dg-final directives then scan the output for load and
   store lines that mention an a-register.
   Presumably 50 operands are used to create enough register pressure
   for some of them to land in AVGPRs -- TODO confirm.
   NOTE(review): declared to return int but no return statement is
   present; presumably harmless for a dg-do compile test -- confirm.  */
int f()
|
||||
{
|
||||
/* Pass 1: each a[i] is an input-only operand ("va").  */
__asm__ volatile ("; fake -> %0" :: "va"(a[0]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[1]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[2]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[3]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[4]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[5]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[6]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[7]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[8]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[9]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[10]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[11]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[12]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[13]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[14]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[15]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[16]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[17]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[18]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[19]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[20]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[21]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[22]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[23]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[24]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[25]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[26]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[27]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[28]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[29]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[30]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[31]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[32]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[33]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[34]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[35]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[36]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[37]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[38]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[39]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[40]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[41]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[42]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[43]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[44]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[45]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[46]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[47]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[48]));
|
||||
__asm__ volatile ("; fake -> %0" :: "va"(a[49]));
|
||||
|
||||
/* Pass 2: each a[i] is a read-write operand ("+va"), so the asm counts
   as both a use and a definition of the element.  */
__asm__ volatile ("; fake <- %0" : "+va"(a[0]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[1]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[2]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[3]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[4]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[5]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[6]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[7]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[8]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[9]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[10]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[11]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[12]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[13]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[14]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[15]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[16]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[17]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[18]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[19]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[20]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[21]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[22]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[23]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[24]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[25]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[26]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[27]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[28]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[29]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[30]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[31]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[32]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[33]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[34]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[35]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[36]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[37]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[38]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[39]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[40]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[41]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[42]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[43]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[44]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[45]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[46]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[47]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[48]));
|
||||
__asm__ volatile ("; fake <- %0" : "+va"(a[49]));
|
||||
}
|
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-long.c
Normal file
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-long.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx90a -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[068]" } } */
|
||||
/* { dg-final { scan-assembler {load[^\n]*a[0-9[]} } } */
|
||||
/* { dg-final { scan-assembler {store[^\n]*a[0-9[]} } } */
|
||||
|
||||
#define TYPE long
|
||||
|
||||
#include "avgpr-mem-int.c"
|
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-short.c
Normal file
9
gcc/testsuite/gcc.target/gcn/avgpr-mem-short.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx90a -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[068]" } } */
|
||||
/* { dg-final { scan-assembler {load[^\n]*a[0-9[]} } } */
|
||||
/* { dg-final { scan-assembler {store[^\n]*a[0-9[]} } } */
|
||||
|
||||
#define TYPE short
|
||||
|
||||
#include "avgpr-mem-int.c"
|
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-double.c
Normal file
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-double.c
Normal file
|
@ -0,0 +1,8 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx908 -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[06]" } } */
|
||||
/* { dg-final { scan-assembler "accvgpr" } } */
|
||||
|
||||
#define TYPE double
|
||||
|
||||
#include "avgpr-spill-int.c"
|
115
gcc/testsuite/gcc.target/gcn/avgpr-spill-int.c
Normal file
115
gcc/testsuite/gcc.target/gcn/avgpr-spill-int.c
Normal file
|
@ -0,0 +1,115 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx908 -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[06]" } } */
|
||||
/* { dg-final { scan-assembler "accvgpr" } } */
|
||||
|
||||
#ifndef TYPE
|
||||
#define TYPE int
|
||||
#endif
|
||||
|
||||
TYPE a[50];
|
||||
|
||||
/* Testcase body: define every element of the 50-entry global array `a'
   from an inline asm output constrained to a VGPR ("=v"), then feed
   every element back into an asm as a VGPR input ("v").  The asm
   templates only echo the operand; the file's dg-final directive scans
   the output for the string "accvgpr".
   Presumably the 50 values are meant to exceed what the allocator
   keeps in VGPRs so that some are spilled to AVGPRs -- TODO confirm.
   NOTE(review): declared to return int but no return statement is
   present; presumably harmless for a dg-do compile test -- confirm.  */
int f()
|
||||
{
|
||||
/* Pass 1: each a[i] is written from an asm output operand ("=v").  */
__asm__ volatile ("; fake <- %0" : "=v"(a[0]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[1]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[2]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[3]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[4]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[5]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[6]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[7]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[8]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[9]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[10]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[11]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[12]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[13]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[14]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[15]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[16]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[17]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[18]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[19]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[20]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[21]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[22]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[23]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[24]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[25]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[26]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[27]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[28]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[29]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[30]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[31]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[32]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[33]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[34]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[35]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[36]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[37]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[38]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[39]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[40]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[41]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[42]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[43]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[44]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[45]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[46]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[47]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[48]));
|
||||
__asm__ volatile ("; fake <- %0" : "=v"(a[49]));
|
||||
|
||||
/* Pass 2: each a[i] is consumed as an asm input operand ("v").  */
__asm__ volatile ("; fake -> %0" :: "v"(a[0]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[1]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[2]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[3]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[4]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[5]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[6]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[7]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[8]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[9]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[10]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[11]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[12]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[13]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[14]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[15]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[16]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[17]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[18]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[19]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[20]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[21]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[22]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[23]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[24]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[25]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[26]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[27]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[28]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[29]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[30]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[31]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[32]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[33]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[34]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[35]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[36]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[37]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[38]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[39]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[40]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[41]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[42]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[43]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[44]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[45]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[46]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[47]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[48]));
|
||||
__asm__ volatile ("; fake -> %0" :: "v"(a[49]));
|
||||
}
|
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-long.c
Normal file
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-long.c
Normal file
|
@ -0,0 +1,8 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx908 -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[06]" } } */
|
||||
/* { dg-final { scan-assembler "accvgpr" } } */
|
||||
|
||||
#define TYPE long
|
||||
|
||||
#include "avgpr-spill-int.c"
|
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-short.c
Normal file
8
gcc/testsuite/gcc.target/gcn/avgpr-spill-short.c
Normal file
|
@ -0,0 +1,8 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-march=gfx908 -O1" } */
|
||||
/* { dg-skip-if "incompatible ISA" { *-*-* } { "-march=gfx90[06]" } } */
|
||||
/* { dg-final { scan-assembler "accvgpr" } } */
|
||||
|
||||
#define TYPE short
|
||||
|
||||
#include "avgpr-spill-int.c"
|
|
@ -1702,6 +1702,25 @@ isa_code(const char *isa) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
/* CDNA2 devices have twice as many VGPRs compared to older devices. */
|
||||
|
||||
static int
|
||||
max_isa_vgprs (int isa)
|
||||
{
|
||||
switch (isa)
|
||||
{
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX803:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX900:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX906:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX908:
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return 256;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90a:
|
||||
return 512;
|
||||
}
|
||||
GOMP_PLUGIN_fatal ("unhandled ISA in max_isa_vgprs");
|
||||
}
|
||||
|
||||
/* }}} */
|
||||
/* {{{ Run */
|
||||
|
||||
|
@ -2143,6 +2162,7 @@ run_kernel (struct kernel_info *kernel, void *vars,
|
|||
struct GOMP_kernel_launch_attributes *kla,
|
||||
struct goacc_asyncqueue *aq, bool module_locked)
|
||||
{
|
||||
struct agent_info *agent = kernel->agent;
|
||||
GCN_DEBUG ("SGPRs: %d, VGPRs: %d\n", kernel->description->sgpr_count,
|
||||
kernel->description->vpgr_count);
|
||||
|
||||
|
@ -2150,8 +2170,9 @@ run_kernel (struct kernel_info *kernel, void *vars,
|
|||
VGPRs available to run the kernels together. */
|
||||
if (kla->ndim == 3 && kernel->description->vpgr_count > 0)
|
||||
{
|
||||
int max_vgprs = max_isa_vgprs (agent->device_isa);
|
||||
int granulated_vgprs = (kernel->description->vpgr_count + 3) & ~3;
|
||||
int max_threads = (256 / granulated_vgprs) * 4;
|
||||
int max_threads = (max_vgprs / granulated_vgprs) * 4;
|
||||
if (kla->gdims[2] > max_threads)
|
||||
{
|
||||
GCN_WARNING ("Too many VGPRs required to support %d threads/workers"
|
||||
|
@ -2188,7 +2209,6 @@ run_kernel (struct kernel_info *kernel, void *vars,
|
|||
DEBUG_PRINT ("]\n");
|
||||
DEBUG_FLUSH ();
|
||||
|
||||
struct agent_info *agent = kernel->agent;
|
||||
if (!module_locked && pthread_rwlock_rdlock (&agent->module_rwlock))
|
||||
GOMP_PLUGIN_fatal ("Unable to read-lock a GCN agent rwlock");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue