amdgcn: add -march=gfx1030 EXPERIMENTAL
Accept the architecture configure option and resolve build failures. This is enough to build binaries, but I've not got a device to test it on, so there are probably runtime issues to fix. The cache control instructions might be unsafe (or too conservative), and the kernel metadata might be off. Vector reductions will need to be reworked for RDNA2. In principle, it would be better to use wavefrontsize32 for this architecture, but that would mean switching everything to allow SImode masks, so wavefrontsize64 it is. The multilib is not included in the default configuration so either configure --with-arch=gfx1030 or include it in --with-multilib-list=gfx1030,.... The majority of this patch has no effect on other devices, but changing from using scalar writes for the exit value to vector writes means we don't need the scalar cache write-back instruction anywhere (which doesn't exist in RDNA2). gcc/ChangeLog: * config.gcc: Allow --with-arch=gfx1030. * config/gcn/gcn-hsa.h (NO_XNACK): gfx1030 does not support xnack. (ASM_SPEC): gfx1030 needs -mattr=+wavefrontsize64 set. * config/gcn/gcn-opts.h (enum processor_type): Add PROCESSOR_GFX1030. (TARGET_GFX1030): New. (TARGET_RDNA2): New. * config/gcn/gcn-valu.md (@dpp_move<mode>): Disable for RDNA2. (addc<mode>3<exec_vcc>): Add RDNA2 syntax variant. (subc<mode>3<exec_vcc>): Likewise. (<convop><mode><vndi>2_exec): Add RDNA2 alternatives. (vec_cmp<mode>di): Likewise. (vec_cmp<u><mode>di): Likewise. (vec_cmp<mode>di_exec): Likewise. (vec_cmp<u><mode>di_exec): Likewise. (vec_cmp<mode>di_dup): Likewise. (vec_cmp<mode>di_dup_exec): Likewise. (reduc_<reduc_op>_scal_<mode>): Disable for RDNA2. (*<reduc_op>_dpp_shr_<mode>): Likewise. (*plus_carry_dpp_shr_<mode>): Likewise. (*plus_carry_in_dpp_shr_<mode>): Likewise. * config/gcn/gcn.cc (gcn_option_override): Recognise gfx1030. (gcn_global_address_p): RDNA2 only allows smaller offsets. (gcn_addr_space_legitimate_address_p): Likewise. (gcn_omp_device_kind_arch_isa): Recognise gfx1030. 
(gcn_expand_epilogue): Use VGPRs instead of SGPRs. (output_file_start): Configure gfx1030. * config/gcn/gcn.h (TARGET_CPU_CPP_BUILTINS): Add __RDNA2__. (ASSEMBLER_DIALECT): New. * config/gcn/gcn.md (rdna): New define_attr. (enabled): Use "rdna" attribute. (gcn_return): Remove s_dcache_wb. (addcsi3_scalar): Add RDNA2 syntax variant. (addcsi3_scalar_zero): Likewise. (addptrdi3): Likewise. (mulsi3): v_mul_lo_i32 should be v_mul_lo_u32 on all ISA. (*memory_barrier): Add RDNA2 syntax variant. (atomic_load<mode>): Add RDNA2 cache control variants, and disable scalar atomics for RDNA2. (atomic_store<mode>): Likewise. (atomic_exchange<mode>): Likewise. * config/gcn/gcn.opt (gpu_type): Add gfx1030. * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX1030): New. (main): Recognise -march=gfx1030. * config/gcn/t-omp-device: Add gfx1030 isa. libgcc/ChangeLog: * config/gcn/amdgcn_veclib.h (CDNA3_PLUS): Set false for __RDNA2__. libgomp/ChangeLog: * plugin/plugin-gcn.c (EF_AMDGPU_MACH_AMDGCN_GFX1030): New. (isa_hsa_name): Recognise gfx1030. (isa_code): Likewise. * team.c (gomp_free_pool_helper): Remove s_dcache_wb.
This commit is contained in:
parent
d118738e71
commit
c7ec7bd1c6
13 changed files with 195 additions and 92 deletions
|
@ -4529,7 +4529,7 @@ case "${target}" in
|
|||
for which in arch tune; do
|
||||
eval "val=\$with_$which"
|
||||
case ${val} in
|
||||
"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a)
|
||||
"" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
|
|
|
@ -75,7 +75,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
|
|||
supported for gcn. */
|
||||
#define GOMP_SELF_SPECS ""
|
||||
|
||||
#define NO_XNACK "!march=*:;march=fiji:;"
|
||||
#define NO_XNACK "!march=*:;march=fiji:;march=gfx1030:;"
|
||||
#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;"
|
||||
|
||||
/* In HSACOv4 no attribute setting means the binary supports "any" hardware
|
||||
|
@ -92,6 +92,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
|
|||
"%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
|
||||
"%{" NO_XNACK XNACKOPT "}" \
|
||||
"%{" NO_SRAM_ECC SRAMOPT "} " \
|
||||
"%{march=gfx1030:-mattr=+wavefrontsize64} " \
|
||||
"-filetype=obj"
|
||||
#define LINK_SPEC "--pie --export-dynamic"
|
||||
#define LIB_SPEC "-lc"
|
||||
|
|
|
@ -24,7 +24,8 @@ enum processor_type
|
|||
PROCESSOR_VEGA10, // gfx900
|
||||
PROCESSOR_VEGA20, // gfx906
|
||||
PROCESSOR_GFX908,
|
||||
PROCESSOR_GFX90a
|
||||
PROCESSOR_GFX90a,
|
||||
PROCESSOR_GFX1030
|
||||
};
|
||||
|
||||
#define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI)
|
||||
|
@ -32,12 +33,14 @@ enum processor_type
|
|||
#define TARGET_VEGA20 (gcn_arch == PROCESSOR_VEGA20)
|
||||
#define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
|
||||
#define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
|
||||
#define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
|
||||
|
||||
/* Set in gcn_option_override. */
|
||||
extern enum gcn_isa {
|
||||
ISA_UNKNOWN,
|
||||
ISA_GCN3,
|
||||
ISA_GCN5,
|
||||
ISA_RDNA2,
|
||||
ISA_CDNA1,
|
||||
ISA_CDNA2
|
||||
} gcn_isa;
|
||||
|
@ -50,6 +53,8 @@ extern enum gcn_isa {
|
|||
#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
|
||||
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
|
||||
#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
|
||||
#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2)
|
||||
|
||||
|
||||
#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
|
||||
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
|
||||
|
|
|
@ -1412,7 +1412,7 @@
|
|||
[(match_operand:V_noHI 1 "register_operand" " v")
|
||||
(match_operand:SI 2 "const_int_operand" " n")]
|
||||
UNSPEC_MOV_DPP_SHR))]
|
||||
""
|
||||
"!TARGET_RDNA2"
|
||||
{
|
||||
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
|
||||
UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
|
||||
|
@ -1548,7 +1548,7 @@
|
|||
(match_dup 1))
|
||||
(match_dup 1))))]
|
||||
""
|
||||
"v_addc%^_u32\t%0, %4, %2, %1, %3"
|
||||
"{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
|
||||
[(set_attr "type" "vop2,vop3b")
|
||||
(set_attr "length" "4,8")])
|
||||
|
||||
|
@ -1613,10 +1613,10 @@
|
|||
(match_dup 1))))]
|
||||
""
|
||||
"@
|
||||
v_subb%^_u32\t%0, %4, %1, %2, %3
|
||||
v_subb%^_u32\t%0, %4, %1, %2, %3
|
||||
v_subbrev%^_u32\t%0, %4, %2, %1, %3
|
||||
v_subbrev%^_u32\t%0, %4, %2, %1, %3"
|
||||
{v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
|
||||
{v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
|
||||
{v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
|
||||
{v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
|
||||
[(set_attr "type" "vop2,vop3b,vop2,vop3b")
|
||||
(set_attr "length" "4,8,4,8")])
|
||||
|
||||
|
@ -3667,11 +3667,11 @@
|
|||
;; {{{ Vector comparison/merge
|
||||
|
||||
(define_insn "vec_cmp<mode>di"
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
|
||||
(match_operator:DI 1 "gcn_fp_compare_operator"
|
||||
[(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")]))
|
||||
(clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
|
||||
[(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
|
||||
(clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
|
||||
""
|
||||
"@
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
|
@ -3679,9 +3679,12 @@
|
|||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
|
||||
(set_attr "length" "4,8,4,8,8,8")])
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmpx%E1\t%2, %3
|
||||
v_cmpx%E1\t%2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
|
||||
(set_attr "length" "4,8,4,8,8,8,4,8")
|
||||
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
|
||||
|
||||
(define_expand "vec_cmpu<mode>di"
|
||||
[(match_operand:DI 0 "register_operand")
|
||||
|
@ -3716,13 +3719,13 @@
|
|||
})
|
||||
|
||||
(define_insn "vec_cmp<mode>di_exec"
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
|
||||
(and:DI
|
||||
(match_operator 1 "gcn_fp_compare_operator"
|
||||
[(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v")])
|
||||
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
|
||||
(clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
|
||||
[(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
|
||||
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
|
||||
(clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
|
||||
""
|
||||
"@
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
|
@ -3730,9 +3733,12 @@
|
|||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
|
||||
(set_attr "length" "4,8,4,8,8,8")])
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmpx%E1\t%2, %3
|
||||
v_cmpx%E1\t%2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
|
||||
(set_attr "length" "4,8,4,8,8,8,4,8")
|
||||
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
|
||||
|
||||
(define_expand "vec_cmpu<mode>di_exec"
|
||||
[(match_operand:DI 0 "register_operand")
|
||||
|
@ -3772,42 +3778,48 @@
|
|||
})
|
||||
|
||||
(define_insn "vec_cmp<mode>di_dup"
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
|
||||
(match_operator:DI 1 "gcn_fp_compare_operator"
|
||||
[(vec_duplicate:V_noQI
|
||||
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
|
||||
" Sv, B,Sv,B, A"))
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")]))
|
||||
(clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
|
||||
" Sv, B,Sv,B, A,Sv,B"))
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
|
||||
(clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
|
||||
""
|
||||
"@
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
|
||||
(set_attr "length" "4,8,4,8,8")])
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmpx%E1\t%2, %3
|
||||
v_cmpx%E1\t%2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
|
||||
(set_attr "length" "4,8,4,8,8,4,8")
|
||||
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
|
||||
|
||||
(define_insn "vec_cmp<mode>di_dup_exec"
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
|
||||
[(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
|
||||
(and:DI
|
||||
(match_operator 1 "gcn_fp_compare_operator"
|
||||
[(vec_duplicate:V_noQI
|
||||
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
|
||||
" Sv, B,Sv,B, A"))
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v")])
|
||||
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
|
||||
(clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
|
||||
" Sv, B,Sv,B, A,Sv,B"))
|
||||
(match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
|
||||
(match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
|
||||
(clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
|
||||
""
|
||||
"@
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
v_cmp%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmpx%E1\tvcc, %2, %3
|
||||
v_cmp%E1\t%0, %2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
|
||||
(set_attr "length" "4,8,4,8,8")])
|
||||
v_cmp%E1\t%0, %2, %3
|
||||
v_cmpx%E1\t%2, %3
|
||||
v_cmpx%E1\t%2, %3"
|
||||
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
|
||||
(set_attr "length" "4,8,4,8,8,4,8")
|
||||
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
|
||||
|
||||
(define_expand "vcond_mask_<mode>di"
|
||||
[(parallel
|
||||
|
@ -4176,7 +4188,7 @@
|
|||
(unspec:<SCALAR_MODE>
|
||||
[(match_operand:V_ALL 1 "register_operand")]
|
||||
REDUC_UNSPEC))]
|
||||
""
|
||||
"!TARGET_RDNA2"
|
||||
{
|
||||
rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
|
||||
<reduc_unspec>);
|
||||
|
@ -4229,7 +4241,8 @@
|
|||
REDUC_UNSPEC))]
|
||||
; GCN3 requires a carry out, GCN5 not
|
||||
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
|
||||
&& <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
|
||||
&& <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
|
||||
&& !TARGET_RDNA2"
|
||||
{
|
||||
return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
|
||||
<reduc_unspec>, INTVAL (operands[3]));
|
||||
|
@ -4274,7 +4287,7 @@
|
|||
(match_operand:SI 3 "const_int_operand" "n")]
|
||||
UNSPEC_PLUS_CARRY_DPP_SHR))
|
||||
(clobber (reg:DI VCC_REG))]
|
||||
""
|
||||
"!TARGET_RDNA2"
|
||||
{
|
||||
return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
|
||||
UNSPEC_PLUS_CARRY_DPP_SHR,
|
||||
|
@ -4292,7 +4305,7 @@
|
|||
(match_operand:DI 4 "register_operand" "cV")]
|
||||
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
|
||||
(clobber (reg:DI VCC_REG))]
|
||||
""
|
||||
"!TARGET_RDNA2"
|
||||
{
|
||||
return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
|
||||
UNSPEC_PLUS_CARRY_IN_DPP_SHR,
|
||||
|
|
|
@ -136,6 +136,7 @@ gcn_option_override (void)
|
|||
: gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
|
||||
: gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
|
||||
: gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
|
||||
: gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
|
||||
: ISA_UNKNOWN);
|
||||
gcc_assert (gcn_isa != ISA_UNKNOWN);
|
||||
|
||||
|
@ -1616,6 +1617,7 @@ gcn_global_address_p (rtx addr)
|
|||
{
|
||||
rtx base = XEXP (addr, 0);
|
||||
rtx offset = XEXP (addr, 1);
|
||||
int offsetbits = (TARGET_RDNA2 ? 11 : 12);
|
||||
bool immediate_p = (CONST_INT_P (offset)
|
||||
&& INTVAL (offset) >= -(1 << 12)
|
||||
&& INTVAL (offset) < (1 << 12));
|
||||
|
@ -1748,10 +1750,11 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
|
|||
rtx base = XEXP (x, 0);
|
||||
rtx offset = XEXP (x, 1);
|
||||
|
||||
int offsetbits = (TARGET_RDNA2 ? 11 : 12);
|
||||
bool immediate_p = (GET_CODE (offset) == CONST_INT
|
||||
/* Signed 13-bit immediate. */
|
||||
&& INTVAL (offset) >= -(1 << 12)
|
||||
&& INTVAL (offset) < (1 << 12)
|
||||
/* Signed 12/13-bit immediate. */
|
||||
&& INTVAL (offset) >= -(1 << offsetbits)
|
||||
&& INTVAL (offset) < (1 << offsetbits)
|
||||
/* The low bits of the offset are ignored, even
|
||||
when they're meant to realign the pointer. */
|
||||
&& !(INTVAL (offset) & 0x3));
|
||||
|
@ -3029,6 +3032,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
|
|||
return gcn_arch == PROCESSOR_GFX908;
|
||||
if (strcmp (name, "gfx90a") == 0)
|
||||
return gcn_arch == PROCESSOR_GFX90a;
|
||||
if (strcmp (name, "gfx1030") == 0)
|
||||
return gcn_arch == PROCESSOR_GFX1030;
|
||||
return 0;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -3610,9 +3615,11 @@ gcn_expand_epilogue (void)
|
|||
set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
|
||||
emit_move_insn (kernarg_reg, retptr_mem);
|
||||
|
||||
rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
|
||||
rtx scalar_retval = gen_rtx_REG (SImode, FIRST_PARM_REG);
|
||||
set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
|
||||
rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG);
|
||||
emit_move_insn (retval_addr, kernarg_reg);
|
||||
rtx retval_mem = gen_rtx_MEM (SImode, retval_addr);
|
||||
rtx scalar_retval = gen_rtx_REG (SImode, FIRST_VPARM_REG + 2);
|
||||
set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT);
|
||||
emit_move_insn (scalar_retval, gen_rtx_REG (SImode, RETURN_VALUE_REG));
|
||||
emit_move_insn (retval_mem, scalar_retval);
|
||||
}
|
||||
|
@ -6454,6 +6461,11 @@ output_file_start (void)
|
|||
case PROCESSOR_GFX90a:
|
||||
cpu = "gfx90a";
|
||||
break;
|
||||
case PROCESSOR_GFX1030:
|
||||
cpu = "gfx1030";
|
||||
xnack = "";
|
||||
sram_ecc = "";
|
||||
break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
builtin_define ("__CDNA1__"); \
|
||||
else if (TARGET_CDNA2) \
|
||||
builtin_define ("__CDNA2__"); \
|
||||
else if (TARGET_RDNA2) \
|
||||
builtin_define ("__RDNA2__"); \
|
||||
if (TARGET_FIJI) \
|
||||
{ \
|
||||
builtin_define ("__fiji__"); \
|
||||
|
@ -43,6 +45,8 @@
|
|||
builtin_define ("__gfx90a__"); \
|
||||
} while (0)
|
||||
|
||||
#define ASSEMBLER_DIALECT (TARGET_RDNA2 ? 1 : 0)
|
||||
|
||||
/* Support for a compile-time default architecture and tuning.
|
||||
The rules are:
|
||||
--with-arch is ignored if -march is specified.
|
||||
|
|
|
@ -285,9 +285,16 @@
|
|||
; Disable alternatives that only apply to specific ISA variants.
|
||||
|
||||
(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
|
||||
(define_attr "rdna" "any,no,yes" (const_string "any"))
|
||||
|
||||
(define_attr "enabled" ""
|
||||
(cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
|
||||
(cond [(and (eq_attr "rdna" "no")
|
||||
(ne (symbol_ref "TARGET_RDNA2") (const_int 0)))
|
||||
(const_int 0)
|
||||
(and (eq_attr "rdna" "yes")
|
||||
(eq (symbol_ref "TARGET_RDNA2") (const_int 0)))
|
||||
(const_int 0)
|
||||
(eq_attr "gcn_version" "gcn3") (const_int 1)
|
||||
(and (eq_attr "gcn_version" "gcn5")
|
||||
(ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
|
||||
(const_int 1)]
|
||||
|
@ -812,7 +819,7 @@
|
|||
if (cfun && cfun->machine && cfun->machine->normal_function)
|
||||
return "s_setpc_b64\ts[18:19]";
|
||||
else
|
||||
return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
|
||||
return "s_waitcnt\tlgkmcnt(0)\;s_endpgm";
|
||||
}
|
||||
[(set_attr "type" "sop1")
|
||||
(set_attr "length" "12")])
|
||||
|
@ -1179,7 +1186,7 @@
|
|||
""
|
||||
"@
|
||||
s_addc_u32\t%0, %1, %2
|
||||
v_addc%^_u32\t%0, vcc, %2, %1, vcc"
|
||||
{v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc"
|
||||
[(set_attr "type" "sop2,vop2")
|
||||
(set_attr "length" "8,4")])
|
||||
|
||||
|
@ -1195,7 +1202,7 @@
|
|||
""
|
||||
"@
|
||||
s_addc_u32\t%0, %1, 0
|
||||
v_addc%^_u32\t%0, vcc, 0, %1, vcc"
|
||||
{v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc"
|
||||
[(set_attr "type" "sop2,vop2")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
|
@ -1225,7 +1232,8 @@
|
|||
gen_rtx_REG (DImode, CC_SAVE_REG) };
|
||||
|
||||
output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
|
||||
output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands);
|
||||
output_asm_insn ("{v_addc%^_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3",
|
||||
new_operands);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1363,7 +1371,7 @@
|
|||
s_mul_i32\t%0, %1, %2
|
||||
s_mulk_i32\t%0, %2
|
||||
s_mul_i32\t%0, %1, %2
|
||||
v_mul_lo_i32\t%0, %1, %2"
|
||||
v_mul_lo_u32\t%0, %1, %2"
|
||||
[(set_attr "type" "sop2,sopk,sop2,vop3a")
|
||||
(set_attr "length" "4,4,8,4")])
|
||||
|
||||
|
@ -1885,7 +1893,7 @@
|
|||
[(set (match_operand:BLK 0)
|
||||
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
|
||||
""
|
||||
"buffer_wbinvl1_vol"
|
||||
"{buffer_wbinvl1_vol|buffer_gl0_inv}"
|
||||
[(set_attr "type" "mubuf")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
|
@ -2004,6 +2012,7 @@
|
|||
(use (match_operand:SIDI 2 "immediate_operand" " i, i, i"))]
|
||||
""
|
||||
{
|
||||
/* FIXME: RDNA cache instructions may be too conservative? */
|
||||
switch (INTVAL (operands[2]))
|
||||
{
|
||||
case MEMMODEL_RELAXED:
|
||||
|
@ -2026,11 +2035,17 @@
|
|||
return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
|
||||
"s_dcache_wb_vol";
|
||||
case 1:
|
||||
return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
|
||||
"buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
|
||||
"buffer_gl0_inv"
|
||||
: "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
|
||||
"buffer_wbinvl1_vol");
|
||||
case 2:
|
||||
return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
|
||||
"buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
|
||||
"buffer_gl0_inv"
|
||||
: "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
|
||||
"buffer_wbinvl1_vol");
|
||||
}
|
||||
break;
|
||||
case MEMMODEL_ACQ_REL:
|
||||
|
@ -2042,11 +2057,17 @@
|
|||
return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
|
||||
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
|
||||
case 1:
|
||||
return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\t0\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol");
|
||||
case 2:
|
||||
return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2054,7 +2075,8 @@
|
|||
}
|
||||
[(set_attr "type" "smem,flat,flat")
|
||||
(set_attr "length" "20")
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")])
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")
|
||||
(set_attr "rdna" "no,*,*")])
|
||||
|
||||
(define_insn "atomic_store<mode>"
|
||||
[(set (match_operand:SIDI 0 "memory_operand" "=RS,RF,RM")
|
||||
|
@ -2084,9 +2106,13 @@
|
|||
case 0:
|
||||
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
|
||||
case 1:
|
||||
return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
|
||||
: "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc");
|
||||
case 2:
|
||||
return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
|
||||
: "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc");
|
||||
}
|
||||
break;
|
||||
case MEMMODEL_ACQ_REL:
|
||||
|
@ -2098,11 +2124,17 @@
|
|||
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
|
||||
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
|
||||
case 1:
|
||||
return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\t0\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol");
|
||||
case 2:
|
||||
return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2110,7 +2142,8 @@
|
|||
}
|
||||
[(set_attr "type" "smem,flat,flat")
|
||||
(set_attr "length" "20")
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")])
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")
|
||||
(set_attr "rdna" "no,*,*")])
|
||||
|
||||
(define_insn "atomic_exchange<mode>"
|
||||
[(set (match_operand:SIDI 0 "register_operand" "=Sm, v, v")
|
||||
|
@ -2145,11 +2178,17 @@
|
|||
return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
|
||||
"s_dcache_wb_vol\;s_dcache_inv_vol";
|
||||
case 1:
|
||||
return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
|
||||
"buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
|
||||
"buffer_gl0_inv"
|
||||
: "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
|
||||
"buffer_wbinvl1_vol");
|
||||
case 2:
|
||||
return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
|
||||
: "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
|
||||
}
|
||||
break;
|
||||
case MEMMODEL_RELEASE:
|
||||
|
@ -2160,12 +2199,19 @@
|
|||
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\tlgkmcnt(0)";
|
||||
case 1:
|
||||
return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0"
|
||||
: "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0");
|
||||
case 2:
|
||||
return "buffer_wbinvl1_vol\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)"
|
||||
: "buffer_wbinvl1_vol\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)");
|
||||
}
|
||||
break;
|
||||
case MEMMODEL_ACQ_REL:
|
||||
|
@ -2177,12 +2223,19 @@
|
|||
return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
|
||||
case 1:
|
||||
return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
|
||||
"s_waitcnt\t0\;buffer_wbinvl1_vol");
|
||||
case 2:
|
||||
return "buffer_wbinvl1_vol\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
|
||||
return (TARGET_RDNA2
|
||||
? "buffer_gl0_inv\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_gl0_inv"
|
||||
: "buffer_wbinvl1_vol\;"
|
||||
"global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
|
||||
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2190,7 +2243,8 @@
|
|||
}
|
||||
[(set_attr "type" "smem,flat,flat")
|
||||
(set_attr "length" "20")
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")])
|
||||
(set_attr "gcn_version" "gcn5,*,gcn5")
|
||||
(set_attr "rdna" "no,*,*")])
|
||||
|
||||
;; }}}
|
||||
;; {{{ OpenACC / OpenMP
|
||||
|
|
|
@ -40,6 +40,9 @@ Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
|
|||
EnumValue
|
||||
Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
|
||||
|
||||
EnumValue
|
||||
Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
|
||||
|
||||
march=
|
||||
Target RejectNegative Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
|
||||
Specify the name of the target GPU.
|
||||
|
|
|
@ -57,6 +57,8 @@
|
|||
#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
|
||||
#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
|
||||
#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
|
||||
#undef EF_AMDGPU_MACH_AMDGCN_GFX1030
|
||||
#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
|
||||
|
||||
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
|
||||
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
|
||||
|
@ -942,6 +944,8 @@ main (int argc, char **argv)
|
|||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX908;
|
||||
else if (strcmp (argv[i], "-march=gfx90a") == 0)
|
||||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a;
|
||||
else if (strcmp (argv[i], "-march=gfx1030") == 0)
|
||||
elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030;
|
||||
#define STR "-mstack-size="
|
||||
else if (startswith (argv[i], STR))
|
||||
gcn_stack_size = atoi (argv[i] + strlen (STR));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
|
||||
echo kind: gpu > $@
|
||||
echo arch: amdgcn gcn >> $@
|
||||
echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a >> $@
|
||||
echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 >> $@
|
||||
|
|
|
@ -229,7 +229,8 @@ do { \
|
|||
|
||||
|
||||
#if defined (__GCN3__) || defined (__GCN5__) \
|
||||
|| defined (__CDNA1__) || defined (__CDNA2__)
|
||||
|| defined (__CDNA1__) || defined (__CDNA2__) \
|
||||
|| defined (__RDNA2__)
|
||||
#define CDNA3_PLUS 0
|
||||
#else
|
||||
#define CDNA3_PLUS 1
|
||||
|
|
|
@ -377,7 +377,8 @@ typedef enum {
|
|||
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036
|
||||
} EF_AMDGPU_MACH;
|
||||
|
||||
const static int EF_AMDGPU_MACH_MASK = 0x000000ff;
|
||||
|
@ -1633,6 +1634,7 @@ const static char *gcn_gfx900_s = "gfx900";
|
|||
const static char *gcn_gfx906_s = "gfx906";
|
||||
const static char *gcn_gfx908_s = "gfx908";
|
||||
const static char *gcn_gfx90a_s = "gfx90a";
|
||||
const static char *gcn_gfx1030_s = "gfx1030";
|
||||
const static int gcn_isa_name_len = 6;
|
||||
|
||||
/* Returns the name that the HSA runtime uses for the ISA or NULL if we do not
|
||||
|
@ -1652,6 +1654,8 @@ isa_hsa_name (int isa) {
|
|||
return gcn_gfx908_s;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX90a:
|
||||
return gcn_gfx90a_s;
|
||||
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
|
||||
return gcn_gfx1030_s;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1691,6 +1695,9 @@ isa_code(const char *isa) {
|
|||
if (!strncmp (isa, gcn_gfx90a_s, gcn_isa_name_len))
|
||||
return EF_AMDGPU_MACH_AMDGCN_GFX90a;
|
||||
|
||||
if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len))
|
||||
return EF_AMDGPU_MACH_AMDGCN_GFX1030;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -253,8 +253,7 @@ gomp_free_pool_helper (void *thread_pool)
|
|||
#elif defined(__nvptx__)
|
||||
asm ("exit;");
|
||||
#elif defined(__AMDGCN__)
|
||||
asm ("s_dcache_wb\n\t"
|
||||
"s_endpgm");
|
||||
asm ("s_endpgm");
|
||||
#else
|
||||
#error gomp_free_pool_helper must terminate the thread
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue