nvptx: Support '-mfake-ptx-alloca'
With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only for configurations where PTX 'alloca' is not available. Rather than a compile-time 'sorry, unimplemented: dynamic stack allocation not supported' in the presence of dynamic stack allocation, compilation and assembly then succeed. However, attempting to link in such '*.o' files then fails due to the unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'. This is meant to be used in scenarios where large volumes of code are compiled, a small fraction of which runs into dynamic stack allocation, but these parts are not important for specific use cases, and we'd thus like the build to succeed, and error out just upon actual, very rare use of the offending '*.o' files. gcc/ * config/nvptx/nvptx.opt (-mfake-ptx-alloca): New. * config/nvptx/nvptx-protos.h (nvptx_output_fake_ptx_alloca): Declare. * config/nvptx/nvptx.cc (nvptx_output_fake_ptx_alloca): New. * config/nvptx/nvptx.md (define_insn "@nvptx_alloca_<mode>") [!(TARGET_PTX_7_3 && TARGET_SM52)]: Use it for '-mfake-ptx-alloca'. gcc/testsuite/ * gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c: New. * gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c: Likewise. * gcc.target/nvptx/alloca-4-O3.c: 'dg-additional-options -mfake-ptx-alloca'.
This commit is contained in:
parent
22e76700ae
commit
1146410c0f
9 changed files with 187 additions and 0 deletions
|
@ -55,6 +55,7 @@ extern rtx nvptx_expand_compare (rtx);
|
|||
extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
|
||||
extern const char *nvptx_output_mov_insn (rtx, rtx);
|
||||
extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
|
||||
extern const char *nvptx_output_fake_ptx_alloca (void);
|
||||
extern const char *nvptx_output_return (void);
|
||||
extern const char *nvptx_output_set_softstack (unsigned);
|
||||
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
|
||||
|
|
|
@ -1758,6 +1758,27 @@ nvptx_output_set_softstack (unsigned src_regno)
|
|||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
/* Output a fake PTX 'alloca'.

   Return the output template for a call to the external function
   '__GCC_nvptx__PTX_alloca_not_supported', with operand 0 receiving the
   result and operand 1 passed as the single argument.  On first use, also
   build an external, public declaration of that function, typed like the
   'BUILT_IN_ALLOCA' builtin, and register it via
   'nvptx_record_needed_fndecl'.

   The symbol is intended to remain unresolved: objects using it compile and
   assemble, but fail to link ('-mfake-ptx-alloca').  */
|
||||
|
||||
const char *
|
||||
nvptx_output_fake_ptx_alloca (void)
|
||||
{
|
||||
#define FAKE_PTX_ALLOCA_NAME "__GCC_nvptx__PTX_alloca_not_supported"
|
||||
/* Cached across calls, so the declaration is built and recorded only
   once.  */
static tree decl;
|
||||
if (!decl)
|
||||
{
|
||||
/* Reuse the function type of the 'alloca' builtin for the fake one.  */
tree alloca_type = TREE_TYPE (builtin_decl_explicit (BUILT_IN_ALLOCA));
|
||||
decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
|
||||
get_identifier (FAKE_PTX_ALLOCA_NAME), alloca_type);
|
||||
DECL_EXTERNAL (decl) = 1;
|
||||
TREE_PUBLIC (decl) = 1;
|
||||
nvptx_record_needed_fndecl (decl);
|
||||
}
|
||||
/* PTX call syntax: '(%0)' is the return value, '(%1)' the argument.  */
return "\tcall\t(%0), " FAKE_PTX_ALLOCA_NAME ", (%1);";
|
||||
#undef FAKE_PTX_ALLOCA_NAME
|
||||
}
|
||||
|
||||
/* Output a return instruction. Also copy the return value to its outgoing
|
||||
location. */
|
||||
|
||||
|
|
|
@ -1705,6 +1705,8 @@
|
|||
output_asm_insn ("}", NULL);
|
||||
return "";
|
||||
}
|
||||
else if (nvptx_fake_ptx_alloca)
|
||||
return nvptx_output_fake_ptx_alloca ();
|
||||
else
|
||||
{
|
||||
sorry_at (INSN_LOCATION (insn),
|
||||
|
@ -1733,6 +1735,7 @@
|
|||
gcc_checking_assert (REG_P (operands[0]));
|
||||
emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
|
||||
}
|
||||
/* We don't bother to special-case '-mfake-ptx-alloca' here. */
|
||||
else
|
||||
{
|
||||
/* The concept of a '%stack' pointer doesn't apply like this.
|
||||
|
@ -1765,6 +1768,7 @@
|
|||
operands[1] = force_reg (Pmode, operands[1]);
|
||||
emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
|
||||
}
|
||||
/* We don't bother to special-case '-mfake-ptx-alloca' here. */
|
||||
else if (!TARGET_SOFT_STACK)
|
||||
; /* See 'save_stack_block'. */
|
||||
else if (TARGET_SOFT_STACK)
|
||||
|
|
|
@ -167,3 +167,18 @@ Target Var(nvptx_alias) Init(0) Undocumented
|
|||
|
||||
mexperimental
|
||||
Target Var(nvptx_experimental) Init(0) Undocumented
|
||||
|
||||
mfake-ptx-alloca
|
||||
Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented
|
||||
; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only
|
||||
; for configurations where PTX 'alloca' is not available. Rather than a
|
||||
; compile-time 'sorry, unimplemented: dynamic stack allocation not supported'
|
||||
; in presence of dynamic stack allocation, compilation and assembly then
|
||||
; succeed.  However, attempting to link in such '*.o' files then fails due
|
||||
; to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'.
|
||||
;
|
||||
; This is meant to be used in scenarios where large volumes of code are
|
||||
; compiled, a small fraction of which runs into dynamic stack allocation, but
|
||||
; these parts are not important for specific use cases, and we'd thus like the
|
||||
; build to succeed, and error out just upon actual, very rare use of the
|
||||
; offending '*.o' files.
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options {-O0 -mno-soft-stack} } */
|
||||
/* { dg-additional-options -march=sm_30 } */
|
||||
/* { dg-additional-options -mfake-ptx-alloca } */
|
||||
/* { dg-additional-options -save-temps } */
|
||||
/* { dg-final { check-function-bodies {** } {} } } */
|
||||
|
||||
void sink(void *);
|
||||
|
||||
/* Perform one constant-size '__builtin_alloca' and hand the pointer to
   'sink'.  The expected assembly that follows shows the allocation being
   emitted as a call to '__GCC_nvptx__PTX_alloca_not_supported'.  */
void f(void)
|
||||
{
|
||||
sink(__builtin_alloca(123));
|
||||
/* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
|
||||
}
|
||||
/*
|
||||
** f:
|
||||
** \.visible \.func f
|
||||
** {
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** mov\.u64 \11, 16;
|
||||
** add\.u64 \2, \11, -1;
|
||||
** add\.u64 \3, \2, 123;
|
||||
** div\.u64 \4, \3, 16;
|
||||
** mul\.lo\.u64 \5, \4, 16;
|
||||
** call \(\6\), __GCC_nvptx__PTX_alloca_not_supported, \(\5\);
|
||||
** add\.u64 \7, \6, 15;
|
||||
** shr\.u64 \8, \7, 4;
|
||||
** shl\.b64 \9, \8, 4;
|
||||
** mov\.u64 \1, \9;
|
||||
** mov\.u64 \10, \1;
|
||||
** {
|
||||
** \.param\.u64 %out_arg1;
|
||||
** st\.param\.u64 \[%out_arg1\], \10;
|
||||
** call sink, \(%out_arg1\);
|
||||
** }
|
||||
** ret;
|
||||
*/
|
||||
|
||||
/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
|
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do link } */
|
||||
/* { dg-options {-O0 -mno-soft-stack} } */
|
||||
/* { dg-additional-options -march=sm_30 } */
|
||||
/* { dg-additional-options -mfake-ptx-alloca } */
|
||||
/* { dg-additional-options -save-temps } */
|
||||
|
||||
/* Compare the results of two separate '__builtin_alloca' calls.  The
   directives that follow expect exactly two calls to the fake 'alloca'
   symbol in the assembly, and expect the link to fail on that symbol being
   unresolved.  */
int
|
||||
main(void)
|
||||
{
|
||||
return !(__builtin_alloca(100) != __builtin_alloca(10));
|
||||
}
|
||||
/* { dg-final { scan-assembler-times {(?n)\tcall\t\(%r[0-9]+\), __GCC_nvptx__PTX_alloca_not_supported, \(%r[0-9]+\);$} 2 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
|
||||
|
||||
/* { dg-message __GCC_nvptx__PTX_alloca_not_supported {unresolved symbol} { target *-*-* } 0 } */
|
||||
|
||||
/* { dg-final output-exists-not } */
|
|
@ -1,6 +1,8 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options {-O3 -mno-soft-stack} } */
|
||||
/* { dg-add-options nvptx_alloca_ptx } */
|
||||
/* Verify the fake one isn't used if the real PTX 'alloca' is available.
|
||||
{ dg-additional-options -mfake-ptx-alloca } */
|
||||
/* { dg-additional-options -save-temps } */
|
||||
/* { dg-final { check-function-bodies {** } {} } } */
|
||||
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options {-O3 -mno-soft-stack} } */
|
||||
/* { dg-additional-options {-march=sm_30 -mfake-ptx-alloca} } */
|
||||
/* { dg-additional-options -save-temps } */
|
||||
/* { dg-final { check-function-bodies {** } {} } } */
|
||||
|
||||
void sink(void *);
|
||||
|
||||
/* Two constant-size allocations with a stack save/restore pair around the
   first one.  The expected assembly that follows contains just the two fake
   'alloca' calls (with sizes 32 and 16) and the two 'sink' calls; no
   instructions appear for the stack save/restore.  */
void f(void)
|
||||
{
|
||||
void *p;
|
||||
p = __builtin_stack_save();
|
||||
sink(__builtin_alloca(25));
|
||||
/* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
|
||||
__builtin_stack_restore(p);
|
||||
sink(__builtin_alloca(13));
|
||||
/* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
|
||||
}
|
||||
/*
|
||||
** f:
|
||||
** \.visible \.func f
|
||||
** {
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** \.reg\.u64 (%r[0-9]+);
|
||||
** call \(\1\), __GCC_nvptx__PTX_alloca_not_supported, \(32\);
|
||||
** add\.u64 \2, \1, 15;
|
||||
** and\.b64 \3, \2, -16;
|
||||
** {
|
||||
** \.param\.u64 %out_arg1;
|
||||
** st\.param\.u64 \[%out_arg1\], \3;
|
||||
** call sink, \(%out_arg1\);
|
||||
** }
|
||||
** call \(\4\), __GCC_nvptx__PTX_alloca_not_supported, \(16\);
|
||||
** add\.u64 \5, \4, 15;
|
||||
** and\.b64 \6, \5, -16;
|
||||
** {
|
||||
** \.param\.u64 %out_arg1;
|
||||
** st\.param\.u64 \[%out_arg1\], \6;
|
||||
** call sink, \(%out_arg1\);
|
||||
** }
|
||||
** ret;
|
||||
*/
|
||||
|
||||
/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
|
29
gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c
Normal file
29
gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options {-O0 -mno-soft-stack} } */
|
||||
/* { dg-additional-options -march=sm_30 } */
|
||||
/* { dg-additional-options -mfake-ptx-alloca } */
|
||||
/* { dg-additional-options -save-temps } */
|
||||
/* { dg-final { check-function-bodies {** } {} } } */
|
||||
|
||||
void sink(void *);
|
||||
|
||||
/* Declare a variable-length array sized by the parameter S and pass it to
   'sink'.  The expected assembly that follows shows the allocation being
   emitted as a call to '__GCC_nvptx__PTX_alloca_not_supported'.  */
void f(int s)
|
||||
{
|
||||
char a[s];
|
||||
/* { dg-bogus {sorry, unimplemented: dynamic stack allocation not supported} {} { target *-*-* } .-1 } */
|
||||
sink(a);
|
||||
}
|
||||
/*
|
||||
** f:
|
||||
** ...
|
||||
** cvt\.s64\.s32 (%r[0-9]+), (%r[0-9]+);
|
||||
** mov\.u64 (%r[0-9]+), 16;
|
||||
** add\.u64 (%r[0-9]+), \3, -1;
|
||||
** add\.u64 (%r[0-9]+), \1, \4;
|
||||
** div\.u64 (%r[0-9]+), \5, 16;
|
||||
** mul\.lo\.u64 (%r[0-9]+), \6, 16;
|
||||
** call \((%r[0-9]+)\), __GCC_nvptx__PTX_alloca_not_supported, \(\7\);
|
||||
** ...
|
||||
*/
|
||||
|
||||
/* { dg-final { scan-assembler-times {(?n)^\.extern \.func \(\.param\.u64 %value_out\) __GCC_nvptx__PTX_alloca_not_supported \(\.param\.u64 %in_ar0\);$} 1 } } */
|
Loading…
Add table
Reference in a new issue