[nvptx, PR83589] Workaround for branch-around-nothing JIT bug
2018-01-24 Tom de Vries <tom@codesourcery.com> PR target/83589 * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1. (nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c. Add strict parameter. (prevent_branch_around_nothing): Insert dummy insn between branch to label and label with no ptx insn inbetween. * config/nvptx/nvptx.md (define_insn "fake_nop"): New insn. * testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test. From-SVN: r257016
This commit is contained in:
parent
be606483c9
commit
3dede32b88
5 changed files with 137 additions and 0 deletions
|
@ -1,3 +1,13 @@
|
|||
2018-01-24 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR target/83589
|
||||
* config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_2): Define to 1.
|
||||
(nvptx_pc_set, nvptx_condjump_label): New function. Copy from jump.c.
|
||||
Add strict parameter.
|
||||
(prevent_branch_around_nothing): Insert dummy insn between branch to
|
||||
label and label with no ptx insn inbetween.
|
||||
* config/nvptx/nvptx.md (define_insn "fake_nop"): New insn.
|
||||
|
||||
2018-01-24 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR target/81352
|
||||
|
|
|
@ -78,6 +78,7 @@
|
|||
#include "target-def.h"
|
||||
|
||||
#define WORKAROUND_PTXJIT_BUG 1
|
||||
#define WORKAROUND_PTXJIT_BUG_2 1
|
||||
|
||||
/* The various PTX memory areas an object might reside in. */
|
||||
enum nvptx_data_area
|
||||
|
@ -4363,6 +4364,93 @@ nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
|
|||
nvptx_neuter_pars (par->next, modes, outer);
|
||||
}
|
||||
|
||||
#if WORKAROUND_PTXJIT_BUG_2
|
||||
/* Variant of pc_set that only requires JUMP_P (INSN) if STRICT. This variant
|
||||
is needed in the nvptx target because the branches generated for
|
||||
parititioning are NONJUMP_INSN_P, not JUMP_P. */
|
||||
|
||||
static rtx
|
||||
nvptx_pc_set (const rtx_insn *insn, bool strict = true)
|
||||
{
|
||||
rtx pat;
|
||||
if ((strict && !JUMP_P (insn))
|
||||
|| (!strict && !INSN_P (insn)))
|
||||
return NULL_RTX;
|
||||
pat = PATTERN (insn);
|
||||
|
||||
/* The set is allowed to appear either as the insn pattern or
|
||||
the first set in a PARALLEL. */
|
||||
if (GET_CODE (pat) == PARALLEL)
|
||||
pat = XVECEXP (pat, 0, 0);
|
||||
if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == PC)
|
||||
return pat;
|
||||
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Variant of condjump_label that only requires JUMP_P (INSN) if STRICT. */
|
||||
|
||||
static rtx
|
||||
nvptx_condjump_label (const rtx_insn *insn, bool strict = true)
|
||||
{
|
||||
rtx x = nvptx_pc_set (insn, strict);
|
||||
|
||||
if (!x)
|
||||
return NULL_RTX;
|
||||
x = SET_SRC (x);
|
||||
if (GET_CODE (x) == LABEL_REF)
|
||||
return x;
|
||||
if (GET_CODE (x) != IF_THEN_ELSE)
|
||||
return NULL_RTX;
|
||||
if (XEXP (x, 2) == pc_rtx && GET_CODE (XEXP (x, 1)) == LABEL_REF)
|
||||
return XEXP (x, 1);
|
||||
if (XEXP (x, 1) == pc_rtx && GET_CODE (XEXP (x, 2)) == LABEL_REF)
|
||||
return XEXP (x, 2);
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Insert a dummy ptx insn when encountering a branch to a label with no ptx
|
||||
insn inbetween the branch and the label. This works around a JIT bug
|
||||
observed at driver version 384.111, at -O0 for sm_50. */
|
||||
|
||||
static void
|
||||
prevent_branch_around_nothing (void)
|
||||
{
|
||||
rtx_insn *seen_label = NULL;
|
||||
for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
||||
{
|
||||
if (seen_label == NULL)
|
||||
{
|
||||
if (INSN_P (insn) && condjump_p (insn))
|
||||
seen_label = label_ref_label (nvptx_condjump_label (insn, false));
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (NOTE_P (insn) || DEBUG_INSN_P (insn))
|
||||
continue;
|
||||
|
||||
if (INSN_P (insn))
|
||||
switch (recog_memoized (insn))
|
||||
{
|
||||
case CODE_FOR_nvptx_fork:
|
||||
case CODE_FOR_nvptx_forked:
|
||||
case CODE_FOR_nvptx_joining:
|
||||
case CODE_FOR_nvptx_join:
|
||||
continue;
|
||||
default:
|
||||
seen_label = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (LABEL_P (insn) && insn == seen_label)
|
||||
emit_insn_before (gen_fake_nop (), insn);
|
||||
|
||||
seen_label = NULL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* PTX-specific reorganization
|
||||
- Split blocks at fork and join instructions
|
||||
- Compute live registers
|
||||
|
@ -4442,6 +4530,10 @@ nvptx_reorg (void)
|
|||
if (TARGET_UNIFORM_SIMT)
|
||||
nvptx_reorg_uniform_simt ();
|
||||
|
||||
#if WORKAROUND_PTXJIT_BUG_2
|
||||
prevent_branch_around_nothing ();
|
||||
#endif
|
||||
|
||||
regstat_free_n_sets_and_refs ();
|
||||
|
||||
df_finish_pass (true);
|
||||
|
|
|
@ -999,6 +999,15 @@
|
|||
""
|
||||
"exit;")
|
||||
|
||||
(define_insn "fake_nop"
|
||||
[(const_int 2)]
|
||||
""
|
||||
"{
|
||||
.reg .u32 %%nop_src;
|
||||
.reg .u32 %%nop_dst;
|
||||
mov.u32 %%nop_dst, %%nop_src;
|
||||
}")
|
||||
|
||||
(define_insn "return"
|
||||
[(return)]
|
||||
""
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2018-01-24 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR target/83589
|
||||
* testsuite/libgomp.oacc-c-c++-common/pr83589.c: New test.
|
||||
|
||||
2018-01-24 Tom de Vries <tom@codesourcery.com>
|
||||
|
||||
PR target/81352
|
||||
|
|
21
libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c
Normal file
21
libgomp/testsuite/libgomp.oacc-c-c++-common/pr83589.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-set-target-env-var GOMP_NVPTX_JIT "-O0" } */
|
||||
|
||||
#define n 32
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int arr_a[n];
|
||||
|
||||
#pragma acc parallel copyout(arr_a) num_gangs(1) num_workers(1) vector_length(32)
|
||||
{
|
||||
#pragma acc loop vector
|
||||
for (int m = 0; m < 32; m++)
|
||||
;
|
||||
|
||||
#pragma acc loop vector
|
||||
for (int m = 0; m < 32; m++)
|
||||
arr_a[m] = 0;
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue