[nvptx] Update bar.sync for ptx isa 6.0
In ptx isa 6.0, a new barrier instruction was added, and bar.sync was redefined as barrier.sync.aligned. The aligned modifier indicates that all threads in a CTA will execute the same barrier instruction. This seems fine for the form "bar.sync 0". But a "bar.sync %rx,64" (as used for vector length > 32) may execute a different barrier depending on the value of %rx, so we can't assume it's aligned. Fix this by using "barrier.sync %rx,64" instead. Tested on x86_64 with nvptx accelerator. gcc/ChangeLog: 2022-01-27 Tom de Vries <tdevries@suse.de> * config/nvptx/nvptx-opts.h (enum ptx_version): Add PTX_VERSION_6_0. * config/nvptx/nvptx.h (TARGET_PTX_6_0): New macro. * config/nvptx/nvptx.md (define_insn "nvptx_barsync"): Use barrier insn for TARGET_PTX_6_0.
This commit is contained in:
parent
456de10c54
commit
57f971f992
3 changed files with 8 additions and 2 deletions
|
@ -32,6 +32,7 @@ enum ptx_isa
|
|||
enum ptx_version
|
||||
{
|
||||
PTX_VERSION_3_1,
|
||||
PTX_VERSION_6_0,
|
||||
PTX_VERSION_6_3,
|
||||
PTX_VERSION_7_0
|
||||
};
|
||||
|
|
|
@ -91,6 +91,7 @@
|
|||
#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
|
||||
#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
|
||||
|
||||
#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0)
|
||||
#define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
|
||||
#define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)
|
||||
|
||||
|
|
|
@ -1968,9 +1968,13 @@
|
|||
""
|
||||
{
|
||||
if (INTVAL (operands[1]) == 0)
|
||||
return "\\tbar.sync\\t%0;";
|
||||
return (TARGET_PTX_6_0
|
||||
? "\\tbarrier.sync.aligned\\t%0;"
|
||||
: "\\tbar.sync\\t%0;");
|
||||
else
|
||||
return "\\tbar.sync\\t%0, %1;";
|
||||
return (TARGET_PTX_6_0
|
||||
? "\\tbarrier.sync\\t%0, %1;"
|
||||
: "\\tbar.sync\\t%0, %1;");
|
||||
}
|
||||
[(set_attr "predicable" "false")])
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue