[nvptx] Update bar.sync for ptx isa 6.0

In ptx isa 6.0, a new barrier instruction was added, and bar.sync was
redefined as barrier.sync.aligned.

The aligned modifier indicates that all threads in a CTA will execute the same
barrier instruction.

This seems fine for the form "bar.sync 0".

But a "bar.sync %rx,64" (as used for vector length > 32) may execute a
different barrier depending on the value of %rx, so we can't assume it's
aligned.

Fix this by using "barrier.sync %rx,64" instead.

Tested on x86_64 with nvptx accelerator.

gcc/ChangeLog:

2022-01-27  Tom de Vries  <tdevries@suse.de>

	* config/nvptx/nvptx-opts.h (enum ptx_version): Add PTX_VERSION_6_0.
	* config/nvptx/nvptx.h (TARGET_PTX_6_0): New macro.
	* config/nvptx/nvptx.md (define_insn "nvptx_barsync"): Use barrier
	insn for TARGET_PTX_6_0.
This commit is contained in:
Tom de Vries 2022-01-26 14:16:42 +01:00
parent 456de10c54
commit 57f971f992
3 changed files with 8 additions and 2 deletions

View file

@ -32,6 +32,7 @@ enum ptx_isa
/* PTX ISA versions.  The enumerators are listed in ascending version
   order, so their values can be compared with relational operators
   (e.g. ">= PTX_VERSION_6_0"); keep new entries in version order.  */
enum ptx_version
{
PTX_VERSION_3_1,
PTX_VERSION_6_0,
PTX_VERSION_6_3,
PTX_VERSION_7_0
};

View file

@ -91,6 +91,7 @@
#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
/* Each TARGET_PTX_x_y macro is true when ptx_version_option is at least
   PTX_VERSION_x_y.  */
#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0)
#define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
#define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)

View file

@ -1968,9 +1968,13 @@
""
{
if (INTVAL (operands[1]) == 0)
return "\\tbar.sync\\t%0;";
return (TARGET_PTX_6_0
? "\\tbarrier.sync.aligned\\t%0;"
: "\\tbar.sync\\t%0;");
else
return "\\tbar.sync\\t%0, %1;";
return (TARGET_PTX_6_0
? "\\tbarrier.sync\\t%0, %1;"
: "\\tbar.sync\\t%0, %1;");
}
[(set_attr "predicable" "false")])