nvptx offloading linking

gcc/
	* config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
	(Token, Stmt): Remove structs.
	(decls, vars, fns): Remove variables.
	(alloc_comment, append_stmt, is_keyword): Remove macros.
	(tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
	(write_stmt, write_stmts, parse_insn, parse_list_nosemi)
	(parse_init, parse_file): Remove functions.
	(read_file): Accept a pointer to a length and store into it.
	(process): Don't try to parse the input file, just write it out as
	a string, but looking for maps.  Also write out the length.
	(main): Don't use "-S" to compile PTX code.

libgomp/
	* oacc-ptx.h: Remove file, moving its content into...
	* config/nvptx/fortran.c: ... here...
	* config/nvptx/oacc-init.c: ..., here...
	* config/nvptx/oacc-parallel.c: ..., and here.
	* config/nvptx/openacc.f90: New file.
	* plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
	(link_ptx): Don't link in predefined bits of PTX code.

Co-Authored-By: Bernd Schmidt <bernds@codesourcery.com>

From-SVN: r228418
This commit is contained in:
Thomas Schwinge 2015-10-02 21:43:41 +02:00 committed by Thomas Schwinge
parent 7fcdbdd21d
commit 113020dc59
10 changed files with 617 additions and 1089 deletions

View file

@ -1,3 +1,18 @@
2015-10-02 Thomas Schwinge <thomas@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>
* config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
(Token, Stmt): Remove structs.
(decls, vars, fns): Remove variables.
(alloc_comment, append_stmt, is_keyword): Remove macros.
(tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
(write_stmt, write_stmts, parse_insn, parse_list_nosemi)
(parse_init, parse_file): Remove functions.
(read_file): Accept a pointer to a length and store into it.
(process): Don't try to parse the input file, just write it out as
a string, but looking for maps. Also write out the length.
(main): Don't use "-S" to compile PTX code.
2015-10-02 Jeff Law <law@redhat.com>
* tree-ssa-dom.c (optimize_stmt): Note when loop structures need

View file

@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload";
#define COMMENT_PREFIX "#"
/* Kinds of token produced by tokenize.  A single-character token uses the
   character itself as its kind, so the named kinds start above the 8-bit
   range.  A kind of 0 (the NUL that ends the input) marks the last entry
   of the token array.  */
typedef enum Kind
{
/* 0-ff used for single char tokens */
K_symbol = 0x100, /* a symbol: starts with a letter or '_' */
K_label, /* a label defn (i.e. symbol:); the ':' is part of the token */
K_ident, /* other ident: starts with '$' or '%' */
K_dotted, /* dotted identifier: starts with '.' not followed by a digit */
K_number, /* starts with a digit, or with '.' followed by a digit */
K_string, /* double-quoted string, quotes included */
K_comment /* comment text, without its comment delimiters */
} Kind;
/* One lexical token.  PTR points into the text that was tokenized, so the
   token is not NUL-terminated; LEN gives its extent.  */
typedef struct Token
{
unsigned short kind : 12; /* a Kind, or the character for single char tokens */
unsigned short space : 1; /* preceded by space */
unsigned short end : 1; /* succeeded by end of line */
/* Length of token (16 bits, so a longer token would be truncated) */
unsigned short len;
/* Token itself */
char const *ptr;
} Token;
/* statement info */
/* The low three bits (V_mask) hold the statement class; the remaining
   values are flag bits that are or-ed on top of the class.  */
typedef enum Vis
{
V_dot = 0, /* random pseudo */
V_var = 1, /* var decl/defn */
V_func = 2, /* func decl/defn */
V_insn = 3, /* random insn */
V_label = 4, /* label defn */
V_comment = 5, /* run of comment tokens; skipped by write_stmt */
V_pred = 6, /* predicate */
V_mask = 0x7, /* extracts the statement class from the flags */
V_global = 0x08, /* globalize */
V_weak = 0x10, /* weakly globalize */
V_no_eol = 0x20, /* no end of line */
V_prefix_comment = 0x40 /* prefixed comment */
} Vis;
/* A statement: a contiguous run of tokens plus classification flags.
   Statements are chained into singly linked lists through NEXT.  */
typedef struct Stmt
{
struct Stmt *next; /* next statement in the list, or null */
Token *tokens; /* first token of the statement */
unsigned char vis; /* Vis class and flag bits */
unsigned len : 12; /* number of tokens; only 12 bits are kept */
unsigned sym : 12; /* index of the symbol token within TOKENS, or all ones
when alloc_stmt was given no symbol */
} Stmt;
/* Node of a singly linked list of PTX names.  The lists are headed by
   func_ids and var_ids and are filled through record_id.  */
struct id_map
{
id_map *next; /* next entry, or null */
char *ptx_name; /* the recorded name */
};
/* Forward declarations of the reader, tokenizer, writer and parser.  */
static const char *read_file (FILE *);
static Token *tokenize (const char *);
static void write_token (FILE *, const Token *);
static void write_tokens (FILE *, const Token *, unsigned, int);
static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
/* Allocate a V_comment statement covering the tokens from S up to, but not
   including, E.  */
#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
/* Push statement S onto the front of the list whose head pointer is at V.
   Both arguments are evaluated more than once.  */
#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
static Stmt *rev_stmts (Stmt *);
static void write_stmt (FILE *, const Stmt *);
static void write_stmts (FILE *, const Stmt *);
static Token *parse_insn (Token *);
static Token *parse_list_nosemi (Token *);
static Token *parse_init (Token *);
static Token *parse_file (Token *);
/* Statement lists filled in by the parser.  append_stmt pushes at the
   head, so each list is in reverse order until run through rev_stmts.  */
static Stmt *decls;
static Stmt *vars;
static Stmt *fns;
/* Heads of the function and variable name lists.  Each tail pointer
   starts out pointing at its head and is handed to record_id by address.
   NOTE(review): presumably record_id appends at the tail and advances
   it -- confirm against record_id.  */
static id_map *func_ids, **funcs_tail = &func_ids;
static id_map *var_ids, **vars_tail = &var_ids;
@ -183,7 +111,7 @@ record_id (const char *p1, id_map ***where)
remember, there could be a NUL in the file itself. */
static const char *
read_file (FILE *stream)
read_file (FILE *stream, size_t *plen)
{
size_t alloc = 16384;
size_t base = 0;
@ -213,557 +141,10 @@ read_file (FILE *stream)
}
}
buffer[base] = 0;
*plen = base;
return buffer;
}
/* Split the NUL-terminated text at PTR into an array of tokens and return
   that array.  The tokens point into the text rather than copying it.
   The array ends with a token whose kind is 0 (the terminating NUL of the
   text).  White space is not tokenized; it is only recorded in the
   `space' flag of the following token and the `end' flag of the preceding
   one.  A line comment yields one K_comment token and a block comment
   yields one K_comment token per line; the comment delimiters are left
   out of the token text.  */
static Token *
tokenize (const char *ptr)
{
/* The token array starts at 1000 entries and doubles when full.  */
unsigned alloc = 1000;
unsigned num = 0;
Token *toks = XNEWVEC (Token, alloc);
/* 0 outside a block comment, 1 while inside one, and briefly 2 on the
   line where the closing delimiter has just been consumed.  */
int in_comment = 0;
/* Nonzero when the token stored last was not a comment.  */
int not_comment = 0;
for (;; num++)
{
const char *base;
unsigned kind;
int ws = 0;
int eol = 0;
again:
base = ptr;
/* Inside a block comment every line is scanned as a comment.  */
if (in_comment)
goto block_comment;
switch (kind = *ptr++)
{
default:
/* Single character token; its kind is the character itself.  */
break;
case '\n':
eol = 1;
/* Fall through */
case ' ':
case '\t':
case '\r':
case '\v':
/* White space */
/* It is only noted when the previous token was not a comment.  */
ws = not_comment;
goto again;
case '/':
{
if (*ptr == '/')
{
/* line comment. Do not include trailing \n */
base += 2;
for (; *ptr; ptr++)
if (*ptr == '\n')
break;
kind = K_comment;
}
else if (*ptr == '*')
{
/* block comment */
base += 2;
ptr++;
block_comment:
/* A continuation line of a block comment implies that the previous
   token ended its line.  */
eol = in_comment;
in_comment = 1;
for (; *ptr; ptr++)
{
if (*ptr == '\n')
{
/* Consume the newline; the length computation below drops it.  */
ptr++;
break;
}
if (ptr[0] == '*' && ptr[1] == '/')
{
/* Consume the closing delimiter; the length computation below drops
   both of its characters.  */
in_comment = 2;
ptr += 2;
break;
}
}
kind = K_comment;
}
else
/* A lone '/' is a single character token.  */
break;
}
break;
case '"':
/* quoted string */
/* A backslash skips the character that follows it.  */
kind = K_string;
while (*ptr)
if (*ptr == '"')
{
ptr++;
break;
}
else if (*ptr++ == '\\')
ptr++;
break;
case '.':
if (*ptr < '0' || *ptr > '9')
{
kind = K_dotted;
/* A dotted identifier is flagged as preceded by space unless it
   follows a comment.  */
ws = not_comment;
goto ident;
}
/* FALLTHROUGH */
case '0'...'9':
kind = K_number;
goto ident;
break;
case '$': /* local labels. */
case '%': /* register names, pseudos etc */
kind = K_ident;
goto ident;
case 'a'...'z':
case 'A'...'Z':
case '_':
kind = K_symbol; /* possible symbol name */
ident:
/* Extend the token over letters, digits, '_' and '$', plus the extra
   characters allowed for its kind.  */
for (; *ptr; ptr++)
{
if (*ptr >= 'A' && *ptr <= 'Z')
continue;
if (*ptr >= 'a' && *ptr <= 'z')
continue;
if (*ptr >= '0' && *ptr <= '9')
continue;
if (*ptr == '_' || *ptr == '$')
continue;
if (*ptr == '.' && kind != K_dotted)
/* Idents starting with a dot cannot have internal dots. */
continue;
if ((*ptr == '+' || *ptr == '-')
&& kind == K_number
&& (ptr[-1] == 'e' || ptr[-1] == 'E'
|| ptr[-1] == 'p' || ptr[-1] == 'P'))
/* exponent */
continue;
break;
}
/* A directly following ':' turns the token into a label and becomes
   part of it.  */
if (*ptr == ':')
{
ptr++;
kind = K_label;
}
break;
}
if (alloc == num)
{
alloc *= 2;
toks = XRESIZEVEC (Token, toks, alloc);
}
Token *tok = toks + num;
tok->kind = kind;
tok->space = ws;
tok->end = 0;
tok->ptr = base;
/* IN_COMMENT is the number of trailing characters to leave out: the
   newline (1) or the closing delimiter (2) of a block comment line.  */
tok->len = ptr - base - in_comment;
/* Fold the transient value 2 back to 0; 1 stays 1.  */
in_comment &= 1;
not_comment = kind != K_comment;
/* A newline seen before this token ends the previous token's line.  */
if (eol && num)
tok[-1].end = 1;
/* The NUL token has been stored as the terminator; stop.  */
if (!kind)
break;
}
return toks;
}
/* Write TOK to OUT in a form that can sit inside a C string literal.
   A blank is written first when the token was preceded by white space,
   and a "\n" escape sequence is written last when the token ends its
   line.  String tokens get their quotes escaped and each backslash
   doubled; every other token is copied through unchanged.  */
static void
write_token (FILE *out, Token const *tok)
{
  if (tok->space)
    fputc (' ', out);

  if (tok->kind == K_string)
    {
      /* Skip the opening quote; the count leaves out both quotes.  */
      const char *cur = tok->ptr + 1;

      fputs ("\\\"", out);
      for (size_t left = tok->len - 2; left;)
        {
          /* Copy the run up to the next backslash, or all that is left.  */
          const char *slash = (const char *)memchr (cur, '\\', left);
          size_t run = slash ? slash - cur : left;

          fprintf (out, "%.*s", (int)run, cur);
          cur += run;
          left -= run;
          if (slash)
            {
              /* Emit the backslash doubled and step over it.  */
              fputs ("\\\\", out);
              cur++;
              left--;
            }
        }
      fputs ("\\\"", out);
    }
  else
    /* All other tokens shouldn't have anything magic in them */
    fprintf (out, "%.*s", tok->len, tok->ptr);

  if (tok->end)
    fputs ("\\n", out);
}
/* Write the LEN tokens starting at TOKS to OUT as one C string literal,
   preceded by a tab.  When SPC is nonzero a blank is added just before
   the closing quote.  */
static void
write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
{
  Token const *const last = toks + len;

  fputs ("\t\"", out);
  while (toks != last)
    {
      write_token (out, toks);
      toks++;
    }
  if (spc)
    fputs (" ", out);
  fputs ("\"", out);
}
/* Return a new statement of class/flags VIS covering the tokens from
   TOKENS up to, but not including, END.  SYM, if non-null, is the symbol
   token of the statement and is stored as an index relative to TOKENS;
   otherwise the index is set to all ones.  The `space' flag of the first
   token is cleared.  Statements are carved out of pools of 1000 that are
   never freed.  */
static Stmt *
alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
{
  static unsigned remaining = 0;
  static Stmt *pool = 0;

  /* Refill the pool once it has been used up.  */
  if (remaining == 0)
    {
      remaining = 1000;
      pool = XNEWVEC (Stmt, remaining);
    }

  Stmt *fresh = pool;
  pool++;
  remaining--;

  tokens->space = 0;

  fresh->next = 0;
  fresh->vis = vis;
  fresh->tokens = tokens;
  fresh->len = end - tokens;
  if (sym)
    fresh->sym = sym - tokens;
  else
    fresh->sym = ~0;
  return fresh;
}
/* Reverse the statement list STMT in place and return its new head.  */
static Stmt *
rev_stmts (Stmt *stmt)
{
  Stmt *reversed = 0;

  for (Stmt *following; stmt; stmt = following)
    {
      following = stmt->next;
      stmt->next = reversed;
      reversed = stmt;
    }
  return reversed;
}
/* Write statement STMT to OUT as a string literal followed by a newline,
   or by a tab when the statement is flagged V_no_eol.  Comment statements
   produce no output.  Predicate statements get a trailing blank inside
   the literal.  */
static void
write_stmt (FILE *out, const Stmt *stmt)
{
  unsigned cls = stmt->vis & V_mask;

  if (cls == V_comment)
    return;

  write_tokens (out, stmt->tokens, stmt->len, cls == V_pred);
  if (stmt->vis & V_no_eol)
    fputs ("\t", out);
  else
    fputs ("\n", out);
}
/* Write every statement of the list STMTS to OUT, in list order.  */
static void
write_stmts (FILE *out, const Stmt *stmts)
{
  while (stmts)
    {
      write_stmt (out, stmts);
      stmts = stmts->next;
    }
}
/* Parse statements starting at TOK and append each one to the fns list.
   Parsing continues until the brace depth is back at zero, so a call that
   starts at '{' consumes the whole braced body.  Returns the first token
   that was not consumed.  */
static Token *
parse_insn (Token *tok)
{
/* Current nesting of '{' ... '}'.  */
unsigned depth = 0;
do
{
Stmt *stmt;
Token *sym = 0;
unsigned s = V_insn;
Token *start = tok;
switch (tok++->kind)
{
case K_comment:
/* Gather a run of comment tokens into one comment statement.  */
while (tok->kind == K_comment)
tok++;
stmt = alloc_comment (start, tok);
append_stmt (&fns, stmt);
continue;
case '{':
/* A brace is a statement of its own.  */
depth++;
break;
case '}':
depth--;
break;
case K_label:
/* Labels starting with '$' are not recorded as the symbol.  */
if (tok[-1].ptr[0] != '$')
sym = tok - 1;
tok[-1].end = 1;
s = V_label;
break;
case '@':
/* Predicate: '@', an optional '!', then the predicate operand.  */
tok->space = 0;
if (tok->kind == '!')
tok++;
if (tok->kind == K_symbol)
sym = tok;
tok++;
s = V_pred;
break;
default:
/* Ordinary instruction: everything up to and including the ';'.
   The scan stops only at ';'.  The last K_symbol seen is recorded.  */
for (; tok->kind != ';'; tok++)
{
if (tok->kind == ',')
tok[1].space = 0;
else if (tok->kind == K_symbol)
sym = tok;
}
tok++->end = 1;
break;
}
stmt = alloc_stmt (s, start, tok, sym);
append_stmt (&fns, stmt);
/* A comment on the same line as the statement: flag the statement so
   that no newline is written after it, and consume the comment as a
   statement of its own.  */
if (!tok[-1].end && tok[0].kind == K_comment)
{
stmt->vis |= V_no_eol;
stmt = alloc_comment (tok, tok + 1);
append_stmt (&fns, stmt);
tok++;
}
}
while (depth);
return tok;
}
/* Parse the directive at TOK together with its comma separated list of
   operands, which is not terminated by a semicolon.  The directive and
   its operands are appended to the decls list as one V_dot statement whose
   last token is flagged as ending its line.  Returns the first token
   after the list.  */
static Token *
parse_list_nosemi (Token *tok)
{
  Token *cur = tok;

  for (;;)
    {
      /* Step onto the next operand; stop at the end-of-input token.  */
      cur++;
      if (!cur->kind)
        break;
      /* Step past the operand; carry on only across a ','.  */
      cur++;
      if (cur->kind != ',')
        break;
    }

  cur[-1].end = 1;
  Stmt *stmt = alloc_stmt (V_dot, tok, cur, 0);
  append_stmt (&decls, stmt);
  return cur;
}
/* Nonzero iff token T is the PTX directive ".S", where S is a string
   literal naming the directive without its dot.  sizeof (S) counts the
   terminating NUL of S, which makes it equal to the length of ".S".
   The leading '.' is checked explicitly so that a non-directive token
   of the same length whose tail happens to spell S (e.g. a symbol named
   "xglobal" against "global") is not mistaken for the directive.
   T is evaluated more than once.  */
#define is_keyword(T,S) \
  (sizeof (S) == (T)->len \
   && (T)->ptr[0] == '.' \
   && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
/* Parse a variable initializer starting at TOK, the token after the '='.
   The initializer is cut into one V_insn statement per ','-terminated
   piece, the last piece ending at the ';'.  Each piece is appended to the
   vars list.  Returns the token after the ';'.  */
static Token *
parse_init (Token *tok)
{
for (;;)
{
Token *start = tok;
Token const *sym = 0;
Stmt *stmt;
/* Leading comments become a comment statement of their own.  */
if (tok->kind == K_comment)
{
while (tok->kind == K_comment)
tok++;
stmt = alloc_comment (start, tok);
append_stmt (&vars, stmt);
start = tok;
}
if (tok->kind == '{')
tok[1].space = 0;
/* Scan to the next ',' or ';', remembering the last symbol seen.  */
for (; tok->kind != ',' && tok->kind != ';'; tok++)
if (tok->kind == K_symbol)
sym = tok;
tok[1].space = 0;
/* The separator is part of the statement; ';' ends the initializer.  */
int end = tok++->kind == ';';
stmt = alloc_stmt (V_insn, start, tok, sym);
append_stmt (&vars, stmt);
/* A comment on the same line: suppress the newline after the statement
   and consume the comment as a statement of its own.  */
if (!tok[-1].end && tok->kind == K_comment)
{
stmt->vis |= V_no_eol;
stmt = alloc_comment (tok, tok + 1);
append_stmt (&vars, stmt);
tok++;
}
if (end)
break;
}
return tok;
}
/* Parse one top-level construct starting at TOK and file the resulting
   statements on the decls, vars or fns list.  Leading comments are
   scanned for ":VAR_MAP " and ":FUNC_MAP " markers, which are passed to
   record_id.  Returns the first token that was not consumed; the caller
   is expected to call this repeatedly until the end-of-input token.  */
static Token *
parse_file (Token *tok)
{
Stmt *comment = 0;
if (tok->kind == K_comment)
{
Token *start = tok;
while (tok->kind == K_comment)
{
/* Comment text starts after the comment delimiter, so these match
   lines of the form "//:VAR_MAP name" and "//:FUNC_MAP name".  */
if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
record_id (tok->ptr + 9, &vars_tail);
if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
record_id (tok->ptr + 10, &funcs_tail);
tok++;
}
comment = alloc_comment (start, tok);
comment->vis |= V_prefix_comment;
}
if (tok->kind == K_dotted)
{
/* Module-level directives go to the declarations.  */
if (is_keyword (tok, "version")
|| is_keyword (tok, "target")
|| is_keyword (tok, "address_size"))
{
if (comment)
append_stmt (&decls, comment);
tok = parse_list_nosemi (tok);
}
else
{
unsigned vis = 0;
const Token *def = 0;
unsigned is_decl = 0;
Token *start;
/* Scan the declaration header up to '=', '{', ';', a comment or the
   end of input, collecting the class and linkage from the directives
   and remembering the last symbol seen.  */
for (start = tok;
tok->kind && tok->kind != '=' && tok->kind != K_comment
&& tok->kind != '{' && tok->kind != ';'; tok++)
{
if (is_keyword (tok, "global")
|| is_keyword (tok, "const"))
vis |= V_var;
else if (is_keyword (tok, "func")
|| is_keyword (tok, "entry"))
vis |= V_func;
else if (is_keyword (tok, "visible"))
vis |= V_global;
else if (is_keyword (tok, "extern"))
is_decl = 1;
else if (is_keyword (tok, "weak"))
vis |= V_weak;
/* Adjust the spacing around parentheses.  */
if (tok->kind == '(')
{
tok[1].space = 0;
tok[0].space = 1;
}
else if (tok->kind == ')' && tok[1].kind != ';')
tok[1].space = 1;
if (tok->kind == K_symbol)
def = tok;
}
if (!tok->kind)
{
/* end of file */
if (comment)
append_stmt (&fns, comment);
}
else if (tok->kind == '{'
|| tok->kind == K_comment)
{
/* function defn */
Stmt *stmt = alloc_stmt (vis, start, tok, def);
if (comment)
{
append_stmt (&fns, comment);
stmt->vis |= V_prefix_comment;
}
append_stmt (&fns, stmt);
tok = parse_insn (tok);
}
else
{
/* The header ended at '=' or ';'; that token is part of the
   statement and ends its line.  */
int assign = tok->kind == '=';
tok++->end = 1;
if ((vis & V_mask) == V_var && !is_decl)
{
/* variable */
Stmt *stmt = alloc_stmt (vis, start, tok, def);
if (comment)
{
append_stmt (&vars, comment);
stmt->vis |= V_prefix_comment;
}
append_stmt (&vars, stmt);
if (assign)
tok = parse_init (tok);
}
else
{
/* declaration */
Stmt *stmt = alloc_stmt (vis, start, tok, 0);
if (comment)
{
append_stmt (&decls, comment);
stmt->vis |= V_prefix_comment;
}
append_stmt (&decls, stmt);
}
}
}
}
else
{
/* Something strange. Ignore it. */
/* Skip to the end of the line or of the input, whichever is first.  */
if (comment)
append_stmt (&fns, comment);
do
tok++;
while (tok->kind && !tok->end);
}
return tok;
}
/* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'. */
static unsigned
@ -839,22 +220,55 @@ access_check (const char *name, int mode)
static void
process (FILE *in, FILE *out)
{
const char *input = read_file (in);
Token *tok = tokenize (input);
size_t len = 0;
const char *input = read_file (in, &len);
const char *comma;
id_map const *id;
unsigned obj_count = 0;
unsigned ix;
do
tok = parse_file (tok);
while (tok->kind);
/* Dump out char arrays for each PTX object file. These are
terminated by a NUL. */
for (size_t i = 0; i != len;)
{
char c;
fprintf (out, "static const char ptx_code_%u[] = \n", obj_count++);
write_stmts (out, rev_stmts (decls));
write_stmts (out, rev_stmts (vars));
write_stmts (out, rev_stmts (fns));
fprintf (out, ";\n\n");
fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
while ((c = input[i++]))
{
switch (c)
{
case '\r':
continue;
case '\n':
fprintf (out, "\\n\"\n\t\"");
/* Look for mappings on subsequent lines. */
while (strncmp (input + i, "//:", 3) == 0)
{
i += 3;
if (strncmp (input + i, "VAR_MAP ", 8) == 0)
record_id (input + i + 8, &vars_tail);
else if (strncmp (input + i, "FUNC_MAP ", 9) == 0)
record_id (input + i + 9, &funcs_tail);
else
abort ();
/* Skip to next line. */
while (input[i++] != '\n')
continue;
}
continue;
case '"':
case '\\':
putc ('\\', out);
break;
default:
break;
}
putc (c, out);
}
fprintf (out, "\";\n\n");
}
/* Dump out array of pointers to ptx object strings. */
fprintf (out, "static const struct ptx_obj {\n"
@ -1068,7 +482,6 @@ main (int argc, char **argv)
default:
gcc_unreachable ();
}
obstack_ptr_grow (&argv_obstack, "-S");
for (int ix = 1; ix != argc; ix++)
{

View file

@ -1,3 +1,13 @@
2015-10-02 Thomas Schwinge <thomas@codesourcery.com>
* oacc-ptx.h: Remove file, moving its content into...
* config/nvptx/fortran.c: ... here...
* config/nvptx/oacc-init.c: ..., here...
* config/nvptx/oacc-parallel.c: ..., and here.
* config/nvptx/openacc.f90: New file.
* plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
(link_ptx): Don't link in predefined bits of PTX code.
2015-09-30 Nathan Sidwell <nathan@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>

View file

@ -0,0 +1,40 @@
/* OpenACC Runtime Fortran wrapper routines
Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Temporary hack; this will be provided by libgfortran. */
/* The function is implemented directly in PTX below: it declares and
   defines a visible function _gfortran_abort whose body executes "trap"
   followed by "ret".  */
extern void _gfortran_abort (void);
__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n"
".visible .func _gfortran_abort;\n"
"// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n"
".visible .func _gfortran_abort\n"
"{\n"
"trap;\n"
"ret;\n"
"}\n");

View file

@ -0,0 +1,42 @@
/* OpenACC Runtime initialization routines
Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "openacc.h"
/* For -O and higher, the compiler always attempts to expand acc_on_device, but
if the user disables the builtin, or calls it via a pointer, we'll need this
version.
Compile this with optimization, so that the compiler expands
this, rather than generating infinitely recursive code.
DEV is the device type to test for; the result is whatever
__builtin_acc_on_device yields for it.  */
int __attribute__ ((__optimize__ ("O2")))
acc_on_device (acc_device_t dev)
{
return __builtin_acc_on_device (dev);
}

View file

@ -0,0 +1,358 @@
/* OpenACC constructs
Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgomp_g.h"
/* Hand-written PTX for the GOACC_* helper functions.

   GOACC_tid, GOACC_ntid, GOACC_ctaid and GOACC_nctaid each take one u32
   argument selecting a dimension (0 = x, 1 = y, 2 = z) and return the
   matching component of the PTX special register %tid, %ntid, %ctaid or
   %nctaid respectively.  Any other argument value calls abort.

   GOACC_get_num_threads and GOACC_get_thread_num take no arguments and
   are built on top of those four; see the comments at their definitions
   below.  */
__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n"
".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n"
".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n"
".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n"
"// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n"
".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n"
"// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n"
".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n"
"// BEGIN GLOBAL FUNCTION DECL: abort\n"
".extern .func abort;\n"
/* GOACC_tid: component of %tid selected by the argument.  */
".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n"
"{\n"
".reg .u32 %ar1;\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
".reg .pred %r30;\n"
".reg .u32 %r31;\n"
".reg .pred %r32;\n"
".reg .u32 %r33;\n"
".reg .pred %r34;\n"
".local .align 8 .b8 %frame[4];\n"
"ld.param.u32 %ar1,[%in_ar1];\n"
"mov.u32 %r27,%ar1;\n"
"st.local.u32 [%frame],%r27;\n"
"ld.local.u32 %r28,[%frame];\n"
"mov.u32 %r29,1;\n"
"setp.eq.u32 %r30,%r28,%r29;\n"
"@%r30 bra $L4;\n"
"mov.u32 %r31,2;\n"
"setp.eq.u32 %r32,%r28,%r31;\n"
"@%r32 bra $L5;\n"
"mov.u32 %r33,0;\n"
"setp.eq.u32 %r34,%r28,%r33;\n"
"@!%r34 bra $L8;\n"
"mov.u32 %r23,%tid.x;\n"
"mov.u32 %r22,%r23;\n"
"bra $L7;\n"
"$L4:\n"
"mov.u32 %r24,%tid.y;\n"
"mov.u32 %r22,%r24;\n"
"bra $L7;\n"
"$L5:\n"
"mov.u32 %r25,%tid.z;\n"
"mov.u32 %r22,%r25;\n"
"bra $L7;\n"
"$L8:\n"
"{\n"
"{\n"
"call abort;\n"
"}\n"
"}\n"
"$L7:\n"
"mov.u32 %r26,%r22;\n"
"mov.u32 %retval,%r26;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n"
/* GOACC_ntid: component of %ntid selected by the argument.  */
".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n"
"{\n"
".reg .u32 %ar1;\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
".reg .pred %r30;\n"
".reg .u32 %r31;\n"
".reg .pred %r32;\n"
".reg .u32 %r33;\n"
".reg .pred %r34;\n"
".local .align 8 .b8 %frame[4];\n"
"ld.param.u32 %ar1,[%in_ar1];\n"
"mov.u32 %r27,%ar1;\n"
"st.local.u32 [%frame],%r27;\n"
"ld.local.u32 %r28,[%frame];\n"
"mov.u32 %r29,1;\n"
"setp.eq.u32 %r30,%r28,%r29;\n"
"@%r30 bra $L11;\n"
"mov.u32 %r31,2;\n"
"setp.eq.u32 %r32,%r28,%r31;\n"
"@%r32 bra $L12;\n"
"mov.u32 %r33,0;\n"
"setp.eq.u32 %r34,%r28,%r33;\n"
"@!%r34 bra $L15;\n"
"mov.u32 %r23,%ntid.x;\n"
"mov.u32 %r22,%r23;\n"
"bra $L14;\n"
"$L11:\n"
"mov.u32 %r24,%ntid.y;\n"
"mov.u32 %r22,%r24;\n"
"bra $L14;\n"
"$L12:\n"
"mov.u32 %r25,%ntid.z;\n"
"mov.u32 %r22,%r25;\n"
"bra $L14;\n"
"$L15:\n"
"{\n"
"{\n"
"call abort;\n"
"}\n"
"}\n"
"$L14:\n"
"mov.u32 %r26,%r22;\n"
"mov.u32 %retval,%r26;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n"
/* GOACC_ctaid: component of %ctaid selected by the argument.  */
".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n"
"{\n"
".reg .u32 %ar1;\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
".reg .pred %r30;\n"
".reg .u32 %r31;\n"
".reg .pred %r32;\n"
".reg .u32 %r33;\n"
".reg .pred %r34;\n"
".local .align 8 .b8 %frame[4];\n"
"ld.param.u32 %ar1,[%in_ar1];\n"
"mov.u32 %r27,%ar1;\n"
"st.local.u32 [%frame],%r27;\n"
"ld.local.u32 %r28,[%frame];\n"
"mov.u32 %r29,1;\n"
"setp.eq.u32 %r30,%r28,%r29;\n"
"@%r30 bra $L18;\n"
"mov.u32 %r31,2;\n"
"setp.eq.u32 %r32,%r28,%r31;\n"
"@%r32 bra $L19;\n"
"mov.u32 %r33,0;\n"
"setp.eq.u32 %r34,%r28,%r33;\n"
"@!%r34 bra $L22;\n"
"mov.u32 %r23,%ctaid.x;\n"
"mov.u32 %r22,%r23;\n"
"bra $L21;\n"
"$L18:\n"
"mov.u32 %r24,%ctaid.y;\n"
"mov.u32 %r22,%r24;\n"
"bra $L21;\n"
"$L19:\n"
"mov.u32 %r25,%ctaid.z;\n"
"mov.u32 %r22,%r25;\n"
"bra $L21;\n"
"$L22:\n"
"{\n"
"{\n"
"call abort;\n"
"}\n"
"}\n"
"$L21:\n"
"mov.u32 %r26,%r22;\n"
"mov.u32 %retval,%r26;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n"
/* GOACC_nctaid: component of %nctaid selected by the argument.  */
".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n"
"{\n"
".reg .u32 %ar1;\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
".reg .pred %r30;\n"
".reg .u32 %r31;\n"
".reg .pred %r32;\n"
".reg .u32 %r33;\n"
".reg .pred %r34;\n"
".local .align 8 .b8 %frame[4];\n"
"ld.param.u32 %ar1,[%in_ar1];\n"
"mov.u32 %r27,%ar1;\n"
"st.local.u32 [%frame],%r27;\n"
"ld.local.u32 %r28,[%frame];\n"
"mov.u32 %r29,1;\n"
"setp.eq.u32 %r30,%r28,%r29;\n"
"@%r30 bra $L25;\n"
"mov.u32 %r31,2;\n"
"setp.eq.u32 %r32,%r28,%r31;\n"
"@%r32 bra $L26;\n"
"mov.u32 %r33,0;\n"
"setp.eq.u32 %r34,%r28,%r33;\n"
"@!%r34 bra $L29;\n"
"mov.u32 %r23,%nctaid.x;\n"
"mov.u32 %r22,%r23;\n"
"bra $L28;\n"
"$L25:\n"
"mov.u32 %r24,%nctaid.y;\n"
"mov.u32 %r22,%r24;\n"
"bra $L28;\n"
"$L26:\n"
"mov.u32 %r25,%nctaid.z;\n"
"mov.u32 %r22,%r25;\n"
"bra $L28;\n"
"$L29:\n"
"{\n"
"{\n"
"call abort;\n"
"}\n"
"}\n"
"$L28:\n"
"mov.u32 %r26,%r22;\n"
"mov.u32 %retval,%r26;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n"
/* GOACC_get_num_threads: GOACC_ntid (0) * GOACC_nctaid (0), low 32 bits.  */
"// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n"
".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"
"{\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
"mov.u32 %r26,0;\n"
"{\n"
".param .u32 %retval_in;\n"
"{\n"
".param .u32 %out_arg0;\n"
"st.param.u32 [%out_arg0],%r26;\n"
"call (%retval_in),GOACC_ntid,(%out_arg0);\n"
"}\n"
"ld.param.u32 %r27,[%retval_in];\n"
"}\n"
"mov.u32 %r22,%r27;\n"
"mov.u32 %r28,0;\n"
"{\n"
".param .u32 %retval_in;\n"
"{\n"
".param .u32 %out_arg0;\n"
"st.param.u32 [%out_arg0],%r28;\n"
"call (%retval_in),GOACC_nctaid,(%out_arg0);\n"
"}\n"
"ld.param.u32 %r29,[%retval_in];\n"
"}\n"
"mov.u32 %r23,%r29;\n"
"mul.lo.u32 %r24,%r22,%r23;\n"
"mov.u32 %r25,%r24;\n"
"mov.u32 %retval,%r25;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n"
/* GOACC_get_thread_num:
   GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).  */
"// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n"
".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"
"{\n"
".reg .u32 %retval;\n"
".reg .u64 %hr10;\n"
".reg .u32 %r22;\n"
".reg .u32 %r23;\n"
".reg .u32 %r24;\n"
".reg .u32 %r25;\n"
".reg .u32 %r26;\n"
".reg .u32 %r27;\n"
".reg .u32 %r28;\n"
".reg .u32 %r29;\n"
".reg .u32 %r30;\n"
".reg .u32 %r31;\n"
".reg .u32 %r32;\n"
".reg .u32 %r33;\n"
"mov.u32 %r28,0;\n"
"{\n"
".param .u32 %retval_in;\n"
"{\n"
".param .u32 %out_arg0;\n"
"st.param.u32 [%out_arg0],%r28;\n"
"call (%retval_in),GOACC_ntid,(%out_arg0);\n"
"}\n"
"ld.param.u32 %r29,[%retval_in];\n"
"}\n"
"mov.u32 %r22,%r29;\n"
"mov.u32 %r30,0;\n"
"{\n"
".param .u32 %retval_in;\n"
"{\n"
".param .u32 %out_arg0;\n"
"st.param.u32 [%out_arg0],%r30;\n"
"call (%retval_in),GOACC_ctaid,(%out_arg0);\n"
"}\n"
"ld.param.u32 %r31,[%retval_in];\n"
"}\n"
"mov.u32 %r23,%r31;\n"
"mul.lo.u32 %r24,%r22,%r23;\n"
"mov.u32 %r32,0;\n"
"{\n"
".param .u32 %retval_in;\n"
"{\n"
".param .u32 %out_arg0;\n"
"st.param.u32 [%out_arg0],%r32;\n"
"call (%retval_in),GOACC_tid,(%out_arg0);\n"
"}\n"
"ld.param.u32 %r33,[%retval_in];\n"
"}\n"
"mov.u32 %r25,%r33;\n"
"add.u32 %r26,%r24,%r25;\n"
"mov.u32 %r27,%r26;\n"
"mov.u32 %retval,%r27;\n"
"st.param.u32 [%out_retval],%retval;\n"
"ret;\n"
"}\n");

View file

@ -0,0 +1,102 @@
! OpenACC Runtime Library Definitions.
! Copyright (C) 2014-2015 Free Software Foundation, Inc.
! Contributed by Tobias Burnus <burnus@net-b.de>
! and Mentor Embedded.
! This file is part of the GNU Offloading and Multi Processing Library
! (libgomp).
! Libgomp is free software; you can redistribute it and/or modify it
! under the terms of the GNU General Public License as published by
! the Free Software Foundation; either version 3, or (at your option)
! any later version.
! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
! FOR A PARTICULAR PURPOSE. See the GNU General Public License for
! more details.
! Under Section 7 of GPL version 3, you are granted additional
! permissions described in the GCC Runtime Library Exception, version
! 3.1, as published by the Free Software Foundation.
! You should have received a copy of the GNU General Public License and
! a copy of the GCC Runtime Library Exception along with this program;
! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
! <http://www.gnu.org/licenses/>.
! Wrapper functions will be built from openacc.f90. We use a separate file
! here, because for using ../../openacc.f90, implementations are required for
! all the functions that it wraps, which we currently don't provide, so linking
! would fail.
! Kind parameter and named constants for OpenACC device types.
! Only the names listed in the public statements are exported; int32 is
! kept private.
module openacc_kinds
use iso_fortran_env, only: int32
implicit none
private :: int32
public :: acc_device_kind
! Device types are 32-bit integers.
integer, parameter :: acc_device_kind = int32
public :: acc_device_none, acc_device_default, acc_device_host
public :: acc_device_not_host, acc_device_nvidia
! Keep in sync with include/gomp-constants.h.
integer (acc_device_kind), parameter :: acc_device_none = 0
integer (acc_device_kind), parameter :: acc_device_default = 1
integer (acc_device_kind), parameter :: acc_device_host = 2
! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
! The value 3 is left unused so that the constants below keep their values.
integer (acc_device_kind), parameter :: acc_device_not_host = 4
integer (acc_device_kind), parameter :: acc_device_nvidia = 5
end module
! Explicit interfaces for the procedures behind the generic acc_on_device.
module openacc_internal
use openacc_kinds
implicit none
! Fortran-level wrapper; it is defined as an external function at the end
! of this file.
interface
function acc_on_device_h (d)
import
integer (acc_device_kind) d
logical acc_on_device_h
end function
end interface
! The C function acc_on_device: takes the device type by value and
! returns a C int.
interface
function acc_on_device_l (d) &
bind (C, name = "acc_on_device")
use iso_c_binding, only: c_int
integer (c_int) :: acc_on_device_l
integer (c_int), value :: d
end function
end interface
end module
! User-visible module.  It re-exports the device kinds and provides the
! generic name acc_on_device, which resolves to acc_on_device_h.
module openacc
use openacc_kinds
use openacc_internal
implicit none
public :: acc_on_device
interface acc_on_device
procedure :: acc_on_device_h
end interface
end module openacc
! Fortran wrapper around the C function acc_on_device.  The result is
! .TRUE. exactly when the C function returns 1 for device type D.
function acc_on_device_h (d)
  use openacc_kinds
  use openacc_internal, only: acc_on_device_l
  integer (acc_device_kind) d
  logical acc_on_device_h

  acc_on_device_h = (acc_on_device_l (d) == 1)
end function

View file

@ -620,7 +620,11 @@ acc_set_device_num (int ord, acc_device_t d)
ialias (acc_set_device_num)
/* Compile on_device with optimization, so that the compiler expands
/* For -O and higher, the compiler always attempts to expand acc_on_device, but
if the user disables the builtin, or calls it via a pointer, we'll need this
version.
Compile this with optimization, so that the compiler expands
this, rather than generating infinitely recursive code. */
int __attribute__ ((__optimize__ ("O2")))

View file

@ -1,426 +0,0 @@
/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* PTX module text defining two visible functions, abort and
   _gfortran_abort, each of which executes "trap" followed by "ret".
   The last line of the definition deliberately carries no line
   continuation, so that the macro ends there regardless of what follows
   it in the file.  */
#define ABORT_PTX \
  ".version 3.1\n" \
  ".target sm_30\n" \
  ".address_size 64\n" \
  ".visible .func abort;\n" \
  ".visible .func abort\n" \
  "{\n" \
  "trap;\n" \
  "ret;\n" \
  "}\n" \
  ".visible .func _gfortran_abort;\n" \
  ".visible .func _gfortran_abort\n" \
  "{\n" \
  "trap;\n" \
  "ret;\n" \
  "}\n"
/* PTX module text defining acc_on_device and acc_on_device_h_.
   acc_on_device returns 1 when its u32 argument is 4 or 5 and 0
   otherwise.  acc_on_device_h_ takes a 64-bit pointer, loads a u32
   through it and returns the result of calling acc_on_device on that
   value.
Generated with:
$ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
*/
#define ACC_ON_DEVICE_PTX \
" .version 3.1\n" \
" .target sm_30\n" \
" .address_size 64\n" \
".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
"{\n" \
" .reg.u32 %ar1;\n" \
".reg.u32 %retval;\n" \
" .reg.u64 %hr10;\n" \
" .reg.u32 %r24;\n" \
" .reg.u32 %r25;\n" \
" .reg.pred %r27;\n" \
" .reg.u32 %r30;\n" \
" ld.param.u32 %ar1, [%in_ar1];\n" \
" mov.u32 %r24, %ar1;\n" \
" setp.ne.u32 %r27,%r24,4;\n" \
" set.u32.eq.u32 %r30,%r24,5;\n" \
" neg.s32 %r25, %r30;\n" \
" @%r27 bra $L3;\n" \
" mov.u32 %r25, 1;\n" \
"$L3:\n" \
" mov.u32 %retval, %r25;\n" \
" st.param.u32 [%out_retval], %retval;\n" \
" ret;\n" \
" }\n" \
".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
"{\n" \
" .reg.u64 %ar1;\n" \
".reg.u32 %retval;\n" \
" .reg.u64 %hr10;\n" \
" .reg.u64 %r25;\n" \
" .reg.u32 %r26;\n" \
" .reg.u32 %r27;\n" \
" ld.param.u64 %ar1, [%in_ar1];\n" \
" mov.u64 %r25, %ar1;\n" \
" ld.u32 %r26, [%r25];\n" \
" {\n" \
" .param.u32 %retval_in;\n" \
" {\n" \
" .param.u32 %out_arg0;\n" \
" st.param.u32 [%out_arg0], %r26;\n" \
" call (%retval_in), acc_on_device, (%out_arg0);\n" \
" }\n" \
" ld.param.u32 %r27, [%retval_in];\n" \
"}\n" \
" mov.u32 %retval, %r27;\n" \
" st.param.u32 [%out_retval], %retval;\n" \
" ret;\n" \
" }"
/* PTX source text for a self-contained module that defines six visible
   functions, each returning a u32:

     GOACC_tid (u32), GOACC_ntid (u32), GOACC_ctaid (u32),
     GOACC_nctaid (u32)
         Select on the argument: 0 reads the .x component, 1 the .y
         component and 2 the .z component of the special register
         %tid, %ntid, %ctaid or %nctaid respectively.  Any other
         argument value branches to a call to `abort'.

     GOACC_get_num_threads
         Returns GOACC_ntid (0) * GOACC_nctaid (0).

     GOACC_get_thread_num
         Returns GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).

   `abort' is only declared here (.extern), so its definition has to come
   from another module.  The macro expands to adjacent string literals,
   i.e. one C string.  */
#define GOACC_INTERNAL_PTX \
".version 3.1\n" \
".target sm_30\n" \
".address_size 64\n" \
".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
".extern .func abort;\n" \
".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
"{\n" \
".reg .u32 %ar1;\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
".reg .pred %r30;\n" \
".reg .u32 %r31;\n" \
".reg .pred %r32;\n" \
".reg .u32 %r33;\n" \
".reg .pred %r34;\n" \
".local .align 8 .b8 %frame[4];\n" \
"ld.param.u32 %ar1,[%in_ar1];\n" \
"mov.u32 %r27,%ar1;\n" \
"st.local.u32 [%frame],%r27;\n" \
"ld.local.u32 %r28,[%frame];\n" \
"mov.u32 %r29,1;\n" \
"setp.eq.u32 %r30,%r28,%r29;\n" \
"@%r30 bra $L4;\n" \
"mov.u32 %r31,2;\n" \
"setp.eq.u32 %r32,%r28,%r31;\n" \
"@%r32 bra $L5;\n" \
"mov.u32 %r33,0;\n" \
"setp.eq.u32 %r34,%r28,%r33;\n" \
"@!%r34 bra $L8;\n" \
"mov.u32 %r23,%tid.x;\n" \
"mov.u32 %r22,%r23;\n" \
"bra $L7;\n" \
"$L4:\n" \
"mov.u32 %r24,%tid.y;\n" \
"mov.u32 %r22,%r24;\n" \
"bra $L7;\n" \
"$L5:\n" \
"mov.u32 %r25,%tid.z;\n" \
"mov.u32 %r22,%r25;\n" \
"bra $L7;\n" \
"$L8:\n" \
"{\n" \
"{\n" \
"call abort;\n" \
"}\n" \
"}\n" \
"$L7:\n" \
"mov.u32 %r26,%r22;\n" \
"mov.u32 %retval,%r26;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n" \
".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
"{\n" \
".reg .u32 %ar1;\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
".reg .pred %r30;\n" \
".reg .u32 %r31;\n" \
".reg .pred %r32;\n" \
".reg .u32 %r33;\n" \
".reg .pred %r34;\n" \
".local .align 8 .b8 %frame[4];\n" \
"ld.param.u32 %ar1,[%in_ar1];\n" \
"mov.u32 %r27,%ar1;\n" \
"st.local.u32 [%frame],%r27;\n" \
"ld.local.u32 %r28,[%frame];\n" \
"mov.u32 %r29,1;\n" \
"setp.eq.u32 %r30,%r28,%r29;\n" \
"@%r30 bra $L11;\n" \
"mov.u32 %r31,2;\n" \
"setp.eq.u32 %r32,%r28,%r31;\n" \
"@%r32 bra $L12;\n" \
"mov.u32 %r33,0;\n" \
"setp.eq.u32 %r34,%r28,%r33;\n" \
"@!%r34 bra $L15;\n" \
"mov.u32 %r23,%ntid.x;\n" \
"mov.u32 %r22,%r23;\n" \
"bra $L14;\n" \
"$L11:\n" \
"mov.u32 %r24,%ntid.y;\n" \
"mov.u32 %r22,%r24;\n" \
"bra $L14;\n" \
"$L12:\n" \
"mov.u32 %r25,%ntid.z;\n" \
"mov.u32 %r22,%r25;\n" \
"bra $L14;\n" \
"$L15:\n" \
"{\n" \
"{\n" \
"call abort;\n" \
"}\n" \
"}\n" \
"$L14:\n" \
"mov.u32 %r26,%r22;\n" \
"mov.u32 %retval,%r26;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n" \
".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
"{\n" \
".reg .u32 %ar1;\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
".reg .pred %r30;\n" \
".reg .u32 %r31;\n" \
".reg .pred %r32;\n" \
".reg .u32 %r33;\n" \
".reg .pred %r34;\n" \
".local .align 8 .b8 %frame[4];\n" \
"ld.param.u32 %ar1,[%in_ar1];\n" \
"mov.u32 %r27,%ar1;\n" \
"st.local.u32 [%frame],%r27;\n" \
"ld.local.u32 %r28,[%frame];\n" \
"mov.u32 %r29,1;\n" \
"setp.eq.u32 %r30,%r28,%r29;\n" \
"@%r30 bra $L18;\n" \
"mov.u32 %r31,2;\n" \
"setp.eq.u32 %r32,%r28,%r31;\n" \
"@%r32 bra $L19;\n" \
"mov.u32 %r33,0;\n" \
"setp.eq.u32 %r34,%r28,%r33;\n" \
"@!%r34 bra $L22;\n" \
"mov.u32 %r23,%ctaid.x;\n" \
"mov.u32 %r22,%r23;\n" \
"bra $L21;\n" \
"$L18:\n" \
"mov.u32 %r24,%ctaid.y;\n" \
"mov.u32 %r22,%r24;\n" \
"bra $L21;\n" \
"$L19:\n" \
"mov.u32 %r25,%ctaid.z;\n" \
"mov.u32 %r22,%r25;\n" \
"bra $L21;\n" \
"$L22:\n" \
"{\n" \
"{\n" \
"call abort;\n" \
"}\n" \
"}\n" \
"$L21:\n" \
"mov.u32 %r26,%r22;\n" \
"mov.u32 %retval,%r26;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n" \
".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
"{\n" \
".reg .u32 %ar1;\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
".reg .pred %r30;\n" \
".reg .u32 %r31;\n" \
".reg .pred %r32;\n" \
".reg .u32 %r33;\n" \
".reg .pred %r34;\n" \
".local .align 8 .b8 %frame[4];\n" \
"ld.param.u32 %ar1,[%in_ar1];\n" \
"mov.u32 %r27,%ar1;\n" \
"st.local.u32 [%frame],%r27;\n" \
"ld.local.u32 %r28,[%frame];\n" \
"mov.u32 %r29,1;\n" \
"setp.eq.u32 %r30,%r28,%r29;\n" \
"@%r30 bra $L25;\n" \
"mov.u32 %r31,2;\n" \
"setp.eq.u32 %r32,%r28,%r31;\n" \
"@%r32 bra $L26;\n" \
"mov.u32 %r33,0;\n" \
"setp.eq.u32 %r34,%r28,%r33;\n" \
"@!%r34 bra $L29;\n" \
"mov.u32 %r23,%nctaid.x;\n" \
"mov.u32 %r22,%r23;\n" \
"bra $L28;\n" \
"$L25:\n" \
"mov.u32 %r24,%nctaid.y;\n" \
"mov.u32 %r22,%r24;\n" \
"bra $L28;\n" \
"$L26:\n" \
"mov.u32 %r25,%nctaid.z;\n" \
"mov.u32 %r22,%r25;\n" \
"bra $L28;\n" \
"$L29:\n" \
"{\n" \
"{\n" \
"call abort;\n" \
"}\n" \
"}\n" \
"$L28:\n" \
"mov.u32 %r26,%r22;\n" \
"mov.u32 %retval,%r26;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n" \
".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
"{\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
"mov.u32 %r26,0;\n" \
"{\n" \
".param .u32 %retval_in;\n" \
"{\n" \
".param .u32 %out_arg0;\n" \
"st.param.u32 [%out_arg0],%r26;\n" \
"call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
"}\n" \
"ld.param.u32 %r27,[%retval_in];\n" \
"}\n" \
"mov.u32 %r22,%r27;\n" \
"mov.u32 %r28,0;\n" \
"{\n" \
".param .u32 %retval_in;\n" \
"{\n" \
".param .u32 %out_arg0;\n" \
"st.param.u32 [%out_arg0],%r28;\n" \
"call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \
"}\n" \
"ld.param.u32 %r29,[%retval_in];\n" \
"}\n" \
"mov.u32 %r23,%r29;\n" \
"mul.lo.u32 %r24,%r22,%r23;\n" \
"mov.u32 %r25,%r24;\n" \
"mov.u32 %retval,%r25;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n" \
".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
"{\n" \
".reg .u32 %retval;\n" \
".reg .u64 %hr10;\n" \
".reg .u32 %r22;\n" \
".reg .u32 %r23;\n" \
".reg .u32 %r24;\n" \
".reg .u32 %r25;\n" \
".reg .u32 %r26;\n" \
".reg .u32 %r27;\n" \
".reg .u32 %r28;\n" \
".reg .u32 %r29;\n" \
".reg .u32 %r30;\n" \
".reg .u32 %r31;\n" \
".reg .u32 %r32;\n" \
".reg .u32 %r33;\n" \
"mov.u32 %r28,0;\n" \
"{\n" \
".param .u32 %retval_in;\n" \
"{\n" \
".param .u32 %out_arg0;\n" \
"st.param.u32 [%out_arg0],%r28;\n" \
"call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
"}\n" \
"ld.param.u32 %r29,[%retval_in];\n" \
"}\n" \
"mov.u32 %r22,%r29;\n" \
"mov.u32 %r30,0;\n" \
"{\n" \
".param .u32 %retval_in;\n" \
"{\n" \
".param .u32 %out_arg0;\n" \
"st.param.u32 [%out_arg0],%r30;\n" \
"call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \
"}\n" \
"ld.param.u32 %r31,[%retval_in];\n" \
"}\n" \
"mov.u32 %r23,%r31;\n" \
"mul.lo.u32 %r24,%r22,%r23;\n" \
"mov.u32 %r32,0;\n" \
"{\n" \
".param .u32 %retval_in;\n" \
"{\n" \
".param .u32 %out_arg0;\n" \
"st.param.u32 [%out_arg0],%r32;\n" \
"call (%retval_in),GOACC_tid,(%out_arg0);\n" \
"}\n" \
"ld.param.u32 %r33,[%retval_in];\n" \
"}\n" \
"mov.u32 %r25,%r33;\n" \
"add.u32 %r26,%r24,%r25;\n" \
"mov.u32 %r27,%r26;\n" \
"mov.u32 %retval,%r27;\n" \
"st.param.u32 [%out_retval],%retval;\n" \
"ret;\n" \
"}\n"

View file

@ -34,7 +34,6 @@
#include "openacc.h"
#include "config.h"
#include "libgomp-plugin.h"
#include "oacc-ptx.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
@ -750,35 +749,6 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
char *abort_ptx = ABORT_PTX;
r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
strlen (abort_ptx) + 1, 0, 0, 0, 0);
if (r != CUDA_SUCCESS)
{
GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
}
char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
if (r != CUDA_SUCCESS)
{
GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
cuda_error (r));
}
char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
if (r != CUDA_SUCCESS)
{
GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
cuda_error (r));
}
for (; num_objs--; ptx_objs++)
{
/* cuLinkAddData's 'data' argument erroneously omits the const