cgraph.h (enum cgraph_simd_clone_arg_type): New.

* cgraph.h (enum cgraph_simd_clone_arg_type): New.
	(struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New.
	(struct cgraph_node): Add simdclone and simd_clones fields.
	* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen,
	ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions.
	(TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
	TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define.
	* doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
	TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add.
	* doc/tm.texi: Regenerated.
	* ggc.h (ggc_alloc_cleared_simd_clone_stat): New function.
	* ipa-cp.c (determine_versionability): Fail if "omp declare simd"
	attribute is present.
	* omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h.
	(simd_clone_vector_of_formal_parm_types): New function.
	(simd_clone_struct_alloc, simd_clone_struct_copy,
	simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract,
	simd_clone_compute_base_data_type, simd_clone_mangle,
	simd_clone_create, simd_clone_adjust_return_type,
	create_tmp_simd_array, simd_clone_adjust_argument_types,
	simd_clone_init_simd_arrays): New functions.
	(struct modify_stmt_info): New type.
	(ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body,
	simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New
	functions.
	(pass_data_omp_simd_clone): New variable.
	(pass_omp_simd_clone): New class.
	(make_pass_omp_simd_clone): New function.
	* passes.def (pass_omp_simd_clone): New.
	* target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
	TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target
	hooks.
	* target.h (struct cgraph_node, struct cgraph_simd_clone): Declare.
	* tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document.
	* tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define.
	* tree-pass.h (make_pass_omp_simd_clone): New prototype.
	* tree-vect-data-refs.c: Include cgraph.h.
	(vect_analyze_data_refs): Inline by hand find_data_references_in_loop
	and find_data_references_in_bb, if find_data_references_in_stmt
	fails, still allow calls to #pragma omp declare simd functions
	in #pragma omp simd loops unless they contain data references among
	the call arguments or in lhs.
	* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
	calls with no lhs.
	(vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without
	lhs.
	* tree-vectorizer.h (enum stmt_vec_info_type): Add
	call_simd_clone_vec_info_type.
	(struct _stmt_vec_info): Add simd_clone_fndecl field.
	(STMT_VINFO_SIMD_CLONE_FNDECL): Define.
	* tree-vect-stmts.c: Include tree-ssa-loop.h,
	tree-scalar-evolution.h and cgraph.h.
	(vectorizable_call): Handle calls without lhs.  Assert
	!stmt_can_throw_internal instead of failing for it.  Don't update
	EH stuff.
	(struct simd_call_arg_info): New.
	(vectorizable_simd_clone_call): New function.
	(vect_transform_stmt): Call it.
	(vect_analyze_stmt): Likewise.  Allow NULL STMT_VINFO_VECTYPE for
	calls without lhs.
	* ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node
	if cgraph_function_with_gimple_body_p is true.
c/
	* c-decl.c (c_builtin_function_ext_scope): Avoid binding if
	external_scope is NULL.
cp/
	* semantics.c (finish_omp_clauses): For #pragma omp declare simd
	linear clause step call maybe_constant_value.
testsuite/
	* g++.dg/gomp/declare-simd-1.C (f38): Make sure
	simdlen is a power of two.
	* gcc.dg/gomp/simd-clones-2.c: Compile on all targets.
	Remove -msse2.  Adjust regexps for name mangling changes.
	* gcc.dg/gomp/simd-clones-3.c: Likewise.
	* gcc.dg/vect/vect-simd-clone-1.c: New test.
	* gcc.dg/vect/vect-simd-clone-2.c: New test.
	* gcc.dg/vect/vect-simd-clone-3.c: New test.
	* gcc.dg/vect/vect-simd-clone-4.c: New test.
	* gcc.dg/vect/vect-simd-clone-5.c: New test.
	* gcc.dg/vect/vect-simd-clone-6.c: New test.
	* gcc.dg/vect/vect-simd-clone-7.c: New test.
	* gcc.dg/vect/vect-simd-clone-8.c: New test.
	* gcc.dg/vect/vect-simd-clone-9.c: New test.
	* gcc.dg/vect/vect-simd-clone-10.c: New test.
	* gcc.dg/vect/vect-simd-clone-10.h: New file.
	* gcc.dg/vect/vect-simd-clone-10a.c: New file.
	* gcc.dg/vect/vect-simd-clone-11.c: New test.

Co-Authored-By: Jakub Jelinek <jakub@redhat.com>

From-SVN: r205442
This commit is contained in:
Aldy Hernandez 2013-11-27 11:20:06 +00:00 committed by Jakub Jelinek
parent a7d4a96bf7
commit 0136f8f03a
46 changed files with 3234 additions and 29 deletions

View file

@ -1,3 +1,69 @@
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
Jakub Jelinek <jakub@redhat.com>
* cgraph.h (enum cgraph_simd_clone_arg_type): New.
(struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New.
(struct cgraph_node): Add simdclone and simd_clones fields.
* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen,
ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions.
(TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define.
* doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add.
* doc/tm.texi: Regenerated.
* ggc.h (ggc_alloc_cleared_simd_clone_stat): New function.
* ipa-cp.c (determine_versionability): Fail if "omp declare simd"
attribute is present.
* omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h.
(simd_clone_vector_of_formal_parm_types): New function.
(simd_clone_struct_alloc, simd_clone_struct_copy,
simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract,
simd_clone_compute_base_data_type, simd_clone_mangle,
simd_clone_create, simd_clone_adjust_return_type,
create_tmp_simd_array, simd_clone_adjust_argument_types,
simd_clone_init_simd_arrays): New functions.
(struct modify_stmt_info): New type.
(ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body,
simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New
functions.
(pass_data_omp_simd_clone): New variable.
(pass_omp_simd_clone): New class.
(make_pass_omp_simd_clone): New function.
* passes.def (pass_omp_simd_clone): New.
* target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target
hooks.
* target.h (struct cgraph_node, struct cgraph_simd_clone): Declare.
* tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document.
* tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define.
* tree-pass.h (make_pass_omp_simd_clone): New prototype.
* tree-vect-data-refs.c: Include cgraph.h.
(vect_analyze_data_refs): Inline by hand find_data_references_in_loop
and find_data_references_in_bb, if find_data_references_in_stmt
fails, still allow calls to #pragma omp declare simd functions
in #pragma omp simd loops unless they contain data references among
the call arguments or in lhs.
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
calls with no lhs.
(vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without
lhs.
* tree-vectorizer.h (enum stmt_vec_info_type): Add
call_simd_clone_vec_info_type.
(struct _stmt_vec_info): Add simd_clone_fndecl field.
(STMT_VINFO_SIMD_CLONE_FNDECL): Define.
* tree-vect-stmts.c: Include tree-ssa-loop.h,
tree-scalar-evolution.h and cgraph.h.
(vectorizable_call): Handle calls without lhs. Assert
!stmt_can_throw_internal instead of failing for it. Don't update
EH stuff.
(struct simd_call_arg_info): New.
(vectorizable_simd_clone_call): New function.
(vect_transform_stmt): Call it.
(vect_analyze_stmt): Likewise. Allow NULL STMT_VINFO_VECTYPE for
calls without lhs.
* ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node
if cgraph_function_with_gimple_body_p is true.
2013-11-27 Tom de Vries <tom@codesourcery.com>
Marc Glisse <marc.glisse@inria.fr>

View file

@ -1,3 +1,9 @@
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
Jakub Jelinek <jakub@redhat.com>
* c-decl.c (c_builtin_function_ext_scope): Avoid binding if
external_scope is NULL.
2013-11-27 Tom de Vries <tom@codesourcery.com>
Marc Glisse <marc.glisse@inria.fr>

View file

@ -3646,8 +3646,9 @@ c_builtin_function_ext_scope (tree decl)
const char *name = IDENTIFIER_POINTER (id);
C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type);
bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
UNKNOWN_LOCATION);
if (external_scope)
bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
UNKNOWN_LOCATION);
/* Builtins in the implementation namespace are made visible without
needing to be explicitly declared. See push_file_scope. */

View file

@ -256,6 +256,99 @@ struct GTY(()) cgraph_clone_info
bitmap combined_args_to_skip;
};
/* Classification of a single argument of a SIMD clone; stored in the
   arg_type field of struct cgraph_simd_clone_arg.  */
enum cgraph_simd_clone_arg_type
{
/* Argument is passed as a vector.  */
SIMD_CLONE_ARG_TYPE_VECTOR,
/* Argument is uniform.  */
SIMD_CLONE_ARG_TYPE_UNIFORM,
/* Argument is linear with a constant step; the step itself is kept in
   cgraph_simd_clone_arg::linear_step.  */
SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP,
/* Argument is linear with a variable step; linear_step then holds the
   index of the uniform argument that carries the step.  */
SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP,
/* NOTE(review): presumably the mask argument of a masked (in-branch)
   clone -- confirm against the clone-creation code.  */
SIMD_CLONE_ARG_TYPE_MASK
};
/* Function arguments in the original function of a SIMD clone.
Supplementary data for `struct cgraph_simd_clone', which holds an
array of these (one entry per annotated argument). */
struct GTY(()) cgraph_simd_clone_arg {
/* Original function argument as it originally existed in
DECL_ARGUMENTS. */
tree orig_arg;
/* orig_arg's type (or, for extern functions, the type taken from
TYPE_ARG_TYPES). */
tree orig_type;
/* If argument is a vector, this holds the vector version of
orig_arg that after adjusting the argument types will live in
DECL_ARGUMENTS. Otherwise, this is NULL.
This basically holds:
vector(simdlen) __typeof__(orig_arg) new_arg. */
tree vector_arg;
/* vector_arg's type (or, for extern functions, the new vector type). */
tree vector_type;
/* If argument is a vector, this holds the array where the simd
argument is held while executing the simd clone function. This
is a local variable in the cloned function. Its content is
copied from vector_arg upon entry to the clone.
This basically holds:
__typeof__(orig_arg) simd_array[simdlen]. */
tree simd_array;
/* A SIMD clone's argument can be either linear (constant or
variable), uniform, or vector. */
enum cgraph_simd_clone_arg_type arg_type;
/* For arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP this is
the constant linear step, if arg_type is
SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP, this is index of
the uniform argument holding the step, otherwise 0. */
HOST_WIDE_INT linear_step;
/* Variable alignment if available, otherwise 0. */
unsigned int alignment;
};
/* Specific data for a SIMD function clone. */
struct GTY(()) cgraph_simd_clone {
/* Number of words in the SIMD lane associated with this clone.
NOTE(review): the i386 hook derives a default for this as the vector
size in bits divided by the bit size of the base type's mode, i.e. an
element count per vector -- confirm the wording above. */
unsigned int simdlen;
/* Number of annotated function arguments in `args'. This is
usually the number of named arguments in FNDECL. */
unsigned int nargs;
/* Max hardware vector size in bits for integral vectors. */
unsigned int vecsize_int;
/* Max hardware vector size in bits for floating point vectors. */
unsigned int vecsize_float;
/* The mangling character for a given vector size. This is used
to determine the ISA mangling bit as specified in the Intel
Vector ABI. */
unsigned char vecsize_mangle;
/* True if this is the masked, in-branch version of the clone,
otherwise false. */
unsigned int inbranch : 1;
/* True if this is a Cilk Plus variant. */
unsigned int cilk_elemental : 1;
/* Doubly linked list of SIMD clones. */
struct cgraph_node *prev_clone, *next_clone;
/* Original cgraph node the SIMD clones were created for. */
struct cgraph_node *origin;
/* Annotated function arguments for the original function. Declared
with one element but described to the garbage collector as holding
`nargs' entries via the GTY length option. */
struct cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
};
/* The cgraph data structure.
Each function decl has assigned cgraph_node listing callees and callers. */
@ -284,6 +377,12 @@ public:
/* Declaration node used to be clone of. */
tree former_clone_of;
/* If this is a SIMD clone, this points to the SIMD specific
information for it. */
struct cgraph_simd_clone *simdclone;
/* If this function has SIMD clones, this points to the first clone. */
struct cgraph_node *simd_clones;
/* Interprocedural passes scheduled to have their transform functions
applied next time we execute local pass on them. We maintain it
per-function in order to allow IPA passes to introduce new functions. */

View file

@ -43690,6 +43690,184 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
return val;
}
/* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN.

   Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
   CLONEI->vecsize_float and, if CLONEI->simdlen is 0, also
   CLONEI->simdlen for a SIMD clone of NODE.  BASE_TYPE is the type
   whose mode size is used to derive the default simdlen.  NUM selects
   which of the 'b', 'c' and 'd' variants to describe when NODE is
   public and all three are emitted; it must then be in [0, 2].

   Return 0 if SIMD clones shouldn't be emitted (a warning is issued
   for an unsupported simdlen, return type or argument type), or the
   number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                             struct cgraph_simd_clone *clonei,
                                             tree base_type, int num)
{
  int ret = 1;

  /* An explicitly requested simdlen must be a power of two in [2, 16].  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
          || clonei->simdlen > 16
          || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      /* Diagnostic messages must not end in a newline; simdlen is
         unsigned, hence %u.  */
      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                  "unsupported simdlen %u", clonei->simdlen);
      return 0;
    }

  /* Only scalar integer and float modes are accepted for the return
     value (void is fine as well).  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case QImode:
      case HImode:
      case SImode:
      case DImode:
      case SFmode:
      case DFmode:
      /* case SCmode: */
      /* case DCmode: */
        break;
      default:
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported return type %qT for simd", ret_type);
        return 0;
      }

  /* The same restriction applies to every declared argument.  */
  tree t;
  for (t = DECL_ARGUMENTS (node->decl); t; t = DECL_CHAIN (t))
    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
    switch (TYPE_MODE (TREE_TYPE (t)))
      {
      case QImode:
      case HImode:
      case SImode:
      case DImode:
      case SFmode:
      case DFmode:
      /* case SCmode: */
      /* case DCmode: */
        break;
      default:
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported argument type %qT for simd", TREE_TYPE (t));
        return 0;
      }

  if (clonei->cilk_elemental)
    {
      /* Parse here processor clause.  If not present, default to 'b'.  */
      clonei->vecsize_mangle = 'b';
    }
  else if (!TREE_PUBLIC (node->decl))
    {
      /* If the function isn't exported, we can pick up just one ISA
         for the clones; RET stays 1.  */
      if (TARGET_AVX2)
        clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
        clonei->vecsize_mangle = 'c';
      else
        clonei->vecsize_mangle = 'b';
    }
  else
    {
      /* Exported function: emit all three ISA variants, NUM picks the
         one being described.  Guard the string index so a bad NUM is
         caught instead of reading past the literal.  */
      gcc_assert (num >= 0 && num < 3);
      clonei->vecsize_mangle = "bcd"[num];
      ret = 3;
    }

  /* Vector widths (in bits) implied by each mangling letter.  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    default:
      gcc_unreachable ();
    }

  /* No simdlen requested: derive it from the vector width that applies
     to BASE_TYPE, capped at 16.  */
  if (clonei->simdlen == 0)
    {
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
        clonei->simdlen = clonei->vecsize_int;
      else
        clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
      if (clonei->simdlen > 16)
        clonei->simdlen = 16;
    }
  return ret;
}
/* Implement TARGET_SIMD_CLONE_ADJUST.

   NODE must be the SIMD clone for the current function.  If the ISA
   named by its vecsize_mangle letter ('b' -> sse2, 'c' -> avx,
   'd' -> avx2) is not already enabled, apply the matching target
   attribute to NODE->decl and re-establish it as the current
   function; otherwise do nothing.  */

static void
ix86_simd_clone_adjust (struct cgraph_node *node)
{
  const char *isa = NULL;

  gcc_assert (node->decl == cfun->decl);

  /* Pick the ISA string to add, leaving ISA as NULL when the required
     ISA is already available.  */
  unsigned char mangle = node->simdclone->vecsize_mangle;
  if (mangle == 'b')
    {
      if (!TARGET_SSE2)
        isa = "sse2";
    }
  else if (mangle == 'c')
    {
      if (!TARGET_AVX)
        isa = "avx";
    }
  else if (mangle == 'd')
    {
      if (!TARGET_AVX2)
        isa = "avx2";
    }
  else
    gcc_unreachable ();

  if (isa == NULL)
    return;

  /* Validate and apply the attribute with no current function set.  */
  push_cfun (NULL);
  tree attr_args
    = build_tree_list (NULL_TREE, build_string (strlen (isa), isa));
  bool valid = ix86_valid_target_attribute_p (node->decl, NULL, attr_args, 0);
  gcc_assert (valid);
  pop_cfun ();

  /* Clear the cached previous fndecl before switching to NODE->decl.  */
  ix86_previous_fndecl = NULL_TREE;
  ix86_set_current_function (node->decl);
}
/* Implement TARGET_SIMD_CLONE_USABLE.

   If SIMD clone NODE can't be used in a vectorized loop in the
   current function, return -1; otherwise return a badness of using it
   (0 if it is most desirable from the vecsize_mangle point of view, 1
   slightly less desirable, etc.).  */

static int
ix86_simd_clone_usable (struct cgraph_node *node)
{
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      /* Requires SSE2; ranked lower the more AVX levels are enabled.  */
      if (!TARGET_SSE2)
        return -1;
      if (!TARGET_AVX)
        return 0;
      return TARGET_AVX2 ? 2 : 1;
    case 'c':
      /* Requires AVX; second choice when AVX2 is enabled.  */
      if (!TARGET_AVX)
        return -1;
      return TARGET_AVX2 ? 1 : 0;
    case 'd':
      /* Requires AVX2 and is always the best choice when usable.  */
      if (!TARGET_AVX2)
        return -1;
      return 0;
    default:
      gcc_unreachable ();
    }
}
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
static bool
@ -44178,6 +44356,18 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
/* SIMD clone target hooks: route the three TARGET_SIMD_CLONE_* hooks
   to the ix86 implementations defined in this file.  */
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
ix86_simd_clone_compute_vecsize_and_simdlen
#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
ix86_simd_clone_adjust
#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
ix86_simd_clone_usable
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
ix86_float_exceptions_rounding_supported_p

View file

@ -1,3 +1,9 @@
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
Jakub Jelinek <jakub@redhat.com>
* semantics.c (finish_omp_clauses): For #pragma omp declare simd
linear clause step call maybe_constant_value.
2013-11-27 Tom de Vries <tom@codesourcery.com>
Marc Glisse <marc.glisse@inria.fr>

View file

@ -5202,6 +5202,8 @@ finish_omp_clauses (tree clauses)
t = mark_rvalue_use (t);
if (!processing_template_decl)
{
if (TREE_CODE (OMP_CLAUSE_DECL (c)) == PARM_DECL)
t = maybe_constant_value (t);
t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
if (TREE_CODE (TREE_TYPE (OMP_CLAUSE_DECL (c)))
== POINTER_TYPE)

View file

@ -5818,6 +5818,26 @@ The default is @code{NULL_TREE} which means to not vectorize gather
loads.
@end deftypefn
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
@var{simdlen} field if it was previously 0.
The hook should return 0 if SIMD clones shouldn't be emitted,
or number of @var{vecsize_mangle} variants that should be emitted.
@end deftypefn
@deftypefn {Target Hook} void TARGET_SIMD_CLONE_ADJUST (struct cgraph_node *@var{})
This hook should add implicit @code{attribute(target("..."))} attribute
to SIMD clone @var{node} if needed.
@end deftypefn
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{})
This hook should return -1 if SIMD clone @var{node} shouldn't be used
in vectorized loops in current function, or non-negative number if it is
usable. In that case, the smaller the number is, the more desirable it is
to use it.
@end deftypefn
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses

View file

@ -4422,6 +4422,12 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_BUILTIN_GATHER
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
@hook TARGET_SIMD_CLONE_ADJUST
@hook TARGET_SIMD_CLONE_USABLE
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses

View file

@ -276,4 +276,11 @@ ggc_alloc_cleared_gimple_statement_stat (size_t s MEM_STAT_DECL)
ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
}
/* Allocate S bytes through ggc_internal_cleared_alloc_stat and return
   the result typed as a pointer to struct simd_clone.
   NOTE(review): the SIMD clone data added to cgraph.h is named
   struct cgraph_simd_clone; confirm whether `struct simd_clone' here
   is intentional or a stale name.  */
static inline struct simd_clone *
ggc_alloc_cleared_simd_clone_stat (size_t s MEM_STAT_DECL)
{
return (struct simd_clone *)
ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
}
#endif

View file

@ -430,6 +430,13 @@ determine_versionability (struct cgraph_node *node)
reason = "not a tree_versionable_function";
else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
reason = "insufficient body availability";
else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
{
/* Ideally we should clone the SIMD clones themselves and create
vector copies of them, so IPA-cp and SIMD clones can happily
coexist, but that may not be worth the effort. */
reason = "function has SIMD clones";
}
if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n",

View file

@ -3217,7 +3217,8 @@ ipa_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst,
static void
ipa_add_new_function (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
ipa_analyze_node (node);
if (cgraph_function_with_gimple_body_p (node))
ipa_analyze_node (node);
}
/* Register our cgraph hooks if they are not already there. */

View file

@ -426,6 +426,19 @@ symtab_remove_unreachable_nodes (bool before_inlining_p, FILE *file)
enqueue_node (cnode, &first, reachable);
}
}
}
/* If any reachable function has simd clones, mark them as
reachable as well. */
if (cnode->simd_clones)
{
cgraph_node *next;
for (next = cnode->simd_clones;
next;
next = next->simdclone->next_clone)
if (in_boundary_p
|| !pointer_set_insert (reachable, next))
enqueue_node (next, &first, reachable);
}
}
/* When we see constructor of external variable, keep referred nodes in the

File diff suppressed because it is too large Load diff

View file

@ -117,6 +117,7 @@ along with GCC; see the file COPYING3. If not see
compiled unit. */
INSERT_PASSES_AFTER (all_late_ipa_passes)
NEXT_PASS (pass_ipa_pta);
NEXT_PASS (pass_omp_simd_clone);
TERMINATE_PASS_LIST ()
/* These passes are run after IPA passes on every function that is being

View file

@ -1521,6 +1521,36 @@ hook_int_uint_mode_1)
HOOK_VECTOR_END (sched)
/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_SIMD_CLONE_"
HOOK_VECTOR (TARGET_SIMD_CLONE, simd_clone)
DEFHOOK
(compute_vecsize_and_simdlen,
"This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}\n\
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also\n\
@var{simdlen} field if it was previously 0.\n\
The hook should return 0 if SIMD clones shouldn't be emitted,\n\
or number of @var{vecsize_mangle} variants that should be emitted.",
int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
DEFHOOK
(adjust,
"This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\
to SIMD clone @var{node} if needed.",
void, (struct cgraph_node *), NULL)
DEFHOOK
(usable,
"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\
in vectorized loops in current function, or non-negative number if it is\n\
usable. In that case, the smaller the number is, the more desirable it is\n\
to use it.",
int, (struct cgraph_node *), NULL)
HOOK_VECTOR_END (simd_clone)
/* Functions relating to vectorization. */
#undef HOOK_PREFIX
#define HOOK_PREFIX "TARGET_VECTORIZE_"

View file

@ -93,6 +93,8 @@ extern bool target_default_pointer_address_modes_p (void);
struct stdarg_info;
struct spec_info_def;
struct hard_reg_set_container;
struct cgraph_node;
struct cgraph_simd_clone;
/* The struct used by the secondary_reload target hook. */
typedef struct secondary_reload_info

View file

@ -1,3 +1,25 @@
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
Jakub Jelinek <jakub@redhat.com>
* g++.dg/gomp/declare-simd-1.C (f38): Make sure
simdlen is a power of two.
* gcc.dg/gomp/simd-clones-2.c: Compile on all targets.
Remove -msse2. Adjust regexps for name mangling changes.
* gcc.dg/gomp/simd-clones-3.c: Likewise.
* gcc.dg/vect/vect-simd-clone-1.c: New test.
* gcc.dg/vect/vect-simd-clone-2.c: New test.
* gcc.dg/vect/vect-simd-clone-3.c: New test.
* gcc.dg/vect/vect-simd-clone-4.c: New test.
* gcc.dg/vect/vect-simd-clone-5.c: New test.
* gcc.dg/vect/vect-simd-clone-6.c: New test.
* gcc.dg/vect/vect-simd-clone-7.c: New test.
* gcc.dg/vect/vect-simd-clone-8.c: New test.
* gcc.dg/vect/vect-simd-clone-9.c: New test.
* gcc.dg/vect/vect-simd-clone-10.c: New test.
* gcc.dg/vect/vect-simd-clone-10.h: New file.
* gcc.dg/vect/vect-simd-clone-10a.c: New file.
* gcc.dg/vect/vect-simd-clone-11.c: New test.
2013-11-27 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path.

View file

@ -239,5 +239,5 @@ struct D
void
f38 (D &d)
{
d.f37 <12> (6);
d.f37 <16> (6);
}

View file

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-optimized -O3" } */
/* Test that functions that have SIMD clone counterparts are not
cloned by IPA-cp. For example, special_add() below has SIMD clones
created for it. However, if IPA-cp later decides to clone a
specialization of special_add(x, 666) when analyzing fillit(), we
will forever keep the vectorizer from using the SIMD versions of
special_add in a loop.
If IPA-CP gets taught how to adjust the SIMD clones as well, this
test could be removed. */
/* Scalar helper for which SIMD clones (simdlen 4) are requested.
   Returns x + y, plus an extra 123 when y equals 666; the special case
   is what would make a specialization on y == 666 attractive to
   IPA-cp.  */
#pragma omp declare simd simdlen(4)
static int __attribute__ ((noinline))
special_add (int x, int y)
{
if (y == 666)
return x + y + 123;
else
return x + y;
}
/* Store special_add (i, 666) into tot[i] for i in [0, 10000).  The
   constant second argument is the candidate for constant propagation
   that this test expects IPA-cp not to act on.  */
void fillit(int *tot)
{
int i;
for (i=0; i < 10000; ++i)
tot[i] = special_add (i, 666);
}
/* { dg-final { scan-tree-dump-not "special_add.constprop" "optimized" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

View file

@ -0,0 +1,26 @@
/* { dg-options "-fopenmp -fdump-tree-optimized -O" } */
/* Returns a + b; c is not used by the body.  Two clone flavours are
   requested: an inbranch one with uniform c and linear b (step 66),
   and a notinbranch one with c aligned to 32.  */
#pragma omp declare simd inbranch uniform(c) linear(b:66)
#pragma omp declare simd notinbranch aligned(c:32)
int addit(int a, int b, int *c)
{
return a + b;
}
/* Adds x to a[k] in place and returns the updated element.  The clone
   is requested as notinbranch with a uniform and aligned to 32, and k
   linear with step 1.  */
#pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch
float setArray(float *a, float x, int k)
{
a[k] = a[k] + x;
return a[k];
}
/* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVbM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVcN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVcN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVcM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

View file

@ -0,0 +1,18 @@
/* { dg-options "-fopenmp -fdump-tree-optimized -O2" } */
/* Test that if there are no *inbranch clauses, both the masked and
the unmasked versions are created. */
/* Returns a + b; c is not used by the body.  The declare simd pragma
   carries neither inbranch nor notinbranch.  */
#pragma omp declare simd
int addit(int a, int b, int c)
{
return a + b;
}
/* { dg-final { scan-tree-dump "_ZGVbM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVcN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */

View file

@ -0,0 +1,11 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-fopenmp" } */
/* Returns a + b when a > 5, otherwise a - b; gives the notinbranch
   simdlen(4) clone a body with a conditional in it.  */
#pragma omp declare simd simdlen(4) notinbranch
int f2 (int a, int b)
{
if (a > 5)
return a + b;
else
return a - b;
}

View file

@ -0,0 +1,12 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-fopenmp -w" } */
/* ?? The -w above is to inhibit the following warning for now:
a.c:2:6: warning: AVX vector argument without AVX enabled changes
the ABI [enabled by default]. */
/* Stores 555 through the pointer argument; a void clone candidate
   whose only argument is a pointer.  */
#pragma omp declare simd notinbranch simdlen(4)
void foo (int *a)
{
*a = 555;
}

View file

@ -0,0 +1,11 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-fopenmp" } */
/* Test that array subscripts are properly adjusted. */
int array[1000];
/* Stores 555 into the global array at index i, so the argument is
   used as an array subscript inside the clone.  */
#pragma omp declare simd notinbranch simdlen(4)
void foo (int i)
{
array[i] = 555;
}

View file

@ -0,0 +1,16 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-fopenmp -w" } */
int array[1000];
/* Stores 555 into a[b]: both the pointer and the index are clone
   arguments.  */
#pragma omp declare simd notinbranch simdlen(4)
void foo (int *a, int b)
{
a[b] = 555;
}
/* Stores 555 through the pointer argument.  */
#pragma omp declare simd notinbranch simdlen(4)
void bar (int *a)
{
*a = 555;
}

View file

@ -0,0 +1,58 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int array[N];
/* Returns 5 when a < 30, otherwise a + b + c.  Four notinbranch clones
   are requested: simdlen 4 and 8, each once with default argument
   kinds and once with uniform b and linear c (step 3).  */
#pragma omp declare simd simdlen(4) notinbranch
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
#pragma omp declare simd simdlen(8) notinbranch
#pragma omp declare simd simdlen(8) notinbranch uniform(b) linear(c:3)
__attribute__((noinline)) int
foo (int a, int b, int c)
{
if (a < 30)
return 5;
return a + b + c;
}
/* Fills array[i] with foo (i, 123, i * 3) in an omp simd loop.  The
   second argument is a constant and the third advances by 3 per
   iteration, matching the shape of the uniform(b) linear(c:3)
   clones.  */
__attribute__((noinline, noclone)) void
bar ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
array[i] = foo (i, 123, i * 3);
}
/* Like bar, but the second argument is array[i], which is read anew in
   every iteration rather than being a constant.  */
__attribute__((noinline, noclone)) void
baz ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
array[i] = foo (i, array[i], i * 3);
}
/* Runs bar and then baz, aborting on any unexpected element.  After
   bar, element i is 5 for i < 30 and i + 123 + 3*i = 4*i + 123
   otherwise; baz then feeds that value back in as b, giving
   i + (4*i + 123) + 3*i = 8*i + 123 for i >= 30.  */
int
main ()
{
int i;
check_vect ();
bar ();
for (i = 0; i < N; i++)
if (array[i] != (i < 30 ? 5 : i * 4 + 123))
abort ();
baz ();
for (i = 0; i < N; i++)
if (array[i] != (i < 30 ? 5 : i * 8 + 123))
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,83 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-additional-sources vect-simd-clone-10a.c } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N], b[N];
long int c[N];
unsigned char d[N];
#include "vect-simd-clone-10.h"
/* Two omp simd loops calling the externally defined foo and bar: the
   first updates a[] from foo (c[i], a[i], b[i]) + 6, the second updates
   c[] from bar (a[i], b[i], c[i]) * 2.  The calls mix int and long int
   arguments and results.  */
__attribute__((noinline)) void
fn1 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
a[i] = foo (c[i], a[i], b[i]) + 6;
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = bar (a[i], b[i], c[i]) * 2;
}
/* Same two loops as fn1, with an additional unsigned char update in
   each loop body (d[i]++ in the first, d[i] /= 2 in the second), so
   each loop also touches an element type narrower than int.  */
__attribute__((noinline)) void
fn2 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
{
a[i] = foo (c[i], a[i], b[i]) + 6;
d[i]++;
}
#pragma omp simd
for (i = 0; i < N; i++)
{
c[i] = bar (a[i], b[i], c[i]) * 2;
d[i] /= 2;
}
}
/* (Re)initializes the four global arrays to known values:
   a[i] = 2*i, b[i] = 17 + i % 37, c[i] = i & 63, d[i] = 16 + i
   (the last stored into an unsigned char element).  */
__attribute__((noinline)) void
fn3 (void)
{
int i;
for (i = 0; i < N; i++)
{
a[i] = i * 2;
b[i] = 17 + (i % 37);
c[i] = (i & 63);
d[i] = 16 + i;
}
}
/* Initializes the arrays with fn3, runs fn1 and checks a[], b[] and
   c[]; then re-initializes, runs fn2 and checks the same values plus
   d[].  Aborts on the first mismatch.  */
int
main ()
{
int i;
check_vect ();
fn3 ();
fn1 ();
for (i = 0; i < N; i++)
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|| b[i] != 17 + (i % 37)
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
abort ();
fn3 ();
fn2 ();
for (i = 0; i < N; i++)
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|| b[i] != 17 + (i % 37)
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
|| d[i] != ((unsigned char) (17 + i)) / 2)
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,4 @@
/* Declarations of two functions with notinbranch SIMD clones; both
   mix int and long int among their parameters and return types.  */
#pragma omp declare simd notinbranch
extern int foo (long int a, int b, int c);
#pragma omp declare simd notinbranch
extern long int bar (int a, int b, long int c);

View file

@ -0,0 +1,17 @@
/* { dg-do compile } */
#include "vect-simd-clone-10.h"
/* Definition of foo from vect-simd-clone-10.h: returns a + b + c,
   converted to int on return.  */
#pragma omp declare simd notinbranch
extern int
foo (long int a, int b, int c)
{
return a + b + c;
}
/* Definition of bar from vect-simd-clone-10.h: returns a + b + c as a
   long int.  */
#pragma omp declare simd notinbranch
extern long int
bar (int a, int b, long int c)
{
return a + b + c;
}

View file

@ -0,0 +1,66 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N] __attribute__((aligned (32)));
/* Combines the three arguments into one value,
   a ^ (b * 512) ^ (c * 512 * 512).  All three are declared linear
   (steps 1, 3 and 6).  The parameter a shadows the global array a
   inside this function.  */
#pragma omp declare simd linear(a) linear(b:3) linear(c:6) notinbranch
__attribute__((noinline)) int
foo (int a, int b, int c)
{
return a ^ (b * 512) ^ (c * 512 * 512);
}
/* Fills d[0 .. N/2) with foo (j, i * 3, 2 * k + 2).  With j stepping
   by 1 and k by 3, the three arguments advance by 1, 3 and 6 per
   iteration, matching foo's linear clauses.  Note that the loop has no
   omp simd pragma.  */
__attribute__((noinline, noclone)) void
bar (int *d)
{
int i, j, k;
for (i = 0, j = 0, k = 0; i < N / 2; i++, j++, k += 3)
d[i] = foo (j, i * 3, 2 * k + 2);
}
/* Currently compiled out.  Variant of bar whose induction variables
   are long int and are advanced through int casts.  */
#if 0
__attribute__((noinline, noclone)) void
baz (int *d)
{
long int i, j, k;
for (i = 0, j = 0, k = 0; i < N / 2;
i = (int) i + 1, j = (int) j + 1, k = (int) k + 3)
d[i] = foo (j, i * 3, 2 * k + 2);
}
#endif
/* Runs bar on the global array at an offset of 7 elements and at
   offset 0, checking each result against
   i ^ (i*3*512) ^ ((i*6 + 2)*512*512).  Returns early when int is
   narrower than 32 bits.  The matching baz checks are compiled out
   together with baz.  */
int
main ()
{
int i;
check_vect ();
if (sizeof (int) * __CHAR_BIT__ < 32)
return 0;
bar (a + 7);
for (i = 0; i < N / 2; i++)
if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
abort ();
bar (a);
for (i = 0; i < N / 2; i++)
if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
abort ();
#if 0
baz (a + 7);
for (i = 0; i < N / 2; i++)
if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
abort ();
baz (a);
for (i = 0; i < N / 2; i++)
if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
abort ();
#endif
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,52 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int array[N] __attribute__((aligned (32)));
/* Store C into element B of the array A.  Four simd variants are
   requested: simdlen 4 and 8, each with A declared aligned to 16 and to
   32; in all of them A is uniform and B is linear.  */
#pragma omp declare simd simdlen(4) notinbranch aligned(a:16) uniform(a) linear(b)
#pragma omp declare simd simdlen(4) notinbranch aligned(a:32) uniform(a) linear(b)
#pragma omp declare simd simdlen(8) notinbranch aligned(a:16) uniform(a) linear(b)
#pragma omp declare simd simdlen(8) notinbranch aligned(a:32) uniform(a) linear(b)
__attribute__((noinline)) void
foo (int *a, int b, int c)
{
  int *slot = a + b;

  *slot = c;
}
/* Call foo once per index inside an "omp simd" loop.  The first argument
   is always the global array, the second is the loop index, and the
   third is i * array[i], so each call overwrites array[i] with that
   product.  The call has no result.  */
__attribute__((noinline, noclone)) void
bar ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
foo (array, i, i * array[i]);
}
/* Initialise array[i] to 5 * (i & 7) for every index below N.  */
__attribute__((noinline, noclone)) void
baz ()
{
  int idx = 0;

  while (idx < N)
    {
      array[idx] = 5 * (idx & 7);
      idx++;
    }
}
/* Initialise the array with baz, transform it with bar and abort unless
   every element equals its initial value multiplied by its index.
   Returns 0 on success.  */
int
main ()
{
  int idx;

  check_vect ();
  baz ();
  bar ();
  for (idx = 0; idx < N; idx++)
    {
      if (!(array[idx] == 5 * (idx & 7) * idx))
        abort ();
    }
  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,45 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int d[N], e[N];
/* Return 5 when A is below 30, otherwise the sum of the three arguments.
   B is declared uniform and C linear with step 3.  */
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
__attribute__((noinline)) int
foo (int a, int b, int c)
{
  int result;

  if (a >= 30)
    result = a + b + c;
  else
    result = 5;
  return result;
}
/* "omp simd" loop that stores foo (i, 123, i * 3) into d[i] and also adds
   the index to e[i].  The second argument is a constant and the third
   grows by 3 per iteration, in line with the uniform(b) and linear(c:3)
   clauses on foo.  */
__attribute__((noinline, noclone)) void
bar ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
{
d[i] = foo (i, 123, i * 3);
e[i] = e[i] + i;
}
}
/* Run bar once and abort unless d holds foo's expected result for every
   index and e[i] equals i.  Returns 0 on success.  */
int
main ()
{
  int idx;

  check_vect ();
  bar ();
  for (idx = 0; idx < N; idx++)
    {
      if (!(d[idx] == (idx < 30 ? 5 : idx * 4 + 123) && e[idx] == idx))
        abort ();
    }
  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,48 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
float d[N];
int e[N];
unsigned short f[N];
/* Float variant: return 5.0f when A is below 30, otherwise A + B + C.
   Only B is declared uniform.  */
#pragma omp declare simd simdlen(8) notinbranch uniform(b)
__attribute__((noinline)) float
foo (float a, float b, float c)
{
  float result;

  if (a < 30)
    result = 5.0f;
  else
    result = a + b + c;
  return result;
}
/* "omp simd" loop mixing three element types: the float result of foo is
   stored into d, the int array e is scaled by 3 and the unsigned short
   array f is incremented.  The int arguments to foo are converted to
   float at the call.  */
__attribute__((noinline, noclone)) void
bar ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
{
d[i] = foo (i, 123, i * 3);
e[i] = e[i] * 3;
f[i] = f[i] + 1;
}
}
/* Run bar once and abort unless d holds foo's expected result, e is
   still all zero and every f element is 1.  Returns 0 on success.  */
int
main ()
{
  int idx;

  check_vect ();
  bar ();
  for (idx = 0; idx < N; idx++)
    {
      if (!(d[idx] == (idx < 30 ? 5.0f : idx * 4 + 123.0f)
            && !e[idx]
            && f[idx] == 1))
        abort ();
    }
  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,43 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int d[N], e[N];
/* Sum the three int arguments (in int arithmetic) and return the result
   widened to long long.  B is uniform and C is linear with step 3.  */
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
__attribute__((noinline)) long long int
foo (int a, int b, int c)
{
  const int sum = a + b + c;

  return sum;
}
/* "omp simd" loop storing foo (i, 123, i * 3) into the int array d and
   adding the index to e[i].  Here foo returns long long int, so the
   result is narrowed when it is stored into d.  */
__attribute__((noinline, noclone)) void
bar ()
{
int i;
#pragma omp simd
for (i = 0; i < N; ++i)
{
d[i] = foo (i, 123, i * 3);
e[i] = e[i] + i;
}
}
/* Run bar once and abort unless d[i] is i * 4 + 123 and e[i] is i for
   every index.  Returns 0 on success.  */
int
main ()
{
  int idx;

  check_vect ();
  bar ();
  for (idx = 0; idx < N; idx++)
    {
      if (!(d[idx] == idx * 4 + 123 && e[idx] == idx))
        abort ();
    }
  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,74 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N];
long long int b[N];
short c[N];
/* Add an int, a long long and a short in long long arithmetic and return
   the sum converted to short.  Two variants are requested: one with no
   clauses and one with B uniform and C linear with step 3.  */
#pragma omp declare simd
#pragma omp declare simd uniform(b) linear(c:3)
__attribute__((noinline)) short
foo (int a, long long int b, short c)
{
  long long int total = a + b;

  total = total + c;
  return total;
}
/* Update c through foo in one of two "omp simd" loops chosen by X.
   With X == 0 all three arguments are loaded from the arrays a, b and c.
   Otherwise the second argument is X itself and the third is i * 3,
   matching the uniform(b) linear(c:3) variant declared on foo.  */
__attribute__((noinline, noclone)) void
bar (int x)
{
int i;
if (x == 0)
{
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = foo (a[i], b[i], c[i]);
}
else
{
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = foo (a[i], x, i * 3);
}
}
/* Store index-derived starting values into the global arrays a, b
   and c.  */
__attribute__((noinline, noclone)) void
baz (void)
{
  int idx = 0;

  while (idx < N)
    {
      a[idx] = 2 * idx;
      b[idx] = -7 * idx + 6;
      c[idx] = (idx & 31) << 4;
      idx++;
    }
}
/* Check both loops of bar.  After bar (0) each verified c[i] is copied
   into a[i], so the second pass, bar (17), takes its first argument from
   the results of the first pass.  Aborts on the first mismatch; returns 0
   otherwise.  */
int
main ()
{
  int idx;

  check_vect ();
  baz ();

  bar (0);
  for (idx = 0; idx < N; idx++)
    {
      if (a[idx] == 2 * idx
          && b[idx] == 6 - 7 * idx
          && c[idx] == 6 - 5 * idx + ((idx & 31) << 4))
        a[idx] = c[idx];
      else
        abort ();
    }

  bar (17);
  for (idx = 0; idx < N; idx++)
    {
      if (!(a[idx] == 6 - 5 * idx + ((idx & 31) << 4)
            && b[idx] == 6 - 7 * idx
            && c[idx] == 23 - 2 * idx + ((idx & 31) << 4)))
        abort ();
    }

  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,74 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N];
long long int b[N];
short c[N];
/* Add two ints and a long long in long long arithmetic and return the
   sum converted to short.  Two variants are requested: one with no
   clauses and one with B uniform and C linear with step 3.  */
#pragma omp declare simd
#pragma omp declare simd uniform(b) linear(c:3)
__attribute__((noinline)) short
foo (int a, long long int b, int c)
{
  long long int acc = a + b;

  acc += c;
  return acc;
}
/* Update c through foo in one of two "omp simd" loops chosen by X.
   With X == 0 all three arguments are loaded from the arrays a, b and c
   (c is a short array, while foo's third parameter is int in this file).
   Otherwise the second argument is X itself and the third is i * 3,
   matching the uniform(b) linear(c:3) variant declared on foo.  */
__attribute__((noinline, noclone)) void
bar (int x)
{
int i;
if (x == 0)
{
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = foo (a[i], b[i], c[i]);
}
else
{
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = foo (a[i], x, i * 3);
}
}
/* Store index-derived starting values into the global arrays a, b
   and c.  */
__attribute__((noinline, noclone)) void
baz (void)
{
  int pos = 0;

  while (pos < N)
    {
      a[pos] = 2 * pos;
      b[pos] = -7 * pos + 6;
      c[pos] = (pos & 31) << 4;
      pos++;
    }
}
/* Check both loops of bar.  After bar (0) each verified c[i] is copied
   into a[i], so the second pass, bar (17), takes its first argument from
   the results of the first pass.  Aborts on the first mismatch; returns 0
   otherwise.  */
int
main ()
{
  int pos;

  check_vect ();
  baz ();

  bar (0);
  for (pos = 0; pos < N; pos++)
    {
      if (a[pos] == 2 * pos
          && b[pos] == 6 - 7 * pos
          && c[pos] == 6 - 5 * pos + ((pos & 31) << 4))
        a[pos] = c[pos];
      else
        abort ();
    }

  bar (17);
  for (pos = 0; pos < N; pos++)
    {
      if (!(a[pos] == 6 - 5 * pos + ((pos & 31) << 4)
            && b[pos] == 6 - 7 * pos
            && c[pos] == 23 - 2 * pos + ((pos & 31) << 4)))
        abort ();
    }

  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,94 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N], b[N];
long int c[N];
unsigned char d[N];
/* Add a long int and two ints in long int arithmetic and return the sum
   converted to int.  The simd variant is requested with simdlen 8.  */
#pragma omp declare simd simdlen(8) notinbranch
__attribute__((noinline)) int
foo (long int a, int b, int c)
{
  long int total = a + b;

  total = total + c;
  return total;
}
/* Add A and B as int, then add the long int C and return the long int
   sum.  The simd variant is requested with simdlen 8.  */
#pragma omp declare simd simdlen(8) notinbranch
__attribute__((noinline)) long int
bar (int a, int b, long int c)
{
  const int ab = a + b;

  return ab + c;
}
/* Two consecutive "omp simd" loops.  The first replaces a[i] by
   foo (c[i], a[i], b[i]) + 6; the second replaces c[i] by twice
   bar (a[i], b[i], c[i]), reading the a values the first loop wrote.
   The argument and result types mix int and long int.  */
__attribute__((noinline)) void
fn1 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
a[i] = foo (c[i], a[i], b[i]) + 6;
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = bar (a[i], b[i], c[i]) * 2;
}
/* Same two calls as fn1, but each "omp simd" loop additionally updates
   the unsigned char array d: the first loop increments d[i], the second
   halves it.  */
__attribute__((noinline)) void
fn2 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
{
a[i] = foo (c[i], a[i], b[i]) + 6;
d[i]++;
}
#pragma omp simd
for (i = 0; i < N; i++)
{
c[i] = bar (a[i], b[i], c[i]) * 2;
d[i] /= 2;
}
}
/* Store index-derived starting values into the global arrays a, b, c
   and d.  main calls this before fn1 and again before fn2.  */
__attribute__((noinline)) void
fn3 (void)
{
  int idx = 0;

  while (idx < N)
    {
      a[idx] = idx * 2;
      b[idx] = 17 + (idx % 37);
      c[idx] = (idx & 63);
      d[idx] = 16 + idx;
      idx++;
    }
}
/* Run fn1 and then fn2, each on arrays freshly initialised by fn3, and
   abort as soon as an element differs from the value recomputed here.
   Returns 0 when every element matches.  */
int
main ()
{
  int idx;

  check_vect ();

  fn3 ();
  fn1 ();
  for (idx = 0; idx < N; idx++)
    {
      if (!(a[idx] == idx * 2 + 23 + (idx % 37) + (idx & 63)
            && b[idx] == 17 + (idx % 37)
            && c[idx] == idx * 4 + 80 + 4 * (idx % 37) + 4 * (idx & 63)))
        abort ();
    }

  fn3 ();
  fn2 ();
  for (idx = 0; idx < N; idx++)
    {
      /* fn2 additionally increments and then halves d.  */
      if (!(a[idx] == idx * 2 + 23 + (idx % 37) + (idx & 63)
            && b[idx] == 17 + (idx % 37)
            && c[idx] == idx * 4 + 80 + 4 * (idx % 37) + 4 * (idx & 63)
            && d[idx] == ((unsigned char) (17 + idx)) / 2))
        abort ();
    }

  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -0,0 +1,94 @@
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
#include "tree-vect.h"
#ifndef N
#define N 1024
#endif
int a[N], b[N];
long int c[N];
unsigned char d[N];
/* File-local variant: add a long int and two ints in long int arithmetic
   and return the sum converted to int.  No simdlen is given here.  */
#pragma omp declare simd notinbranch
__attribute__((noinline)) static int
foo (long int a, int b, int c)
{
  long int acc = a + b;

  acc += c;
  return acc;
}
/* File-local variant: add A and B as int, then add the long int C and
   return the long int sum.  No simdlen is given here.  */
#pragma omp declare simd notinbranch
__attribute__((noinline)) static long int
bar (int a, int b, long int c)
{
  const int head = a + b;

  return head + c;
}
/* Two consecutive "omp simd" loops calling the static functions foo and
   bar.  The first replaces a[i] by foo (c[i], a[i], b[i]) + 6; the second
   replaces c[i] by twice bar (a[i], b[i], c[i]), reading the a values the
   first loop wrote.  */
__attribute__((noinline)) void
fn1 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
a[i] = foo (c[i], a[i], b[i]) + 6;
#pragma omp simd
for (i = 0; i < N; i++)
c[i] = bar (a[i], b[i], c[i]) * 2;
}
/* Same two calls as fn1, but each "omp simd" loop additionally updates
   the unsigned char array d: the first loop increments d[i], the second
   halves it.  */
__attribute__((noinline)) void
fn2 (void)
{
int i;
#pragma omp simd
for (i = 0; i < N; i++)
{
a[i] = foo (c[i], a[i], b[i]) + 6;
d[i]++;
}
#pragma omp simd
for (i = 0; i < N; i++)
{
c[i] = bar (a[i], b[i], c[i]) * 2;
d[i] /= 2;
}
}
/* Store index-derived starting values into the global arrays a, b, c
   and d.  main calls this before fn1 and again before fn2.  */
__attribute__((noinline)) void
fn3 (void)
{
  int pos = 0;

  while (pos < N)
    {
      a[pos] = pos * 2;
      b[pos] = 17 + (pos % 37);
      c[pos] = (pos & 63);
      d[pos] = 16 + pos;
      pos++;
    }
}
/* Run fn1 and then fn2, each on arrays freshly initialised by fn3, and
   abort as soon as an element differs from the value recomputed here.
   Returns 0 when every element matches.  */
int
main ()
{
  int pos;

  check_vect ();

  fn3 ();
  fn1 ();
  for (pos = 0; pos < N; pos++)
    {
      if (!(a[pos] == pos * 2 + 23 + (pos % 37) + (pos & 63)
            && b[pos] == 17 + (pos % 37)
            && c[pos] == pos * 4 + 80 + 4 * (pos % 37) + 4 * (pos & 63)))
        abort ();
    }

  fn3 ();
  fn2 ();
  for (pos = 0; pos < N; pos++)
    {
      /* fn2 additionally increments and then halves d.  */
      if (!(a[pos] == pos * 2 + 23 + (pos % 37) + (pos & 63)
            && b[pos] == 17 + (pos % 37)
            && c[pos] == pos * 4 + 80 + 4 * (pos % 37) + 4 * (pos & 63)
            && d[pos] == ((unsigned char) (17 + pos)) / 2))
        abort ();
    }

  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */

View file

@ -903,6 +903,9 @@ struct GTY(()) tree_base {
CALL_ALLOCA_FOR_VAR_P in
CALL_EXPR
OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in
OMP_CLAUSE_LINEAR
side_effects_flag:
TREE_SIDE_EFFECTS in

View file

@ -472,6 +472,7 @@ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_omp_simd_clone (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_profile (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt);

View file

@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
#include "cgraph.h"
/* Need to include rtl.h, expr.h, etc. for optabs. */
#include "expr.h"
#include "optabs.h"
@ -3167,10 +3168,11 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
if (loop_vinfo)
{
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
loop = LOOP_VINFO_LOOP (loop_vinfo);
if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
|| find_data_references_in_loop
(loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -3179,7 +3181,57 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
return false;
}
datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
for (i = 0; i < loop->num_nodes; i++)
{
gimple_stmt_iterator gsi;
for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
if (!find_data_references_in_stmt (loop, stmt, &datarefs))
{
if (is_gimple_call (stmt) && loop->safelen)
{
tree fndecl = gimple_call_fndecl (stmt), op;
if (fndecl != NULL_TREE)
{
struct cgraph_node *node = cgraph_get_node (fndecl);
if (node != NULL && node->simd_clones != NULL)
{
unsigned int j, n = gimple_call_num_args (stmt);
for (j = 0; j < n; j++)
{
op = gimple_call_arg (stmt, j);
if (DECL_P (op)
|| (REFERENCE_CLASS_P (op)
&& get_base_address (op)))
break;
}
op = gimple_call_lhs (stmt);
/* Ignore #pragma omp declare simd functions
if they don't have data references in the
call stmt itself. */
if (j == n
&& !(op
&& (DECL_P (op)
|| (REFERENCE_CLASS_P (op)
&& get_base_address (op)))))
continue;
}
}
}
LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not vectorized: loop contains function "
"calls or data references that cannot "
"be analyzed\n");
return false;
}
}
}
LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
}
else
{

View file

@ -376,6 +376,19 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
if (gimple_get_lhs (stmt) == NULL_TREE)
{
if (is_gimple_call (stmt))
{
/* Ignore calls with no lhs. These must be calls to
#pragma omp simd functions, and what vectorization factor
it really needs can't be determined until
vectorizable_simd_clone_call. */
if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
{
pattern_def_seq = NULL;
gsi_next (&si);
}
continue;
}
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@ -5699,7 +5712,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
bool grouped_store;
bool slp_scheduled = false;
unsigned int nunits;
gimple stmt, pattern_stmt;
gimple_seq pattern_def_seq = NULL;
gimple_stmt_iterator pattern_def_si = gsi_none ();
@ -5957,16 +5969,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
transform_pattern_stmt = false;
}
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
STMT_VINFO_VECTYPE (stmt_info));
if (!STMT_SLP_TYPE (stmt_info)
&& nunits != (unsigned int) vectorization_factor
&& dump_enabled_p ())
/* For SLP VF is set according to unrolling factor, and not to
vector size, hence for SLP this print is not valid. */
dump_printf_loc (MSG_NOTE, vect_location,
"multiple-types.\n");
if (STMT_VINFO_VECTYPE (stmt_info))
{
unsigned int nunits
= (unsigned int)
TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
if (!STMT_SLP_TYPE (stmt_info)
&& nunits != (unsigned int) vectorization_factor
&& dump_enabled_p ())
/* For SLP VF is set according to unrolling factor, and not
to vector size, hence for SLP this print is not valid. */
dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
}
/* SLP. Schedule all the SLP instances when the first SLP stmt is
reached. */

View file

@ -46,12 +46,15 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "expr.h"
#include "recog.h" /* FIXME: for insn_data */
#include "optabs.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "cgraph.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@ -1735,11 +1738,11 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (!is_gimple_call (stmt))
return false;
if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
if (gimple_call_lhs (stmt) == NULL_TREE
|| TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
return false;
if (stmt_can_throw_internal (stmt))
return false;
gcc_checking_assert (!stmt_can_throw_internal (stmt));
vectype_out = STMT_VINFO_VECTYPE (stmt_info);
@ -2082,10 +2085,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
vargs.release ();
/* Update the exception handling table with the vector stmt if necessary. */
if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
gimple_purge_dead_eh_edges (gimple_bb (stmt));
/* The call in STMT might prevent it from being removed in dce.
We however cannot remove it here, due to the way the ssa name
it defines is mapped to the new definition. So just replace
@ -2109,6 +2108,605 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
/* Information vectorizable_simd_clone_call collects about one argument of
   the scalar call being vectorized.  */
struct simd_call_arg_info
{
/* Vector type of the argument as reported by vect_is_simple_use_1.  It is
   NULL_TREE for constant and external defs until such an argument is
   matched with a vector parameter of the chosen clone, at which point it
   is computed with get_vectype_for_scalar_type.  */
tree vectype;
/* NULL_TREE initially.  For an argument recognised as a simple induction
   variable this is the IV base; during the transform it holds the most
   recent vector def (or the PHI result for linear arguments).  */
tree op;
/* How the argument is defined, as reported by vect_is_simple_use_1.  */
enum vect_def_type dt;
/* Step of the induction variable when the argument is a simple IV whose
   step fits a HOST_WIDE_INT; 0 otherwise.  */
HOST_WIDE_INT linear_step;
/* For constant or external pointer arguments, get_pointer_alignment
   divided by BITS_PER_UNIT; 0 otherwise.  */
unsigned int align;
};
/* Function vectorizable_simd_clone_call.

   Check if STMT performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   SLP_NODE is the SLP node of STMT, if any; SLP is not supported yet, so
   a non-NULL SLP_NODE makes the function return FALSE.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   The per-argument vector ARGINFO is released on every exit path,
   including the successful transform path.  */

static bool
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
                              gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest;
  tree scalar_dest;
  tree op, type;
  tree vec_oprnd0 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
  tree vectype;
  unsigned int nunits;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  tree fndecl, new_temp, def;
  gimple def_stmt;
  gimple new_stmt = NULL;
  int ncopies, j;
  vec<simd_call_arg_info> arginfo = vNULL;
  vec<tree> vargs = vNULL;
  size_t i, nargs;
  tree lhs, rtype, ratype;
  vec<constructor_elt, va_gc> *ret_ctor_elts;

  /* Is STMT a vectorizable call?   */
  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (fndecl == NULL_TREE)
    return false;

  /* Only direct calls to functions that have simd clones qualify.  */
  struct cgraph_node *node = cgraph_get_node (fndecl);
  if (node == NULL || node->simd_clones == NULL)
    return false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
    return false;

  /* The call may have no lhs at all, but if it has one it must be an
     SSA name.  */
  if (gimple_call_lhs (stmt)
      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
    return false;

  gcc_checking_assert (!stmt_can_throw_internal (stmt));

  vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
    return false;

  /* FORNOW */
  if (slp_node || PURE_SLP_STMT (stmt_info))
    return false;

  /* Process function arguments.  */
  nargs = gimple_call_num_args (stmt);

  /* Bail out if the function has zero arguments.  */
  if (nargs == 0)
    return false;

  arginfo.create (nargs);

  for (i = 0; i < nargs; i++)
    {
      simd_call_arg_info thisarginfo;
      affine_iv iv;

      thisarginfo.linear_step = 0;
      thisarginfo.align = 0;
      thisarginfo.op = NULL_TREE;

      op = gimple_call_arg (stmt, i);
      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
                                 &def_stmt, &def, &thisarginfo.dt,
                                 &thisarginfo.vectype)
          || thisarginfo.dt == vect_uninitialized_def)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          arginfo.release ();
          return false;
        }

      if (thisarginfo.dt == vect_constant_def
          || thisarginfo.dt == vect_external_def)
        gcc_assert (thisarginfo.vectype == NULL_TREE);
      else
        gcc_assert (thisarginfo.vectype != NULL_TREE);

      /* Record the base and step of arguments that are simple induction
         variables of the loop, and the known alignment of invariant
         pointer arguments.  */
      if (thisarginfo.dt != vect_constant_def
          && thisarginfo.dt != vect_external_def
          && loop_vinfo
          && TREE_CODE (op) == SSA_NAME
          && simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
          && tree_fits_shwi_p (iv.step))
        {
          thisarginfo.linear_step = tree_to_shwi (iv.step);
          thisarginfo.op = iv.base;
        }
      else if ((thisarginfo.dt == vect_constant_def
                || thisarginfo.dt == vect_external_def)
               && POINTER_TYPE_P (TREE_TYPE (op)))
        thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;

      arginfo.quick_push (thisarginfo);
    }

  /* Choose the clone to call.  If a clone was already recorded for this
     stmt (see the !VEC_STMT path below), reuse it; otherwise walk all
     clones of the callee and keep the usable one with the lowest
     badness.  */
  unsigned int badness = 0;
  struct cgraph_node *bestn = NULL;
  if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
    bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
  else
    for (struct cgraph_node *n = node->simd_clones; n != NULL;
         n = n->simdclone->next_clone)
      {
        unsigned int this_badness = 0;
        if (n->simdclone->simdlen
            > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
            || n->simdclone->nargs != nargs)
          continue;
        /* Penalise clones narrower than the vectorization factor.  */
        if (n->simdclone->simdlen
            < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
          this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
                           - exact_log2 (n->simdclone->simdlen)) * 1024;
        if (n->simdclone->inbranch)
          this_badness += 2048;
        /* A negative value from the target hook rejects the clone.  */
        int target_badness = targetm.simd_clone.usable (n);
        if (target_badness < 0)
          continue;
        this_badness += target_badness * 512;
        /* FORNOW: Have to add code to add the mask argument.  */
        if (n->simdclone->inbranch)
          continue;
        /* Check every argument against the clone's parameter kind.  I is
           set to (size_t) -1 to signal that the clone does not fit.  */
        for (i = 0; i < nargs; i++)
          {
            switch (n->simdclone->args[i].arg_type)
              {
              case SIMD_CLONE_ARG_TYPE_VECTOR:
                if (!useless_type_conversion_p
                        (n->simdclone->args[i].orig_type,
                         TREE_TYPE (gimple_call_arg (stmt, i))))
                  i = -1;
                else if (arginfo[i].dt == vect_constant_def
                         || arginfo[i].dt == vect_external_def
                         || arginfo[i].linear_step)
                  this_badness += 64;
                break;
              case SIMD_CLONE_ARG_TYPE_UNIFORM:
                if (arginfo[i].dt != vect_constant_def
                    && arginfo[i].dt != vect_external_def)
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
                if (arginfo[i].dt == vect_constant_def
                    || arginfo[i].dt == vect_external_def
                    || (arginfo[i].linear_step
                        != n->simdclone->args[i].linear_step))
                  i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
                /* FORNOW */
                i = -1;
                break;
              case SIMD_CLONE_ARG_TYPE_MASK:
                gcc_unreachable ();
              }
            if (i == (size_t) -1)
              break;
            if (n->simdclone->args[i].alignment > arginfo[i].align)
              {
                i = -1;
                break;
              }
            if (arginfo[i].align)
              this_badness += (exact_log2 (arginfo[i].align)
                               - exact_log2 (n->simdclone->args[i].alignment));
          }
        if (i == (size_t) -1)
          continue;
        if (bestn == NULL || this_badness < badness)
          {
            bestn = n;
            badness = this_badness;
          }
      }

  if (bestn == NULL)
    {
      arginfo.release ();
      return false;
    }

  /* Invariant arguments passed to vector parameters need a vector type
     that is not wider than the clone's simdlen.  */
  for (i = 0; i < nargs; i++)
    if ((arginfo[i].dt == vect_constant_def
         || arginfo[i].dt == vect_external_def)
        && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
      {
        arginfo[i].vectype
          = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
                                                                     i)));
        if (arginfo[i].vectype == NULL
            || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                > bestn->simdclone->simdlen))
          {
            arginfo.release ();
            return false;
          }
      }

  fndecl = bestn->decl;
  nunits = bestn->simdclone->simdlen;
  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;

  /* If the function isn't const, only allow it in simd loops where user
     has asserted that at least nunits consecutive iterations can be
     performed using SIMD instructions.  */
  if ((loop == NULL || (unsigned) loop->safelen < nunits)
      && gimple_vuse (stmt))
    {
      arginfo.release ();
      return false;
    }

  /* Sanity check: make sure that at least one copy of the vectorized stmt
     needs to be generated.  */
  gcc_assert (ncopies >= 1);

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Remember the chosen clone so the transform phase reuses it.  */
      STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "=== vectorizable_simd_clone_call ===\n");
/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
      arginfo.release ();
      return true;
    }

  /** Transform.  **/

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");

  /* Handle def.  */
  scalar_dest = gimple_call_lhs (stmt);
  vec_dest = NULL_TREE;
  rtype = NULL_TREE;
  ratype = NULL_TREE;
  if (scalar_dest)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      rtype = TREE_TYPE (TREE_TYPE (fndecl));
      /* The clone may return an array of vectors rather than a single
         vector; RATYPE is that array type and RTYPE its element type.  */
      if (TREE_CODE (rtype) == ARRAY_TYPE)
        {
          ratype = rtype;
          rtype = TREE_TYPE (ratype);
        }
    }

  prev_stmt_info = NULL;
  for (j = 0; j < ncopies; ++j)
    {
      /* Build argument list for the vectorized call.  */
      if (j == 0)
        vargs.create (nargs);
      else
        vargs.truncate (0);

      for (i = 0; i < nargs; i++)
        {
          unsigned int k, l, m, o;
          tree atype;
          op = gimple_call_arg (stmt, i);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
              atype = bestn->simdclone->args[i].vector_type;
              o = nunits / TYPE_VECTOR_SUBPARTS (atype);
              for (m = j * o; m < (j + 1) * o; m++)
                {
                  if (TYPE_VECTOR_SUBPARTS (atype)
                      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
                    {
                      /* The clone's parameter vector is narrower than the
                         loop's vector for this argument: pass pieces of
                         the wider vector extracted with BIT_FIELD_REF.  */
                      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
                      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
                           / TYPE_VECTOR_SUBPARTS (atype));
                      gcc_assert ((k & (k - 1)) == 0);
                      if (m == 0)
                        vec_oprnd0
                          = vect_get_vec_def_for_operand (op, stmt, NULL);
                      else
                        {
                          vec_oprnd0 = arginfo[i].op;
                          if ((m & (k - 1)) == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                vec_oprnd0);
                        }
                      arginfo[i].op = vec_oprnd0;
                      vec_oprnd0
                        = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
                                  size_int (prec),
                                  bitsize_int ((m & (k - 1)) * prec));
                      new_stmt
                        = gimple_build_assign (make_ssa_name (atype, NULL),
                                               vec_oprnd0);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      vargs.safe_push (gimple_assign_lhs (new_stmt));
                    }
                  else
                    {
                      /* The clone's parameter vector is at least as wide:
                         pass one vector def directly, or glue K of them
                         together with a CONSTRUCTOR.  */
                      k = (TYPE_VECTOR_SUBPARTS (atype)
                           / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
                      gcc_assert ((k & (k - 1)) == 0);
                      vec<constructor_elt, va_gc> *ctor_elts;
                      if (k != 1)
                        vec_alloc (ctor_elts, k);
                      else
                        ctor_elts = NULL;
                      for (l = 0; l < k; l++)
                        {
                          if (m == 0 && l == 0)
                            vec_oprnd0
                              = vect_get_vec_def_for_operand (op, stmt, NULL);
                          else
                            vec_oprnd0
                              = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
                                                                arginfo[i].op);
                          arginfo[i].op = vec_oprnd0;
                          if (k == 1)
                            break;
                          CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
                                                  vec_oprnd0);
                        }
                      if (k == 1)
                        vargs.safe_push (vec_oprnd0);
                      else
                        {
                          vec_oprnd0 = build_constructor (atype, ctor_elts);
                          new_stmt
                            = gimple_build_assign (make_ssa_name (atype, NULL),
                                                   vec_oprnd0);
                          vect_finish_stmt_generation (stmt, new_stmt, gsi);
                          vargs.safe_push (gimple_assign_lhs (new_stmt));
                        }
                    }
                }
              break;
            case SIMD_CLONE_ARG_TYPE_UNIFORM:
              vargs.safe_push (op);
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
              if (j == 0)
                {
                  /* First copy: create a new PHI in the loop header that
                     starts at the IV base and advances by
                     step * ncopies * nunits on the latch edge.  */
                  gimple_seq stmts;
                  arginfo[i].op
                    = force_gimple_operand (arginfo[i].op, &stmts, true,
                                            NULL_TREE);
                  if (stmts != NULL)
                    {
                      basic_block new_bb;
                      edge pe = loop_preheader_edge (loop);
                      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
                      gcc_assert (!new_bb);
                    }
                  tree phi_res = copy_ssa_name (op, NULL);
                  gimple new_phi = create_phi_node (phi_res, loop->header);
                  set_vinfo_for_stmt (new_phi,
                                      new_stmt_vec_info (new_phi, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, arginfo[i].op,
                               loop_preheader_edge (loop), UNKNOWN_LOCATION);
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  double_int cst
                    = double_int::from_shwi
                        (bestn->simdclone->args[i].linear_step);
                  cst *= double_int::from_uhwi (ncopies * nunits);
                  tree tcst = double_int_to_tree (type, cst);
                  tree phi_arg = copy_ssa_name (op, NULL);
                  new_stmt = gimple_build_assign_with_ops (code, phi_arg,
                                                           phi_res, tcst);
                  gimple_stmt_iterator si = gsi_after_labels (loop->header);
                  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
                  set_vinfo_for_stmt (new_stmt,
                                      new_stmt_vec_info (new_stmt, loop_vinfo,
                                                         NULL));
                  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
                               UNKNOWN_LOCATION);
                  arginfo[i].op = phi_res;
                  vargs.safe_push (phi_res);
                }
              else
                {
                  /* Later copies: offset the PHI result by
                     step * j * nunits.  */
                  enum tree_code code
                    = POINTER_TYPE_P (TREE_TYPE (op))
                      ? POINTER_PLUS_EXPR : PLUS_EXPR;
                  tree type = POINTER_TYPE_P (TREE_TYPE (op))
                              ? sizetype : TREE_TYPE (op);
                  double_int cst
                    = double_int::from_shwi
                        (bestn->simdclone->args[i].linear_step);
                  cst *= double_int::from_uhwi (j * nunits);
                  tree tcst = double_int_to_tree (type, cst);
                  new_temp = make_ssa_name (TREE_TYPE (op), NULL);
                  new_stmt
                    = gimple_build_assign_with_ops (code, new_temp,
                                                    arginfo[i].op, tcst);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  vargs.safe_push (new_temp);
                }
              break;
            case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
            default:
              gcc_unreachable ();
            }
        }

      new_stmt = gimple_build_call_vec (fndecl, vargs);
      if (vec_dest)
        {
          gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
          if (ratype)
            new_temp = create_tmp_var (ratype, NULL);
          else if (TYPE_VECTOR_SUBPARTS (vectype)
                   == TYPE_VECTOR_SUBPARTS (rtype))
            new_temp = make_ssa_name (vec_dest, new_stmt);
          else
            new_temp = make_ssa_name (rtype, new_stmt);
          gimple_call_set_lhs (new_stmt, new_temp);
        }
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (vec_dest)
        {
          if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
            {
              /* The clone returns more elements than one loop vector
                 holds: split the result into K vectors of VECTYPE.  */
              unsigned int k, l;
              unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
              k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
              gcc_assert ((k & (k - 1)) == 0);
              for (l = 0; l < k; l++)
                {
                  tree t;
                  if (ratype)
                    {
                      t = build_fold_addr_expr (new_temp);
                      t = build2 (MEM_REF, vectype, t,
                                  build_int_cst (TREE_TYPE (t),
                                                 l * prec / BITS_PER_UNIT));
                    }
                  else
                    t = build3 (BIT_FIELD_REF, vectype, new_temp,
                                size_int (prec), bitsize_int (l * prec));
                  new_stmt
                    = gimple_build_assign (make_ssa_name (vectype, NULL), t);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                  if (j == 0 && l == 0)
                    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
                  else
                    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

                  prev_stmt_info = vinfo_for_stmt (new_stmt);
                }

              if (ratype)
                {
                  /* Clobber the array temporary once it has been read.  */
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              continue;
            }
          else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
            {
              /* The clone returns fewer elements than one loop vector
                 holds: collect the results of K calls and build one
                 vector of VECTYPE from them.  */
              unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
                                / TYPE_VECTOR_SUBPARTS (rtype));
              gcc_assert ((k & (k - 1)) == 0);
              if ((j & (k - 1)) == 0)
                vec_alloc (ret_ctor_elts, k);
              if (ratype)
                {
                  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
                  for (m = 0; m < o; m++)
                    {
                      tree tem = build4 (ARRAY_REF, rtype, new_temp,
                                         size_int (m), NULL_TREE, NULL_TREE);
                      new_stmt
                        = gimple_build_assign (make_ssa_name (rtype, NULL),
                                               tem);
                      vect_finish_stmt_generation (stmt, new_stmt, gsi);
                      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
                                              gimple_assign_lhs (new_stmt));
                    }
                  tree clobber = build_constructor (ratype, NULL);
                  TREE_THIS_VOLATILE (clobber) = 1;
                  new_stmt = gimple_build_assign (new_temp, clobber);
                  vect_finish_stmt_generation (stmt, new_stmt, gsi);
                }
              else
                CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
              if ((j & (k - 1)) != k - 1)
                continue;
              vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest, NULL),
                                       vec_oprnd0);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);

              if ((unsigned) j == k - 1)
                STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
              else
                STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

              prev_stmt_info = vinfo_for_stmt (new_stmt);
              continue;
            }
          else if (ratype)
            {
              /* Same element count, but returned in an array: load the
                 vector from the temporary and clobber the temporary.  */
              tree t = build_fold_addr_expr (new_temp);
              t = build2 (MEM_REF, vectype, t,
                          build_int_cst (TREE_TYPE (t), 0));
              new_stmt
                = gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
              vect_finish_stmt_generation (stmt, new_stmt, gsi);
              tree clobber = build_constructor (ratype, NULL);
              TREE_THIS_VOLATILE (clobber) = 1;
              vect_finish_stmt_generation (stmt,
                                           gimple_build_assign (new_temp,
                                                                clobber), gsi);
            }
        }

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
    }

  vargs.release ();
  /* ARGINFO is not needed past this point; release it here so the
     successful transform path does not leak it.  */
  arginfo.release ();

  /* The call in STMT might prevent it from being removed in dce.
     We however cannot remove it here, due to the way the ssa name
     it defines is mapped to the new definition.  So just replace
     rhs of the statement with something harmless.  */

  if (slp_node)
    return true;

  if (scalar_dest)
    {
      type = TREE_TYPE (scalar_dest);
      if (is_pattern_stmt_p (stmt_info))
        lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
      else
        lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
    }
  else
    new_stmt = gimple_build_nop ();
  set_vinfo_for_stmt (new_stmt, stmt_info);
  set_vinfo_for_stmt (stmt, NULL);
  STMT_VINFO_STMT (stmt_info) = new_stmt;
  gsi_replace (gsi, new_stmt, false);
  unlink_stmt_vdef (stmt);

  return true;
}
/* Function vect_gen_widened_results_half
Create a vector stmt whose code, type, number of arguments, and result
@ -5819,7 +6417,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
|| (is_gimple_call (stmt)
&& gimple_call_lhs (stmt) == NULL_TREE));
*need_to_vectorize = true;
}
@ -5827,7 +6427,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
if (!bb_vinfo
&& (STMT_VINFO_RELEVANT_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
|| vectorizable_conversion (stmt, NULL, NULL, NULL)
|| vectorizable_shift (stmt, NULL, NULL, NULL)
|| vectorizable_operation (stmt, NULL, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL, NULL)
@ -5839,7 +6440,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
else
{
if (bb_vinfo)
ok = (vectorizable_conversion (stmt, NULL, NULL, node)
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
|| vectorizable_conversion (stmt, NULL, NULL, node)
|| vectorizable_shift (stmt, NULL, NULL, node)
|| vectorizable_operation (stmt, NULL, NULL, node)
|| vectorizable_assignment (stmt, NULL, NULL, node)
@ -5967,6 +6569,11 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
stmt = gsi_stmt (*gsi);
break;
case call_simd_clone_vec_info_type:
done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
stmt = gsi_stmt (*gsi);
break;
case reduc_vec_info_type:
done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
gcc_assert (done);

View file

@ -443,6 +443,7 @@ enum stmt_vec_info_type {
shift_vec_info_type,
op_vec_info_type,
call_vec_info_type,
call_simd_clone_vec_info_type,
assignment_vec_info_type,
condition_vec_info_type,
reduc_vec_info_type,
@ -565,6 +566,9 @@ typedef struct _stmt_vec_info {
of this stmt. */
vec<dr_p> same_align_refs;
/* Selected SIMD clone's function decl. */
tree simd_clone_fndecl;
/* Classify the def of this stmt. */
enum vect_def_type def_type;
@ -633,6 +637,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_SIMD_CLONE_FNDECL(S) (S)->simd_clone_fndecl
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
#define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element

View file

@ -1344,6 +1344,10 @@ extern void protected_set_expr_location (tree, location_t);
#define OMP_CLAUSE_LINEAR_NO_COPYOUT(NODE) \
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
/* True if a LINEAR clause has a stride that is variable. */
#define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \
TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
#define OMP_CLAUSE_LINEAR_STEP(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR), 1)