cgraph.h (enum cgraph_simd_clone_arg_type): New.
* cgraph.h (enum cgraph_simd_clone_arg_type): New. (struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New. (struct cgraph_node): Add simdclone and simd_clones fields. * config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen, ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions. (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define. * doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add. * doc/tm.texi: Regenerated. * ggc.h (ggc_alloc_cleared_simd_clone_stat): New function. * ipa-cp.c (determine_versionability): Fail if "omp declare simd" attribute is present. * omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h. (simd_clone_vector_of_formal_parm_types): New function. (simd_clone_struct_alloc, simd_clone_struct_copy, simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract, simd_clone_compute_base_data_type, simd_clone_mangle, simd_clone_create, simd_clone_adjust_return_type, create_tmp_simd_array, simd_clone_adjust_argument_types, simd_clone_init_simd_arrays): New functions. (struct modify_stmt_info): New type. (ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body, simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New functions. (pass_data_omp_simd_clone): New variable. (pass_omp_simd_clone): New class. (make_pass_omp_simd_clone): New function. * passes.def (pass_omp_simd_clone): New. * target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN, TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target hooks. * target.h (struct cgraph_node, struct cgraph_simd_node): Declare. * tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document. * tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define. * tree-pass.h (make_pass_omp_simd_clone): New prototype. * tree-vect-data-refs.c: Include cgraph.h. 
(vect_analyze_data_refs): Inline by hand find_data_references_in_loop and find_data_references_in_bb, if find_data_references_in_stmt fails, still allow calls to #pragma omp declare simd functions in #pragma omp simd loops unless they contain data references among the call arguments or in lhs. * tree-vect-loop.c (vect_determine_vectorization_factor): Handle calls with no lhs. (vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without lhs. * tree-vectorizer.h (enum stmt_vec_info_type): Add call_simd_clone_vec_info_type. (struct _stmt_vec_info): Add simd_clone_fndecl field. (STMT_VINFO_SIMD_CLONE_FNDECL): Define. * tree-vect-stmts.c: Include tree-ssa-loop.h, tree-scalar-evolution.h and cgraph.h. (vectorizable_call): Handle calls without lhs. Assert !stmt_can_throw_internal instead of failing for it. Don't update EH stuff. (struct simd_call_arg_info): New. (vectorizable_simd_clone_call): New function. (vect_transform_stmt): Call it. (vect_analyze_stmt): Likewise. Allow NULL STMT_VINFO_VECTYPE for calls without lhs. * ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node if cgraph_function_with_gimple_body_p is true. c/ * c-decl.c (c_builtin_function_ext_scope): Avoid binding if external_scope is NULL. cp/ * semantics.c (finish_omp_clauses): For #pragma omp declare simd linear clause step call maybe_constant_value. testsuite/ * g++.dg/gomp/declare-simd-1.C (f38): Make sure simdlen is a power of two. * gcc.dg/gomp/simd-clones-2.c: Compile on all targets. Remove -msse2. Adjust regexps for name mangling changes. * gcc.dg/gomp/simd-clones-3.c: Likewise. * gcc.dg/vect/vect-simd-clone-1.c: New test. * gcc.dg/vect/vect-simd-clone-2.c: New test. * gcc.dg/vect/vect-simd-clone-3.c: New test. * gcc.dg/vect/vect-simd-clone-4.c: New test. * gcc.dg/vect/vect-simd-clone-5.c: New test. * gcc.dg/vect/vect-simd-clone-6.c: New test. * gcc.dg/vect/vect-simd-clone-7.c: New test. * gcc.dg/vect/vect-simd-clone-8.c: New test. * gcc.dg/vect/vect-simd-clone-9.c: New test. 
* gcc.dg/vect/vect-simd-clone-10.c: New test. * gcc.dg/vect/vect-simd-clone-10.h: New file. * gcc.dg/vect/vect-simd-clone-10a.c: New file. * gcc.dg/vect/vect-simd-clone-11.c: New test. Co-Authored-By: Jakub Jelinek <jakub@redhat.com> From-SVN: r205442
This commit is contained in:
parent
a7d4a96bf7
commit
0136f8f03a
46 changed files with 3234 additions and 29 deletions
|
@ -1,3 +1,69 @@
|
|||
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
|
||||
Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* cgraph.h (enum cgraph_simd_clone_arg_type): New.
|
||||
(struct cgraph_simd_clone_arg, struct cgraph_simd_clone): New.
|
||||
(struct cgraph_node): Add simdclone and simd_clones fields.
|
||||
* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen,
|
||||
ix86_simd_clone_adjust, ix86_simd_clone_usable): New functions.
|
||||
(TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
|
||||
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Define.
|
||||
* doc/tm.texi.in (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
|
||||
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): Add.
|
||||
* doc/tm.texi: Regenerated.
|
||||
* ggc.h (ggc_alloc_cleared_simd_clone_stat): New function.
|
||||
* ipa-cp.c (determine_versionability): Fail if "omp declare simd"
|
||||
attribute is present.
|
||||
* omp-low.c: Include pretty-print.h, ipa-prop.h and tree-eh.h.
|
||||
(simd_clone_vector_of_formal_parm_types): New function.
|
||||
(simd_clone_struct_alloc, simd_clone_struct_copy,
|
||||
simd_clone_vector_of_formal_parm_types, simd_clone_clauses_extract,
|
||||
simd_clone_compute_base_data_type, simd_clone_mangle,
|
||||
simd_clone_create, simd_clone_adjust_return_type,
|
||||
create_tmp_simd_array, simd_clone_adjust_argument_types,
|
||||
simd_clone_init_simd_arrays): New functions.
|
||||
(struct modify_stmt_info): New type.
|
||||
(ipa_simd_modify_stmt_ops, ipa_simd_modify_function_body,
|
||||
simd_clone_adjust, expand_simd_clones, ipa_omp_simd_clone): New
|
||||
functions.
|
||||
(pass_data_omp_simd_clone): New variable.
|
||||
(pass_omp_simd_clone): New class.
|
||||
(make_pass_omp_simd_clone): New function.
|
||||
* passes.def (pass_omp_simd_clone): New.
|
||||
* target.def (TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN,
|
||||
TARGET_SIMD_CLONE_ADJUST, TARGET_SIMD_CLONE_USABLE): New target
|
||||
hooks.
|
||||
* target.h (struct cgraph_node, struct cgraph_simd_clone): Declare.
|
||||
* tree-core.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Document.
|
||||
* tree.h (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE): Define.
|
||||
* tree-pass.h (make_pass_omp_simd_clone): New prototype.
|
||||
* tree-vect-data-refs.c: Include cgraph.h.
|
||||
(vect_analyze_data_refs): Inline by hand find_data_references_in_loop
|
||||
and find_data_references_in_bb, if find_data_references_in_stmt
|
||||
fails, still allow calls to #pragma omp declare simd functions
|
||||
in #pragma omp simd loops unless they contain data references among
|
||||
the call arguments or in lhs.
|
||||
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
|
||||
calls with no lhs.
|
||||
(vect_transform_loop): Allow NULL STMT_VINFO_VECTYPE for calls without
|
||||
lhs.
|
||||
* tree-vectorizer.h (enum stmt_vec_info_type): Add
|
||||
call_simd_clone_vec_info_type.
|
||||
(struct _stmt_vec_info): Add simd_clone_fndecl field.
|
||||
(STMT_VINFO_SIMD_CLONE_FNDECL): Define.
|
||||
* tree-vect-stmts.c: Include tree-ssa-loop.h,
|
||||
tree-scalar-evolution.h and cgraph.h.
|
||||
(vectorizable_call): Handle calls without lhs. Assert
|
||||
!stmt_can_throw_internal instead of failing for it. Don't update
|
||||
EH stuff.
|
||||
(struct simd_call_arg_info): New.
|
||||
(vectorizable_simd_clone_call): New function.
|
||||
(vect_transform_stmt): Call it.
|
||||
(vect_analyze_stmt): Likewise. Allow NULL STMT_VINFO_VECTYPE for
|
||||
calls without lhs.
|
||||
* ipa-prop.c (ipa_add_new_function): Only call ipa_analyze_node
|
||||
if cgraph_function_with_gimple_body_p is true.
|
||||
|
||||
2013-11-27 Tom de Vries <tom@codesourcery.com>
|
||||
Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
|
||||
Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* c-decl.c (c_builtin_function_ext_scope): Avoid binding if
|
||||
external_scope is NULL.
|
||||
|
||||
2013-11-27 Tom de Vries <tom@codesourcery.com>
|
||||
Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
|
|
|
@ -3646,8 +3646,9 @@ c_builtin_function_ext_scope (tree decl)
|
|||
const char *name = IDENTIFIER_POINTER (id);
|
||||
C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type);
|
||||
|
||||
bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
|
||||
UNKNOWN_LOCATION);
|
||||
if (external_scope)
|
||||
bind (id, decl, external_scope, /*invisible=*/false, /*nested=*/false,
|
||||
UNKNOWN_LOCATION);
|
||||
|
||||
/* Builtins in the implementation namespace are made visible without
|
||||
needing to be explicitly declared. See push_file_scope. */
|
||||
|
|
99
gcc/cgraph.h
99
gcc/cgraph.h
|
@ -256,6 +256,99 @@ struct GTY(()) cgraph_clone_info
|
|||
bitmap combined_args_to_skip;
|
||||
};
|
||||
|
||||
/* Kind of a single SIMD clone argument; stored in the arg_type field
   of struct cgraph_simd_clone_arg.  */
enum cgraph_simd_clone_arg_type
|
||||
{
|
||||
SIMD_CLONE_ARG_TYPE_VECTOR, /* Vector argument.  */
|
||||
SIMD_CLONE_ARG_TYPE_UNIFORM, /* Uniform argument.  */
|
||||
SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP, /* Linear; linear_step holds the constant step.  */
|
||||
SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP, /* Linear; linear_step holds the index of the uniform argument that supplies the step.  */
|
||||
SIMD_CLONE_ARG_TYPE_MASK /* NOTE(review): presumably the mask argument of an inbranch clone -- confirm against the users in omp-low.c.  */
|
||||
};
|
||||
|
||||
/* Function arguments in the original function of a SIMD clone.
|
||||
Supplementary data for `struct cgraph_simd_clone'. */
|
||||
|
||||
struct GTY(()) cgraph_simd_clone_arg {
|
||||
/* Original function argument as it originally existed in
|
||||
DECL_ARGUMENTS. */
|
||||
tree orig_arg;
|
||||
|
||||
/* Type of orig_arg (or, for extern functions, the type taken from
|
||||
TYPE_ARG_TYPES). */
|
||||
tree orig_type;
|
||||
|
||||
/* If argument is a vector, this holds the vector version of
|
||||
orig_arg that after adjusting the argument types will live in
|
||||
DECL_ARGUMENTS. Otherwise, this is NULL.
|
||||
|
||||
This basically holds:
|
||||
vector(simdlen) __typeof__(orig_arg) new_arg. */
|
||||
tree vector_arg;
|
||||
|
||||
/* vector_arg's type (or, for extern functions, the new vector type). */
|
||||
tree vector_type;
|
||||
|
||||
/* If argument is a vector, this holds the array where the simd
|
||||
argument is held while executing the simd clone function. This
|
||||
is a local variable in the cloned function. Its content is
|
||||
copied from vector_arg upon entry to the clone.
|
||||
|
||||
This basically holds:
|
||||
__typeof__(orig_arg) simd_array[simdlen]. */
|
||||
tree simd_array;
|
||||
|
||||
/* A SIMD clone's argument can be either linear (constant or
|
||||
variable), uniform, or vector. */
|
||||
enum cgraph_simd_clone_arg_type arg_type;
|
||||
|
||||
/* For arg_type SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP this is
|
||||
the constant linear step; if arg_type is
|
||||
SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP, this is the index of
|
||||
the uniform argument holding the step; otherwise 0. */
|
||||
HOST_WIDE_INT linear_step;
|
||||
|
||||
/* Variable alignment if available, otherwise 0. */
|
||||
unsigned int alignment;
|
||||
};
|
||||
|
||||
/* Specific data for a SIMD function clone. */
|
||||
|
||||
struct GTY(()) cgraph_simd_clone {
|
||||
/* Number of elements in each vector argument of this clone (see vector_arg and simd_array in struct cgraph_simd_clone_arg). */
|
||||
unsigned int simdlen;
|
||||
|
||||
/* Number of annotated function arguments in `args'. This is
|
||||
usually the number of named arguments in FNDECL. */
|
||||
unsigned int nargs;
|
||||
|
||||
/* Max hardware vector size in bits for integral vectors. */
|
||||
unsigned int vecsize_int;
|
||||
|
||||
/* Max hardware vector size in bits for floating point vectors. */
|
||||
unsigned int vecsize_float;
|
||||
|
||||
/* The mangling character for a given vector size. This is used
|
||||
to determine the ISA mangling bit as specified in the Intel
|
||||
Vector ABI. */
|
||||
unsigned char vecsize_mangle;
|
||||
|
||||
/* True if this is the masked, in-branch version of the clone,
|
||||
otherwise false. */
|
||||
unsigned int inbranch : 1;
|
||||
|
||||
/* True if this is a Cilk Plus variant. */
|
||||
unsigned int cilk_elemental : 1;
|
||||
|
||||
/* Doubly linked list of SIMD clones. */
|
||||
struct cgraph_node *prev_clone, *next_clone;
|
||||
|
||||
/* Original cgraph node the SIMD clones were created for. */
|
||||
struct cgraph_node *origin;
|
||||
|
||||
/* Annotated function arguments for the original function. Declared with one element while the GTY length marks nargs entries as live; NOTE(review): presumably the allocator over-allocates the structure -- confirm in omp-low.c. */
|
||||
struct cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
|
||||
};
|
||||
|
||||
|
||||
/* The cgraph data structure.
|
||||
Each function decl has assigned cgraph_node listing callees and callers. */
|
||||
|
@ -284,6 +377,12 @@ public:
|
|||
/* Declaration node used to be clone of. */
|
||||
tree former_clone_of;
|
||||
|
||||
/* If this is a SIMD clone, this points to the SIMD specific
|
||||
information for it. */
|
||||
struct cgraph_simd_clone *simdclone;
|
||||
/* If this function has SIMD clones, this points to the first clone. */
|
||||
struct cgraph_node *simd_clones;
|
||||
|
||||
/* Interprocedural passes scheduled to have their transform functions
|
||||
applied next time we execute local pass on them. We maintain it
|
||||
per-function in order to allow IPA passes to introduce new functions. */
|
||||
|
|
|
@ -43690,6 +43690,184 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
|
|||
return val;
|
||||
}
|
||||
|
||||
/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
|
||||
CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
|
||||
CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
|
||||
or number of vecsize_mangle variants that should be emitted. */
|
||||
|
||||
static int
|
||||
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
|
||||
struct cgraph_simd_clone *clonei,
|
||||
tree base_type, int num)
|
||||
{
|
||||
int ret = 1;
|
||||
|
||||
if (clonei->simdlen
|
||||
&& (clonei->simdlen < 2
|
||||
|| clonei->simdlen > 16
|
||||
|| (clonei->simdlen & (clonei->simdlen - 1)) != 0))
|
||||
{
|
||||
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
|
||||
"unsupported simdlen %d\n", clonei->simdlen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
|
||||
if (TREE_CODE (ret_type) != VOID_TYPE)
|
||||
switch (TYPE_MODE (ret_type))
|
||||
{
|
||||
case QImode:
|
||||
case HImode:
|
||||
case SImode:
|
||||
case DImode:
|
||||
case SFmode:
|
||||
case DFmode:
|
||||
/* case SCmode: */
|
||||
/* case DCmode: */
|
||||
break;
|
||||
default:
|
||||
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
|
||||
"unsupported return type %qT for simd\n", ret_type);
|
||||
return 0;
|
||||
}
|
||||
|
||||
tree t;
|
||||
int i;
|
||||
|
||||
for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
|
||||
/* FIXME: Shouldn't we allow such arguments if they are uniform? */
|
||||
switch (TYPE_MODE (TREE_TYPE (t)))
|
||||
{
|
||||
case QImode:
|
||||
case HImode:
|
||||
case SImode:
|
||||
case DImode:
|
||||
case SFmode:
|
||||
case DFmode:
|
||||
/* case SCmode: */
|
||||
/* case DCmode: */
|
||||
break;
|
||||
default:
|
||||
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
|
||||
"unsupported argument type %qT for simd\n", TREE_TYPE (t));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (clonei->cilk_elemental)
|
||||
{
|
||||
/* Parse here processor clause. If not present, default to 'b'. */
|
||||
clonei->vecsize_mangle = 'b';
|
||||
}
|
||||
else if (!TREE_PUBLIC (node->decl))
|
||||
{
|
||||
/* If the function isn't exported, we can pick up just one ISA
|
||||
for the clones. */
|
||||
if (TARGET_AVX2)
|
||||
clonei->vecsize_mangle = 'd';
|
||||
else if (TARGET_AVX)
|
||||
clonei->vecsize_mangle = 'c';
|
||||
else
|
||||
clonei->vecsize_mangle = 'b';
|
||||
ret = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
clonei->vecsize_mangle = "bcd"[num];
|
||||
ret = 3;
|
||||
}
|
||||
switch (clonei->vecsize_mangle)
|
||||
{
|
||||
case 'b':
|
||||
clonei->vecsize_int = 128;
|
||||
clonei->vecsize_float = 128;
|
||||
break;
|
||||
case 'c':
|
||||
clonei->vecsize_int = 128;
|
||||
clonei->vecsize_float = 256;
|
||||
break;
|
||||
case 'd':
|
||||
clonei->vecsize_int = 256;
|
||||
clonei->vecsize_float = 256;
|
||||
break;
|
||||
}
|
||||
if (clonei->simdlen == 0)
|
||||
{
|
||||
if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
|
||||
clonei->simdlen = clonei->vecsize_int;
|
||||
else
|
||||
clonei->simdlen = clonei->vecsize_float;
|
||||
clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
|
||||
if (clonei->simdlen > 16)
|
||||
clonei->simdlen = 16;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Add target attribute to SIMD clone NODE if needed. */
|
||||
|
||||
static void
|
||||
ix86_simd_clone_adjust (struct cgraph_node *node)
|
||||
{
|
||||
const char *str = NULL;
|
||||
gcc_assert (node->decl == cfun->decl);
|
||||
switch (node->simdclone->vecsize_mangle)
|
||||
{
|
||||
case 'b':
|
||||
if (!TARGET_SSE2)
|
||||
str = "sse2";
|
||||
break;
|
||||
case 'c':
|
||||
if (!TARGET_AVX)
|
||||
str = "avx";
|
||||
break;
|
||||
case 'd':
|
||||
if (!TARGET_AVX2)
|
||||
str = "avx2";
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
if (str == NULL)
|
||||
return;
|
||||
push_cfun (NULL);
|
||||
tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
|
||||
bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
|
||||
gcc_assert (ok);
|
||||
pop_cfun ();
|
||||
ix86_previous_fndecl = NULL_TREE;
|
||||
ix86_set_current_function (node->decl);
|
||||
}
|
||||
|
||||
/* If SIMD clone NODE can't be used in a vectorized loop
|
||||
in current function, return -1, otherwise return a badness of using it
|
||||
(0 if it is most desirable from vecsize_mangle point of view, 1
|
||||
slightly less desirable, etc.). */
|
||||
|
||||
static int
|
||||
ix86_simd_clone_usable (struct cgraph_node *node)
|
||||
{
|
||||
switch (node->simdclone->vecsize_mangle)
|
||||
{
|
||||
case 'b':
|
||||
if (!TARGET_SSE2)
|
||||
return -1;
|
||||
if (!TARGET_AVX)
|
||||
return 0;
|
||||
return TARGET_AVX2 ? 2 : 1;
|
||||
case 'c':
|
||||
if (!TARGET_AVX)
|
||||
return -1;
|
||||
return TARGET_AVX2 ? 1 : 0;
|
||||
break;
|
||||
case 'd':
|
||||
if (!TARGET_AVX2)
|
||||
return -1;
|
||||
return 0;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
|
||||
|
||||
static bool
|
||||
|
@ -44178,6 +44356,18 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
|
|||
#undef TARGET_SPILL_CLASS
|
||||
#define TARGET_SPILL_CLASS ix86_spill_class
|
||||
|
||||
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
|
||||
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
|
||||
ix86_simd_clone_compute_vecsize_and_simdlen
|
||||
|
||||
#undef TARGET_SIMD_CLONE_ADJUST
|
||||
#define TARGET_SIMD_CLONE_ADJUST \
|
||||
ix86_simd_clone_adjust
|
||||
|
||||
#undef TARGET_SIMD_CLONE_USABLE
|
||||
#define TARGET_SIMD_CLONE_USABLE \
|
||||
ix86_simd_clone_usable
|
||||
|
||||
#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
|
||||
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
|
||||
ix86_float_exceptions_rounding_supported_p
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
|
||||
Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* semantics.c (finish_omp_clauses): For #pragma omp declare simd
|
||||
linear clause step call maybe_constant_value.
|
||||
|
||||
2013-11-27 Tom de Vries <tom@codesourcery.com>
|
||||
Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
|
|
|
@ -5202,6 +5202,8 @@ finish_omp_clauses (tree clauses)
|
|||
t = mark_rvalue_use (t);
|
||||
if (!processing_template_decl)
|
||||
{
|
||||
if (TREE_CODE (OMP_CLAUSE_DECL (c)) == PARM_DECL)
|
||||
t = maybe_constant_value (t);
|
||||
t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
|
||||
if (TREE_CODE (TREE_TYPE (OMP_CLAUSE_DECL (c)))
|
||||
== POINTER_TYPE)
|
||||
|
|
|
@ -5818,6 +5818,26 @@ The default is @code{NULL_TREE} which means to not vectorize gather
|
|||
loads.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int})
|
||||
This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
|
||||
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
|
||||
@var{simdlen} field if it was previously 0.
|
||||
The hook should return 0 if SIMD clones shouldn't be emitted,
|
||||
or number of @var{vecsize_mangle} variants that should be emitted.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_SIMD_CLONE_ADJUST (struct cgraph_node *@var{})
|
||||
This hook should add implicit @code{attribute(target("..."))} attribute
|
||||
to SIMD clone @var{node} if needed.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{})
|
||||
This hook should return -1 if SIMD clone @var{node} shouldn't be used
|
||||
in vectorized loops in current function, or non-negative number if it is
|
||||
usable. In that case, the smaller the number is, the more desirable it is
|
||||
to use it.
|
||||
@end deftypefn
|
||||
|
||||
@node Anchored Addresses
|
||||
@section Anchored Addresses
|
||||
@cindex anchored addresses
|
||||
|
|
|
@ -4422,6 +4422,12 @@ address; but often a machine-dependent strategy can generate better code.
|
|||
|
||||
@hook TARGET_VECTORIZE_BUILTIN_GATHER
|
||||
|
||||
@hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
|
||||
|
||||
@hook TARGET_SIMD_CLONE_ADJUST
|
||||
|
||||
@hook TARGET_SIMD_CLONE_USABLE
|
||||
|
||||
@node Anchored Addresses
|
||||
@section Anchored Addresses
|
||||
@cindex anchored addresses
|
||||
|
|
|
@ -276,4 +276,11 @@ ggc_alloc_cleared_gimple_statement_stat (size_t s MEM_STAT_DECL)
|
|||
ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
|
||||
}
|
||||
|
||||
/* Allocate S bytes with ggc_internal_cleared_alloc_stat and return the
   result cast to struct simd_clone *.
   NOTE(review): the structure introduced alongside this helper is
   named struct cgraph_simd_clone; no struct simd_clone definition is
   visible here -- confirm the intended return type and whether this
   helper has any callers.  */
static inline struct simd_clone *
|
||||
ggc_alloc_cleared_simd_clone_stat (size_t s MEM_STAT_DECL)
|
||||
{
|
||||
return (struct simd_clone *)
|
||||
ggc_internal_cleared_alloc_stat (s PASS_MEM_STAT);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -430,6 +430,13 @@ determine_versionability (struct cgraph_node *node)
|
|||
reason = "not a tree_versionable_function";
|
||||
else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
|
||||
reason = "insufficient body availability";
|
||||
else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
|
||||
{
|
||||
/* Ideally we should clone the SIMD clones themselves and create
|
||||
vector copies of them, so IPA-cp and SIMD clones can happily
|
||||
coexist, but that may not be worth the effort. */
|
||||
reason = "function has SIMD clones";
|
||||
}
|
||||
|
||||
if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
|
||||
fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n",
|
||||
|
|
|
@ -3217,7 +3217,8 @@ ipa_node_duplication_hook (struct cgraph_node *src, struct cgraph_node *dst,
|
|||
static void
|
||||
ipa_add_new_function (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
|
||||
{
|
||||
ipa_analyze_node (node);
|
||||
if (cgraph_function_with_gimple_body_p (node))
|
||||
ipa_analyze_node (node);
|
||||
}
|
||||
|
||||
/* Register our cgraph hooks if they are not already there. */
|
||||
|
|
13
gcc/ipa.c
13
gcc/ipa.c
|
@ -426,6 +426,19 @@ symtab_remove_unreachable_nodes (bool before_inlining_p, FILE *file)
|
|||
enqueue_node (cnode, &first, reachable);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
/* If any reachable function has simd clones, mark them as
|
||||
reachable as well. */
|
||||
if (cnode->simd_clones)
|
||||
{
|
||||
cgraph_node *next;
|
||||
for (next = cnode->simd_clones;
|
||||
next;
|
||||
next = next->simdclone->next_clone)
|
||||
if (in_boundary_p
|
||||
|| !pointer_set_insert (reachable, next))
|
||||
enqueue_node (next, &first, reachable);
|
||||
}
|
||||
}
|
||||
/* When we see constructor of external variable, keep referred nodes in the
|
||||
|
|
1161
gcc/omp-low.c
1161
gcc/omp-low.c
File diff suppressed because it is too large
Load diff
|
@ -117,6 +117,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
compiled unit. */
|
||||
INSERT_PASSES_AFTER (all_late_ipa_passes)
|
||||
NEXT_PASS (pass_ipa_pta);
|
||||
NEXT_PASS (pass_omp_simd_clone);
|
||||
TERMINATE_PASS_LIST ()
|
||||
|
||||
/* These passes are run after IPA passes on every function that is being
|
||||
|
|
|
@ -1521,6 +1521,36 @@ hook_int_uint_mode_1)
|
|||
|
||||
HOOK_VECTOR_END (sched)
|
||||
|
||||
/* Functions relating to OpenMP and Cilk Plus SIMD clones. */
|
||||
#undef HOOK_PREFIX
|
||||
#define HOOK_PREFIX "TARGET_SIMD_CLONE_"
|
||||
HOOK_VECTOR (TARGET_SIMD_CLONE, simd_clone)
|
||||
|
||||
DEFHOOK
|
||||
(compute_vecsize_and_simdlen,
|
||||
"This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}\n\
|
||||
fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also\n\
|
||||
@var{simdlen} field if it was previously 0.\n\
|
||||
The hook should return 0 if SIMD clones shouldn't be emitted,\n\
|
||||
or number of @var{vecsize_mangle} variants that should be emitted.",
|
||||
int, (struct cgraph_node *, struct cgraph_simd_clone *, tree, int), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(adjust,
|
||||
"This hook should add implicit @code{attribute(target(\"...\"))} attribute\n\
|
||||
to SIMD clone @var{node} if needed.",
|
||||
void, (struct cgraph_node *), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(usable,
|
||||
"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\
|
||||
in vectorized loops in current function, or non-negative number if it is\n\
|
||||
usable. In that case, the smaller the number is, the more desirable it is\n\
|
||||
to use it.",
|
||||
int, (struct cgraph_node *), NULL)
|
||||
|
||||
HOOK_VECTOR_END (simd_clone)
|
||||
|
||||
/* Functions relating to vectorization. */
|
||||
#undef HOOK_PREFIX
|
||||
#define HOOK_PREFIX "TARGET_VECTORIZE_"
|
||||
|
|
|
@ -93,6 +93,8 @@ extern bool target_default_pointer_address_modes_p (void);
|
|||
struct stdarg_info;
|
||||
struct spec_info_def;
|
||||
struct hard_reg_set_container;
|
||||
struct cgraph_node;
|
||||
struct cgraph_simd_clone;
|
||||
|
||||
/* The struct used by the secondary_reload target hook. */
|
||||
typedef struct secondary_reload_info
|
||||
|
|
|
@ -1,3 +1,25 @@
|
|||
2013-11-27 Aldy Hernandez <aldyh@redhat.com>
|
||||
Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* g++.dg/gomp/declare-simd-1.C (f38): Make sure
|
||||
simdlen is a power of two.
|
||||
* gcc.dg/gomp/simd-clones-2.c: Compile on all targets.
|
||||
Remove -msse2. Adjust regexps for name mangling changes.
|
||||
* gcc.dg/gomp/simd-clones-3.c: Likewise.
|
||||
* gcc.dg/vect/vect-simd-clone-1.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-2.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-3.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-4.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-5.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-6.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-7.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-8.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-9.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-10.c: New test.
|
||||
* gcc.dg/vect/vect-simd-clone-10.h: New file.
|
||||
* gcc.dg/vect/vect-simd-clone-10a.c: New file.
|
||||
* gcc.dg/vect/vect-simd-clone-11.c: New test.
|
||||
|
||||
2013-11-27 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
|
||||
|
||||
* gcc.dg/cilk-plus/cilk-plus.exp: Append to ld_library_path.
|
||||
|
|
|
@ -239,5 +239,5 @@ struct D
|
|||
void
|
||||
f38 (D &d)
|
||||
{
|
||||
d.f37 <12> (6);
|
||||
d.f37 <16> (6);
|
||||
}
|
||||
|
|
33
gcc/testsuite/gcc.dg/gomp/simd-clones-1.c
Normal file
33
gcc/testsuite/gcc.dg/gomp/simd-clones-1.c
Normal file
|
@ -0,0 +1,33 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-fopenmp -fdump-tree-optimized -O3" } */
|
||||
|
||||
/* Test that functions that have SIMD clone counterparts are not
|
||||
cloned by IPA-cp. For example, special_add() below has SIMD clones
|
||||
created for it. However, if IPA-cp later decides to clone a
|
||||
specialization of special_add(x, 666) when analyzing fillit(), we
|
||||
will forever keep the vectorizer from using the SIMD versions of
|
||||
special_add in a loop.
|
||||
|
||||
If IPA-CP gets taught how to adjust the SIMD clones as well, this
|
||||
test could be removed. */
|
||||
|
||||
#pragma omp declare simd simdlen(4)
|
||||
/* Return x + y, plus an extra 123 when y is 666.  The special case is
   what would make a specialization on the constant 666 passed by
   fillit worthwhile.  */
static int __attribute__ ((noinline))
|
||||
special_add (int x, int y)
|
||||
{
|
||||
if (y == 666)
|
||||
return x + y + 123;
|
||||
else
|
||||
return x + y;
|
||||
}
|
||||
|
||||
/* Store special_add (i, 666) into tot[i] for every i in [0, 10000);
   TOT must therefore point to at least 10000 ints.  */
void fillit(int *tot)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i < 10000; ++i)
|
||||
tot[i] = special_add (i, 666);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "special_add.constprop" "optimized" } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
26
gcc/testsuite/gcc.dg/gomp/simd-clones-2.c
Normal file
26
gcc/testsuite/gcc.dg/gomp/simd-clones-2.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* { dg-options "-fopenmp -fdump-tree-optimized -O" } */
|
||||
|
||||
#pragma omp declare simd inbranch uniform(c) linear(b:66)
|
||||
#pragma omp declare simd notinbranch aligned(c:32)
|
||||
/* Return a + b.  C is not used by the body; it is only named by the
   uniform/aligned clauses of the declare simd pragmas above.  */
int addit(int a, int b, int *c)
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
#pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch
|
||||
/* Add x to a[k] in place and return the updated element.  */
float setArray(float *a, float x, int k)
|
||||
{
|
||||
a[k] = a[k] + x;
|
||||
return a[k];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVbM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVcN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVcN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVcM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVdN8ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVdN8vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVdM8vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
18
gcc/testsuite/gcc.dg/gomp/simd-clones-3.c
Normal file
18
gcc/testsuite/gcc.dg/gomp/simd-clones-3.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-options "-fopenmp -fdump-tree-optimized -O2" } */
|
||||
|
||||
/* Test that if there are no *inbranch clauses, both the masked and
|
||||
the unmasked versions are created. */
|
||||
|
||||
#pragma omp declare simd
|
||||
/* Return a + b; c is not used by the body.  */
int addit(int a, int b, int c)
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "_ZGVbN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVbM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVcN4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVcM4vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVdN8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump "_ZGVdM8vvv_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
11
gcc/testsuite/gcc.dg/gomp/simd-clones-4.c
Normal file
11
gcc/testsuite/gcc.dg/gomp/simd-clones-4.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-fopenmp" } */
|
||||
|
||||
#pragma omp declare simd simdlen(4) notinbranch
|
||||
int f2 (int a, int b)
|
||||
{
|
||||
if (a > 5)
|
||||
return a + b;
|
||||
else
|
||||
return a - b;
|
||||
}
|
12
gcc/testsuite/gcc.dg/gomp/simd-clones-5.c
Normal file
12
gcc/testsuite/gcc.dg/gomp/simd-clones-5.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-fopenmp -w" } */
|
||||
|
||||
/* ?? The -w above is to inhibit the following warning for now:
|
||||
a.c:2:6: warning: AVX vector argument without AVX enabled changes
|
||||
the ABI [enabled by default]. */
|
||||
|
||||
#pragma omp declare simd notinbranch simdlen(4)
|
||||
void foo (int *a)
|
||||
{
|
||||
*a = 555;
|
||||
}
|
11
gcc/testsuite/gcc.dg/gomp/simd-clones-6.c
Normal file
11
gcc/testsuite/gcc.dg/gomp/simd-clones-6.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-fopenmp" } */
|
||||
|
||||
/* Test that array subscripts are properly adjusted. */
|
||||
|
||||
int array[1000];
|
||||
#pragma omp declare simd notinbranch simdlen(4)
|
||||
void foo (int i)
|
||||
{
|
||||
array[i] = 555;
|
||||
}
|
16
gcc/testsuite/gcc.dg/gomp/simd-clones-7.c
Normal file
16
gcc/testsuite/gcc.dg/gomp/simd-clones-7.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-fopenmp -w" } */
|
||||
|
||||
int array[1000];
|
||||
|
||||
#pragma omp declare simd notinbranch simdlen(4)
|
||||
void foo (int *a, int b)
|
||||
{
|
||||
a[b] = 555;
|
||||
}
|
||||
|
||||
#pragma omp declare simd notinbranch simdlen(4)
|
||||
void bar (int *a)
|
||||
{
|
||||
*a = 555;
|
||||
}
|
58
gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c
Normal file
58
gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int array[N];
|
||||
|
||||
#pragma omp declare simd simdlen(4) notinbranch
|
||||
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
|
||||
#pragma omp declare simd simdlen(8) notinbranch
|
||||
#pragma omp declare simd simdlen(8) notinbranch uniform(b) linear(c:3)
|
||||
__attribute__((noinline)) int
|
||||
foo (int a, int b, int c)
|
||||
{
|
||||
if (a < 30)
|
||||
return 5;
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
array[i] = foo (i, 123, i * 3);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
baz ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
array[i] = foo (i, array[i], i * 3);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
bar ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (array[i] != (i < 30 ? 5 : i * 4 + 123))
|
||||
abort ();
|
||||
baz ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (array[i] != (i < 30 ? 5 : i * 8 + 123))
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
83
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.c
Normal file
83
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
/* { dg-additional-sources vect-simd-clone-10a.c } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N], b[N];
|
||||
long int c[N];
|
||||
unsigned char d[N];
|
||||
|
||||
#include "vect-simd-clone-10.h"
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn1 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn2 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
d[i]++;
|
||||
}
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
d[i] /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn3 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = i * 2;
|
||||
b[i] = 17 + (i % 37);
|
||||
c[i] = (i & 63);
|
||||
d[i] = 16 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
fn3 ();
|
||||
fn1 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
|
||||
abort ();
|
||||
fn3 ();
|
||||
fn2 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
|
||||
|| d[i] != ((unsigned char) (17 + i)) / 2)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
4
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.h
Normal file
4
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10.h
Normal file
|
@ -0,0 +1,4 @@
|
|||
#pragma omp declare simd notinbranch
|
||||
extern int foo (long int a, int b, int c);
|
||||
#pragma omp declare simd notinbranch
|
||||
extern long int bar (int a, int b, long int c);
|
17
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c
Normal file
17
gcc/testsuite/gcc.dg/vect/vect-simd-clone-10a.c
Normal file
|
@ -0,0 +1,17 @@
|
|||
/* { dg-do compile } */
|
||||
|
||||
#include "vect-simd-clone-10.h"
|
||||
|
||||
#pragma omp declare simd notinbranch
|
||||
extern int
|
||||
foo (long int a, int b, int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
#pragma omp declare simd notinbranch
|
||||
extern long int
|
||||
bar (int a, int b, long int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
66
gcc/testsuite/gcc.dg/vect/vect-simd-clone-11.c
Normal file
66
gcc/testsuite/gcc.dg/vect/vect-simd-clone-11.c
Normal file
|
@ -0,0 +1,66 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N] __attribute__((aligned (32)));
|
||||
|
||||
#pragma omp declare simd linear(a) linear(b:3) linear(c:6) notinbranch
|
||||
__attribute__((noinline)) int
|
||||
foo (int a, int b, int c)
|
||||
{
|
||||
return a ^ (b * 512) ^ (c * 512 * 512);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar (int *d)
|
||||
{
|
||||
int i, j, k;
|
||||
for (i = 0, j = 0, k = 0; i < N / 2; i++, j++, k += 3)
|
||||
d[i] = foo (j, i * 3, 2 * k + 2);
|
||||
}
|
||||
|
||||
#if 0
|
||||
__attribute__((noinline, noclone)) void
|
||||
baz (int *d)
|
||||
{
|
||||
long int i, j, k;
|
||||
for (i = 0, j = 0, k = 0; i < N / 2;
|
||||
i = (int) i + 1, j = (int) j + 1, k = (int) k + 3)
|
||||
d[i] = foo (j, i * 3, 2 * k + 2);
|
||||
}
|
||||
#endif
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
if (sizeof (int) * __CHAR_BIT__ < 32)
|
||||
return 0;
|
||||
bar (a + 7);
|
||||
for (i = 0; i < N / 2; i++)
|
||||
if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
|
||||
abort ();
|
||||
bar (a);
|
||||
for (i = 0; i < N / 2; i++)
|
||||
if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
|
||||
abort ();
|
||||
#if 0
|
||||
baz (a + 7);
|
||||
for (i = 0; i < N / 2; i++)
|
||||
if (a[i + 7] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
|
||||
abort ();
|
||||
baz (a);
|
||||
for (i = 0; i < N / 2; i++)
|
||||
if (a[i] != (i ^ (i * 3 * 512) ^ (((i * 6) + 2) * 512 * 512)))
|
||||
abort ();
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
52
gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c
Normal file
52
gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int array[N] __attribute__((aligned (32)));
|
||||
|
||||
#pragma omp declare simd simdlen(4) notinbranch aligned(a:16) uniform(a) linear(b)
|
||||
#pragma omp declare simd simdlen(4) notinbranch aligned(a:32) uniform(a) linear(b)
|
||||
#pragma omp declare simd simdlen(8) notinbranch aligned(a:16) uniform(a) linear(b)
|
||||
#pragma omp declare simd simdlen(8) notinbranch aligned(a:32) uniform(a) linear(b)
|
||||
__attribute__((noinline)) void
|
||||
foo (int *a, int b, int c)
|
||||
{
|
||||
a[b] = c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
foo (array, i, i * array[i]);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
baz ()
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
array[i] = 5 * (i & 7);
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
baz ();
|
||||
bar ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (array[i] != 5 * (i & 7) * i)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
45
gcc/testsuite/gcc.dg/vect/vect-simd-clone-3.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-simd-clone-3.c
Normal file
|
@ -0,0 +1,45 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int d[N], e[N];
|
||||
|
||||
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
|
||||
__attribute__((noinline)) int
|
||||
foo (int a, int b, int c)
|
||||
{
|
||||
if (a < 30)
|
||||
return 5;
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
d[i] = foo (i, 123, i * 3);
|
||||
e[i] = e[i] + i;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
bar ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (d[i] != (i < 30 ? 5 : i * 4 + 123) || e[i] != i)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
48
gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c
Normal file
48
gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c
Normal file
|
@ -0,0 +1,48 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
float d[N];
|
||||
int e[N];
|
||||
unsigned short f[N];
|
||||
|
||||
#pragma omp declare simd simdlen(8) notinbranch uniform(b)
|
||||
__attribute__((noinline)) float
|
||||
foo (float a, float b, float c)
|
||||
{
|
||||
if (a < 30)
|
||||
return 5.0f;
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
d[i] = foo (i, 123, i * 3);
|
||||
e[i] = e[i] * 3;
|
||||
f[i] = f[i] + 1;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
bar ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (d[i] != (i < 30 ? 5.0f : i * 4 + 123.0f) || e[i] || f[i] != 1)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
43
gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c
Normal file
43
gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int d[N], e[N];
|
||||
|
||||
#pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3)
|
||||
__attribute__((noinline)) long long int
|
||||
foo (int a, int b, int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar ()
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; ++i)
|
||||
{
|
||||
d[i] = foo (i, 123, i * 3);
|
||||
e[i] = e[i] + i;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
bar ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (d[i] != i * 4 + 123 || e[i] != i)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
74
gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c
Normal file
74
gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c
Normal file
|
@ -0,0 +1,74 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N];
|
||||
long long int b[N];
|
||||
short c[N];
|
||||
|
||||
#pragma omp declare simd
|
||||
#pragma omp declare simd uniform(b) linear(c:3)
|
||||
__attribute__((noinline)) short
|
||||
foo (int a, long long int b, short c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar (int x)
|
||||
{
|
||||
int i;
|
||||
if (x == 0)
|
||||
{
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = foo (a[i], b[i], c[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = foo (a[i], x, i * 3);
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
baz (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 2 * i;
|
||||
b[i] = -7 * i + 6;
|
||||
c[i] = (i & 31) << 4;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
baz ();
|
||||
bar (0);
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != 2 * i || b[i] != 6 - 7 * i
|
||||
|| c[i] != 6 - 5 * i + ((i & 31) << 4))
|
||||
abort ();
|
||||
else
|
||||
a[i] = c[i];
|
||||
bar (17);
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != 6 - 5 * i + ((i & 31) << 4)
|
||||
|| b[i] != 6 - 7 * i
|
||||
|| c[i] != 23 - 2 * i + ((i & 31) << 4))
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
74
gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c
Normal file
74
gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c
Normal file
|
@ -0,0 +1,74 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N];
|
||||
long long int b[N];
|
||||
short c[N];
|
||||
|
||||
#pragma omp declare simd
|
||||
#pragma omp declare simd uniform(b) linear(c:3)
|
||||
__attribute__((noinline)) short
|
||||
foo (int a, long long int b, int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
bar (int x)
|
||||
{
|
||||
int i;
|
||||
if (x == 0)
|
||||
{
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = foo (a[i], b[i], c[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = foo (a[i], x, i * 3);
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) void
|
||||
baz (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = 2 * i;
|
||||
b[i] = -7 * i + 6;
|
||||
c[i] = (i & 31) << 4;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
baz ();
|
||||
bar (0);
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != 2 * i || b[i] != 6 - 7 * i
|
||||
|| c[i] != 6 - 5 * i + ((i & 31) << 4))
|
||||
abort ();
|
||||
else
|
||||
a[i] = c[i];
|
||||
bar (17);
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != 6 - 5 * i + ((i & 31) << 4)
|
||||
|| b[i] != 6 - 7 * i
|
||||
|| c[i] != 23 - 2 * i + ((i & 31) << 4))
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
94
gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c
Normal file
94
gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N], b[N];
|
||||
long int c[N];
|
||||
unsigned char d[N];
|
||||
|
||||
#pragma omp declare simd simdlen(8) notinbranch
|
||||
__attribute__((noinline)) int
|
||||
foo (long int a, int b, int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
#pragma omp declare simd simdlen(8) notinbranch
|
||||
__attribute__((noinline)) long int
|
||||
bar (int a, int b, long int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn1 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn2 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
d[i]++;
|
||||
}
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
d[i] /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn3 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = i * 2;
|
||||
b[i] = 17 + (i % 37);
|
||||
c[i] = (i & 63);
|
||||
d[i] = 16 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
fn3 ();
|
||||
fn1 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
|
||||
abort ();
|
||||
fn3 ();
|
||||
fn2 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
|
||||
|| d[i] != ((unsigned char) (17 + i)) / 2)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
94
gcc/testsuite/gcc.dg/vect/vect-simd-clone-9.c
Normal file
94
gcc/testsuite/gcc.dg/vect/vect-simd-clone-9.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* { dg-additional-options "-fopenmp-simd" } */
|
||||
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#ifndef N
|
||||
#define N 1024
|
||||
#endif
|
||||
|
||||
int a[N], b[N];
|
||||
long int c[N];
|
||||
unsigned char d[N];
|
||||
|
||||
#pragma omp declare simd notinbranch
|
||||
__attribute__((noinline)) static int
|
||||
foo (long int a, int b, int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
#pragma omp declare simd notinbranch
|
||||
__attribute__((noinline)) static long int
|
||||
bar (int a, int b, long int c)
|
||||
{
|
||||
return a + b + c;
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn1 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn2 (void)
|
||||
{
|
||||
int i;
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = foo (c[i], a[i], b[i]) + 6;
|
||||
d[i]++;
|
||||
}
|
||||
#pragma omp simd
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
c[i] = bar (a[i], b[i], c[i]) * 2;
|
||||
d[i] /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__((noinline)) void
|
||||
fn3 (void)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
a[i] = i * 2;
|
||||
b[i] = 17 + (i % 37);
|
||||
c[i] = (i & 63);
|
||||
d[i] = 16 + i;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
int i;
|
||||
check_vect ();
|
||||
fn3 ();
|
||||
fn1 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63))
|
||||
abort ();
|
||||
fn3 ();
|
||||
fn2 ();
|
||||
for (i = 0; i < N; i++)
|
||||
if (a[i] != i * 2 + 23 + (i % 37) + (i & 63)
|
||||
|| b[i] != 17 + (i % 37)
|
||||
|| c[i] != i * 4 + 80 + 4 * (i % 37) + 4 * (i & 63)
|
||||
|| d[i] != ((unsigned char) (17 + i)) / 2)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -903,6 +903,9 @@ struct GTY(()) tree_base {
|
|||
CALL_ALLOCA_FOR_VAR_P in
|
||||
CALL_EXPR
|
||||
|
||||
OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in
|
||||
OMP_CLAUSE_LINEAR
|
||||
|
||||
side_effects_flag:
|
||||
|
||||
TREE_SIDE_EFFECTS in
|
||||
|
|
|
@ -472,6 +472,7 @@ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
|
|||
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
|
||||
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
|
||||
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
|
||||
extern simple_ipa_opt_pass *make_pass_omp_simd_clone (gcc::context *ctxt);
|
||||
extern ipa_opt_pass_d *make_pass_ipa_profile (gcc::context *ctxt);
|
||||
extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt);
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "diagnostic-core.h"
|
||||
#include "cgraph.h"
|
||||
/* Need to include rtl.h, expr.h, etc. for optabs. */
|
||||
#include "expr.h"
|
||||
#include "optabs.h"
|
||||
|
@ -3167,10 +3168,11 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
|
|||
|
||||
if (loop_vinfo)
|
||||
{
|
||||
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
|
||||
|
||||
loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
|
||||
|| find_data_references_in_loop
|
||||
(loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
|
||||
datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -3179,7 +3181,57 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
|
|||
return false;
|
||||
}
|
||||
|
||||
datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
gimple_stmt_iterator gsi;
|
||||
|
||||
for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
{
|
||||
gimple stmt = gsi_stmt (gsi);
|
||||
if (!find_data_references_in_stmt (loop, stmt, &datarefs))
|
||||
{
|
||||
if (is_gimple_call (stmt) && loop->safelen)
|
||||
{
|
||||
tree fndecl = gimple_call_fndecl (stmt), op;
|
||||
if (fndecl != NULL_TREE)
|
||||
{
|
||||
struct cgraph_node *node = cgraph_get_node (fndecl);
|
||||
if (node != NULL && node->simd_clones != NULL)
|
||||
{
|
||||
unsigned int j, n = gimple_call_num_args (stmt);
|
||||
for (j = 0; j < n; j++)
|
||||
{
|
||||
op = gimple_call_arg (stmt, j);
|
||||
if (DECL_P (op)
|
||||
|| (REFERENCE_CLASS_P (op)
|
||||
&& get_base_address (op)))
|
||||
break;
|
||||
}
|
||||
op = gimple_call_lhs (stmt);
|
||||
/* Ignore #pragma omp declare simd functions
|
||||
if they don't have data references in the
|
||||
call stmt itself. */
|
||||
if (j == n
|
||||
&& !(op
|
||||
&& (DECL_P (op)
|
||||
|| (REFERENCE_CLASS_P (op)
|
||||
&& get_base_address (op)))))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"not vectorized: loop contains function "
|
||||
"calls or data references that cannot "
|
||||
"be analyzed\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -376,6 +376,19 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
|||
|
||||
if (gimple_get_lhs (stmt) == NULL_TREE)
|
||||
{
|
||||
if (is_gimple_call (stmt))
|
||||
{
|
||||
/* Ignore calls with no lhs. These must be calls to
|
||||
#pragma omp declare simd functions, and what vectorization factor
|
||||
it really needs can't be determined until
|
||||
vectorizable_simd_clone_call. */
|
||||
if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
|
||||
{
|
||||
pattern_def_seq = NULL;
|
||||
gsi_next (&si);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -5699,7 +5712,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
bool grouped_store;
|
||||
bool slp_scheduled = false;
|
||||
unsigned int nunits;
|
||||
gimple stmt, pattern_stmt;
|
||||
gimple_seq pattern_def_seq = NULL;
|
||||
gimple_stmt_iterator pattern_def_si = gsi_none ();
|
||||
|
@ -5957,16 +5969,18 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
transform_pattern_stmt = false;
|
||||
}
|
||||
|
||||
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
|
||||
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
|
||||
STMT_VINFO_VECTYPE (stmt_info));
|
||||
if (!STMT_SLP_TYPE (stmt_info)
|
||||
&& nunits != (unsigned int) vectorization_factor
|
||||
&& dump_enabled_p ())
|
||||
/* For SLP VF is set according to unrolling factor, and not to
|
||||
vector size, hence for SLP this print is not valid. */
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"multiple-types.\n");
|
||||
if (STMT_VINFO_VECTYPE (stmt_info))
|
||||
{
|
||||
unsigned int nunits
|
||||
= (unsigned int)
|
||||
TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
|
||||
if (!STMT_SLP_TYPE (stmt_info)
|
||||
&& nunits != (unsigned int) vectorization_factor
|
||||
&& dump_enabled_p ())
|
||||
/* For SLP VF is set according to unrolling factor, and not
|
||||
to vector size, hence for SLP this print is not valid. */
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
|
||||
}
|
||||
|
||||
/* SLP. Schedule all the SLP instances when the first SLP stmt is
|
||||
reached. */
|
||||
|
|
|
@ -46,12 +46,15 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-ssanames.h"
|
||||
#include "tree-ssa-loop-manip.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-ssa-loop.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "expr.h"
|
||||
#include "recog.h" /* FIXME: for insn_data */
|
||||
#include "optabs.h"
|
||||
#include "diagnostic-core.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "dumpfile.h"
|
||||
#include "cgraph.h"
|
||||
|
||||
/* For lang_hooks.types.type_for_mode. */
|
||||
#include "langhooks.h"
|
||||
|
@ -1735,11 +1738,11 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
if (!is_gimple_call (stmt))
|
||||
return false;
|
||||
|
||||
if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
|
||||
if (gimple_call_lhs (stmt) == NULL_TREE
|
||||
|| TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
|
||||
return false;
|
||||
|
||||
if (stmt_can_throw_internal (stmt))
|
||||
return false;
|
||||
gcc_checking_assert (!stmt_can_throw_internal (stmt));
|
||||
|
||||
vectype_out = STMT_VINFO_VECTYPE (stmt_info);
|
||||
|
||||
|
@ -2082,10 +2085,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
|
||||
vargs.release ();
|
||||
|
||||
/* Update the exception handling table with the vector stmt if necessary. */
|
||||
if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
|
||||
gimple_purge_dead_eh_edges (gimple_bb (stmt));
|
||||
|
||||
/* The call in STMT might prevent it from being removed in dce.
|
||||
We however cannot remove it here, due to the way the ssa name
|
||||
it defines is mapped to the new definition. So just replace
|
||||
|
@ -2109,6 +2108,605 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
}
|
||||
|
||||
|
||||
struct simd_call_arg_info
|
||||
{
|
||||
tree vectype;
|
||||
tree op;
|
||||
enum vect_def_type dt;
|
||||
HOST_WIDE_INT linear_step;
|
||||
unsigned int align;
|
||||
};
|
||||
|
||||
/* Function vectorizable_simd_clone_call.
|
||||
|
||||
Check if STMT performs a function call that can be vectorized
|
||||
by calling a simd clone of the function.
|
||||
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
|
||||
stmt to replace it, put it in VEC_STMT, and insert it at GSI.
|
||||
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
|
||||
|
||||
static bool
|
||||
vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
|
||||
gimple *vec_stmt, slp_tree slp_node)
|
||||
{
|
||||
tree vec_dest;
|
||||
tree scalar_dest;
|
||||
tree op, type;
|
||||
tree vec_oprnd0 = NULL_TREE;
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
|
||||
tree vectype;
|
||||
unsigned int nunits;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||
struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
|
||||
tree fndecl, new_temp, def;
|
||||
gimple def_stmt;
|
||||
gimple new_stmt = NULL;
|
||||
int ncopies, j;
|
||||
vec<simd_call_arg_info> arginfo = vNULL;
|
||||
vec<tree> vargs = vNULL;
|
||||
size_t i, nargs;
|
||||
tree lhs, rtype, ratype;
|
||||
vec<constructor_elt, va_gc> *ret_ctor_elts;
|
||||
|
||||
/* Is STMT a vectorizable call? */
|
||||
if (!is_gimple_call (stmt))
|
||||
return false;
|
||||
|
||||
fndecl = gimple_call_fndecl (stmt);
|
||||
if (fndecl == NULL_TREE)
|
||||
return false;
|
||||
|
||||
struct cgraph_node *node = cgraph_get_node (fndecl);
|
||||
if (node == NULL || node->simd_clones == NULL)
|
||||
return false;
|
||||
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
|
||||
return false;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
|
||||
return false;
|
||||
|
||||
if (gimple_call_lhs (stmt)
|
||||
&& TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
|
||||
return false;
|
||||
|
||||
gcc_checking_assert (!stmt_can_throw_internal (stmt));
|
||||
|
||||
vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
|
||||
if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
|
||||
return false;
|
||||
|
||||
/* FORNOW */
|
||||
if (slp_node || PURE_SLP_STMT (stmt_info))
|
||||
return false;
|
||||
|
||||
/* Process function arguments. */
|
||||
nargs = gimple_call_num_args (stmt);
|
||||
|
||||
/* Bail out if the function has zero arguments. */
|
||||
if (nargs == 0)
|
||||
return false;
|
||||
|
||||
arginfo.create (nargs);
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
simd_call_arg_info thisarginfo;
|
||||
affine_iv iv;
|
||||
|
||||
thisarginfo.linear_step = 0;
|
||||
thisarginfo.align = 0;
|
||||
thisarginfo.op = NULL_TREE;
|
||||
|
||||
op = gimple_call_arg (stmt, i);
|
||||
if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
|
||||
&def_stmt, &def, &thisarginfo.dt,
|
||||
&thisarginfo.vectype)
|
||||
|| thisarginfo.dt == vect_uninitialized_def)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"use not simple.\n");
|
||||
arginfo.release ();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (thisarginfo.dt == vect_constant_def
|
||||
|| thisarginfo.dt == vect_external_def)
|
||||
gcc_assert (thisarginfo.vectype == NULL_TREE);
|
||||
else
|
||||
gcc_assert (thisarginfo.vectype != NULL_TREE);
|
||||
|
||||
if (thisarginfo.dt != vect_constant_def
|
||||
&& thisarginfo.dt != vect_external_def
|
||||
&& loop_vinfo
|
||||
&& TREE_CODE (op) == SSA_NAME
|
||||
&& simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false)
|
||||
&& tree_fits_shwi_p (iv.step))
|
||||
{
|
||||
thisarginfo.linear_step = tree_to_shwi (iv.step);
|
||||
thisarginfo.op = iv.base;
|
||||
}
|
||||
else if ((thisarginfo.dt == vect_constant_def
|
||||
|| thisarginfo.dt == vect_external_def)
|
||||
&& POINTER_TYPE_P (TREE_TYPE (op)))
|
||||
thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
|
||||
|
||||
arginfo.quick_push (thisarginfo);
|
||||
}
|
||||
|
||||
unsigned int badness = 0;
|
||||
struct cgraph_node *bestn = NULL;
|
||||
if (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info))
|
||||
bestn = cgraph_get_node (STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info));
|
||||
else
|
||||
for (struct cgraph_node *n = node->simd_clones; n != NULL;
|
||||
n = n->simdclone->next_clone)
|
||||
{
|
||||
unsigned int this_badness = 0;
|
||||
if (n->simdclone->simdlen
|
||||
> (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
|
||||
|| n->simdclone->nargs != nargs)
|
||||
continue;
|
||||
if (n->simdclone->simdlen
|
||||
< (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
|
||||
this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
|
||||
- exact_log2 (n->simdclone->simdlen)) * 1024;
|
||||
if (n->simdclone->inbranch)
|
||||
this_badness += 2048;
|
||||
int target_badness = targetm.simd_clone.usable (n);
|
||||
if (target_badness < 0)
|
||||
continue;
|
||||
this_badness += target_badness * 512;
|
||||
/* FORNOW: Have to add code to add the mask argument. */
|
||||
if (n->simdclone->inbranch)
|
||||
continue;
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
switch (n->simdclone->args[i].arg_type)
|
||||
{
|
||||
case SIMD_CLONE_ARG_TYPE_VECTOR:
|
||||
if (!useless_type_conversion_p
|
||||
(n->simdclone->args[i].orig_type,
|
||||
TREE_TYPE (gimple_call_arg (stmt, i))))
|
||||
i = -1;
|
||||
else if (arginfo[i].dt == vect_constant_def
|
||||
|| arginfo[i].dt == vect_external_def
|
||||
|| arginfo[i].linear_step)
|
||||
this_badness += 64;
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_UNIFORM:
|
||||
if (arginfo[i].dt != vect_constant_def
|
||||
&& arginfo[i].dt != vect_external_def)
|
||||
i = -1;
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
|
||||
if (arginfo[i].dt == vect_constant_def
|
||||
|| arginfo[i].dt == vect_external_def
|
||||
|| (arginfo[i].linear_step
|
||||
!= n->simdclone->args[i].linear_step))
|
||||
i = -1;
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
|
||||
/* FORNOW */
|
||||
i = -1;
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_MASK:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
if (i == (size_t) -1)
|
||||
break;
|
||||
if (n->simdclone->args[i].alignment > arginfo[i].align)
|
||||
{
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
if (arginfo[i].align)
|
||||
this_badness += (exact_log2 (arginfo[i].align)
|
||||
- exact_log2 (n->simdclone->args[i].alignment));
|
||||
}
|
||||
if (i == (size_t) -1)
|
||||
continue;
|
||||
if (bestn == NULL || this_badness < badness)
|
||||
{
|
||||
bestn = n;
|
||||
badness = this_badness;
|
||||
}
|
||||
}
|
||||
|
||||
if (bestn == NULL)
|
||||
{
|
||||
arginfo.release ();
|
||||
return false;
|
||||
}
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
if ((arginfo[i].dt == vect_constant_def
|
||||
|| arginfo[i].dt == vect_external_def)
|
||||
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
|
||||
{
|
||||
arginfo[i].vectype
|
||||
= get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
|
||||
i)));
|
||||
if (arginfo[i].vectype == NULL
|
||||
|| (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
|
||||
> bestn->simdclone->simdlen))
|
||||
{
|
||||
arginfo.release ();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
fndecl = bestn->decl;
|
||||
nunits = bestn->simdclone->simdlen;
|
||||
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
|
||||
|
||||
/* If the function isn't const, only allow it in simd loops where user
|
||||
has asserted that at least nunits consecutive iterations can be
|
||||
performed using SIMD instructions. */
|
||||
if ((loop == NULL || (unsigned) loop->safelen < nunits)
|
||||
&& gimple_vuse (stmt))
|
||||
{
|
||||
arginfo.release ();
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Sanity check: make sure that at least one copy of the vectorized stmt
|
||||
needs to be generated. */
|
||||
gcc_assert (ncopies >= 1);
|
||||
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
{
|
||||
STMT_VINFO_SIMD_CLONE_FNDECL (stmt_info) = bestn->decl;
|
||||
STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"=== vectorizable_simd_clone_call ===\n");
|
||||
/* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
|
||||
arginfo.release ();
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Transform. **/
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
|
||||
|
||||
/* Handle def. */
|
||||
scalar_dest = gimple_call_lhs (stmt);
|
||||
vec_dest = NULL_TREE;
|
||||
rtype = NULL_TREE;
|
||||
ratype = NULL_TREE;
|
||||
if (scalar_dest)
|
||||
{
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
rtype = TREE_TYPE (TREE_TYPE (fndecl));
|
||||
if (TREE_CODE (rtype) == ARRAY_TYPE)
|
||||
{
|
||||
ratype = rtype;
|
||||
rtype = TREE_TYPE (ratype);
|
||||
}
|
||||
}
|
||||
|
||||
prev_stmt_info = NULL;
|
||||
for (j = 0; j < ncopies; ++j)
|
||||
{
|
||||
/* Build argument list for the vectorized call. */
|
||||
if (j == 0)
|
||||
vargs.create (nargs);
|
||||
else
|
||||
vargs.truncate (0);
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
unsigned int k, l, m, o;
|
||||
tree atype;
|
||||
op = gimple_call_arg (stmt, i);
|
||||
switch (bestn->simdclone->args[i].arg_type)
|
||||
{
|
||||
case SIMD_CLONE_ARG_TYPE_VECTOR:
|
||||
atype = bestn->simdclone->args[i].vector_type;
|
||||
o = nunits / TYPE_VECTOR_SUBPARTS (atype);
|
||||
for (m = j * o; m < (j + 1) * o; m++)
|
||||
{
|
||||
if (TYPE_VECTOR_SUBPARTS (atype)
|
||||
< TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
|
||||
{
|
||||
unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
|
||||
k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
|
||||
/ TYPE_VECTOR_SUBPARTS (atype));
|
||||
gcc_assert ((k & (k - 1)) == 0);
|
||||
if (m == 0)
|
||||
vec_oprnd0
|
||||
= vect_get_vec_def_for_operand (op, stmt, NULL);
|
||||
else
|
||||
{
|
||||
vec_oprnd0 = arginfo[i].op;
|
||||
if ((m & (k - 1)) == 0)
|
||||
vec_oprnd0
|
||||
= vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
|
||||
vec_oprnd0);
|
||||
}
|
||||
arginfo[i].op = vec_oprnd0;
|
||||
vec_oprnd0
|
||||
= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
|
||||
size_int (prec),
|
||||
bitsize_int ((m & (k - 1)) * prec));
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (atype, NULL),
|
||||
vec_oprnd0);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
vargs.safe_push (gimple_assign_lhs (new_stmt));
|
||||
}
|
||||
else
|
||||
{
|
||||
k = (TYPE_VECTOR_SUBPARTS (atype)
|
||||
/ TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
|
||||
gcc_assert ((k & (k - 1)) == 0);
|
||||
vec<constructor_elt, va_gc> *ctor_elts;
|
||||
if (k != 1)
|
||||
vec_alloc (ctor_elts, k);
|
||||
else
|
||||
ctor_elts = NULL;
|
||||
for (l = 0; l < k; l++)
|
||||
{
|
||||
if (m == 0 && l == 0)
|
||||
vec_oprnd0
|
||||
= vect_get_vec_def_for_operand (op, stmt, NULL);
|
||||
else
|
||||
vec_oprnd0
|
||||
= vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
|
||||
arginfo[i].op);
|
||||
arginfo[i].op = vec_oprnd0;
|
||||
if (k == 1)
|
||||
break;
|
||||
CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
|
||||
vec_oprnd0);
|
||||
}
|
||||
if (k == 1)
|
||||
vargs.safe_push (vec_oprnd0);
|
||||
else
|
||||
{
|
||||
vec_oprnd0 = build_constructor (atype, ctor_elts);
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (atype, NULL),
|
||||
vec_oprnd0);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
vargs.safe_push (gimple_assign_lhs (new_stmt));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_UNIFORM:
|
||||
vargs.safe_push (op);
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
|
||||
if (j == 0)
|
||||
{
|
||||
gimple_seq stmts;
|
||||
arginfo[i].op
|
||||
= force_gimple_operand (arginfo[i].op, &stmts, true,
|
||||
NULL_TREE);
|
||||
if (stmts != NULL)
|
||||
{
|
||||
basic_block new_bb;
|
||||
edge pe = loop_preheader_edge (loop);
|
||||
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||
gcc_assert (!new_bb);
|
||||
}
|
||||
tree phi_res = copy_ssa_name (op, NULL);
|
||||
gimple new_phi = create_phi_node (phi_res, loop->header);
|
||||
set_vinfo_for_stmt (new_phi,
|
||||
new_stmt_vec_info (new_phi, loop_vinfo,
|
||||
NULL));
|
||||
add_phi_arg (new_phi, arginfo[i].op,
|
||||
loop_preheader_edge (loop), UNKNOWN_LOCATION);
|
||||
enum tree_code code
|
||||
= POINTER_TYPE_P (TREE_TYPE (op))
|
||||
? POINTER_PLUS_EXPR : PLUS_EXPR;
|
||||
tree type = POINTER_TYPE_P (TREE_TYPE (op))
|
||||
? sizetype : TREE_TYPE (op);
|
||||
double_int cst
|
||||
= double_int::from_shwi
|
||||
(bestn->simdclone->args[i].linear_step);
|
||||
cst *= double_int::from_uhwi (ncopies * nunits);
|
||||
tree tcst = double_int_to_tree (type, cst);
|
||||
tree phi_arg = copy_ssa_name (op, NULL);
|
||||
new_stmt = gimple_build_assign_with_ops (code, phi_arg,
|
||||
phi_res, tcst);
|
||||
gimple_stmt_iterator si = gsi_after_labels (loop->header);
|
||||
gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
|
||||
set_vinfo_for_stmt (new_stmt,
|
||||
new_stmt_vec_info (new_stmt, loop_vinfo,
|
||||
NULL));
|
||||
add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
|
||||
UNKNOWN_LOCATION);
|
||||
arginfo[i].op = phi_res;
|
||||
vargs.safe_push (phi_res);
|
||||
}
|
||||
else
|
||||
{
|
||||
enum tree_code code
|
||||
= POINTER_TYPE_P (TREE_TYPE (op))
|
||||
? POINTER_PLUS_EXPR : PLUS_EXPR;
|
||||
tree type = POINTER_TYPE_P (TREE_TYPE (op))
|
||||
? sizetype : TREE_TYPE (op);
|
||||
double_int cst
|
||||
= double_int::from_shwi
|
||||
(bestn->simdclone->args[i].linear_step);
|
||||
cst *= double_int::from_uhwi (j * nunits);
|
||||
tree tcst = double_int_to_tree (type, cst);
|
||||
new_temp = make_ssa_name (TREE_TYPE (op), NULL);
|
||||
new_stmt
|
||||
= gimple_build_assign_with_ops (code, new_temp,
|
||||
arginfo[i].op, tcst);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
vargs.safe_push (new_temp);
|
||||
}
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
new_stmt = gimple_build_call_vec (fndecl, vargs);
|
||||
if (vec_dest)
|
||||
{
|
||||
gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
|
||||
if (ratype)
|
||||
new_temp = create_tmp_var (ratype, NULL);
|
||||
else if (TYPE_VECTOR_SUBPARTS (vectype)
|
||||
== TYPE_VECTOR_SUBPARTS (rtype))
|
||||
new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
else
|
||||
new_temp = make_ssa_name (rtype, new_stmt);
|
||||
gimple_call_set_lhs (new_stmt, new_temp);
|
||||
}
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
if (vec_dest)
|
||||
{
|
||||
if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
|
||||
{
|
||||
unsigned int k, l;
|
||||
unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
|
||||
k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
|
||||
gcc_assert ((k & (k - 1)) == 0);
|
||||
for (l = 0; l < k; l++)
|
||||
{
|
||||
tree t;
|
||||
if (ratype)
|
||||
{
|
||||
t = build_fold_addr_expr (new_temp);
|
||||
t = build2 (MEM_REF, vectype, t,
|
||||
build_int_cst (TREE_TYPE (t),
|
||||
l * prec / BITS_PER_UNIT));
|
||||
}
|
||||
else
|
||||
t = build3 (BIT_FIELD_REF, vectype, new_temp,
|
||||
size_int (prec), bitsize_int (l * prec));
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (vectype, NULL), t);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
if (j == 0 && l == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
|
||||
if (ratype)
|
||||
{
|
||||
tree clobber = build_constructor (ratype, NULL);
|
||||
TREE_THIS_VOLATILE (clobber) = 1;
|
||||
new_stmt = gimple_build_assign (new_temp, clobber);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
|
||||
{
|
||||
unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
|
||||
/ TYPE_VECTOR_SUBPARTS (rtype));
|
||||
gcc_assert ((k & (k - 1)) == 0);
|
||||
if ((j & (k - 1)) == 0)
|
||||
vec_alloc (ret_ctor_elts, k);
|
||||
if (ratype)
|
||||
{
|
||||
unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
|
||||
for (m = 0; m < o; m++)
|
||||
{
|
||||
tree tem = build4 (ARRAY_REF, rtype, new_temp,
|
||||
size_int (m), NULL_TREE, NULL_TREE);
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (rtype, NULL),
|
||||
tem);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
|
||||
gimple_assign_lhs (new_stmt));
|
||||
}
|
||||
tree clobber = build_constructor (ratype, NULL);
|
||||
TREE_THIS_VOLATILE (clobber) = 1;
|
||||
new_stmt = gimple_build_assign (new_temp, clobber);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
}
|
||||
else
|
||||
CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
|
||||
if ((j & (k - 1)) != k - 1)
|
||||
continue;
|
||||
vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (vec_dest, NULL),
|
||||
vec_oprnd0);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
if ((unsigned) j == k - 1)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
continue;
|
||||
}
|
||||
else if (ratype)
|
||||
{
|
||||
tree t = build_fold_addr_expr (new_temp);
|
||||
t = build2 (MEM_REF, vectype, t,
|
||||
build_int_cst (TREE_TYPE (t), 0));
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (vec_dest, NULL), t);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
tree clobber = build_constructor (ratype, NULL);
|
||||
TREE_THIS_VOLATILE (clobber) = 1;
|
||||
vect_finish_stmt_generation (stmt,
|
||||
gimple_build_assign (new_temp,
|
||||
clobber), gsi);
|
||||
}
|
||||
}
|
||||
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
|
||||
vargs.release ();
|
||||
|
||||
/* The call in STMT might prevent it from being removed in dce.
|
||||
We however cannot remove it here, due to the way the ssa name
|
||||
it defines is mapped to the new definition. So just replace
|
||||
rhs of the statement with something harmless. */
|
||||
|
||||
if (slp_node)
|
||||
return true;
|
||||
|
||||
if (scalar_dest)
|
||||
{
|
||||
type = TREE_TYPE (scalar_dest);
|
||||
if (is_pattern_stmt_p (stmt_info))
|
||||
lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
|
||||
else
|
||||
lhs = gimple_call_lhs (stmt);
|
||||
new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
|
||||
}
|
||||
else
|
||||
new_stmt = gimple_build_nop ();
|
||||
set_vinfo_for_stmt (new_stmt, stmt_info);
|
||||
set_vinfo_for_stmt (stmt, NULL);
|
||||
STMT_VINFO_STMT (stmt_info) = new_stmt;
|
||||
gsi_replace (gsi, new_stmt, false);
|
||||
unlink_stmt_vdef (stmt);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_gen_widened_results_half
|
||||
|
||||
Create a vector stmt whose code, type, number of arguments, and result
|
||||
|
@ -5819,7 +6417,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
|
|||
if (STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
{
|
||||
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
|
||||
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
|
||||
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
|
||||
|| (is_gimple_call (stmt)
|
||||
&& gimple_call_lhs (stmt) == NULL_TREE));
|
||||
*need_to_vectorize = true;
|
||||
}
|
||||
|
||||
|
@ -5827,7 +6427,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
|
|||
if (!bb_vinfo
|
||||
&& (STMT_VINFO_RELEVANT_P (stmt_info)
|
||||
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
|
||||
ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, NULL)
|
||||
|
@ -5839,7 +6440,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
|
|||
else
|
||||
{
|
||||
if (bb_vinfo)
|
||||
ok = (vectorizable_conversion (stmt, NULL, NULL, node)
|
||||
ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, node)
|
||||
|| vectorizable_shift (stmt, NULL, NULL, node)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, node)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, node)
|
||||
|
@ -5967,6 +6569,11 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
stmt = gsi_stmt (*gsi);
|
||||
break;
|
||||
|
||||
case call_simd_clone_vec_info_type:
|
||||
done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
|
||||
stmt = gsi_stmt (*gsi);
|
||||
break;
|
||||
|
||||
case reduc_vec_info_type:
|
||||
done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
|
||||
gcc_assert (done);
|
||||
|
|
|
@ -443,6 +443,7 @@ enum stmt_vec_info_type {
|
|||
shift_vec_info_type,
|
||||
op_vec_info_type,
|
||||
call_vec_info_type,
|
||||
call_simd_clone_vec_info_type,
|
||||
assignment_vec_info_type,
|
||||
condition_vec_info_type,
|
||||
reduc_vec_info_type,
|
||||
|
@ -565,6 +566,9 @@ typedef struct _stmt_vec_info {
|
|||
of this stmt. */
|
||||
vec<dr_p> same_align_refs;
|
||||
|
||||
/* Selected SIMD clone's function decl. */
|
||||
tree simd_clone_fndecl;
|
||||
|
||||
/* Classify the def of this stmt. */
|
||||
enum vect_def_type def_type;
|
||||
|
||||
|
@ -633,6 +637,7 @@ typedef struct _stmt_vec_info {
|
|||
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
|
||||
#define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
|
||||
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
|
||||
#define STMT_VINFO_SIMD_CLONE_FNDECL(S) (S)->simd_clone_fndecl
|
||||
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
|
||||
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
|
||||
#define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element
|
||||
|
|
|
@ -1344,6 +1344,10 @@ extern void protected_set_expr_location (tree, location_t);
|
|||
#define OMP_CLAUSE_LINEAR_NO_COPYOUT(NODE) \
|
||||
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
|
||||
|
||||
/* True if a LINEAR clause has a stride that is variable. */
|
||||
#define OMP_CLAUSE_LINEAR_VARIABLE_STRIDE(NODE) \
|
||||
TREE_PROTECTED (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR))
|
||||
|
||||
#define OMP_CLAUSE_LINEAR_STEP(NODE) \
|
||||
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_LINEAR), 1)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue