tree-vectorizer.c: Fix documentation.

2010-09-16  Ira Rosen  <irar@il.ibm.com>

	* tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
	is_pattern_stmt_p, is_loop_header_bb_p,
	stmt_vinfo_set_inside_of_loop_cost,
	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
	known_alignment_for_access_p): Likewise.
	* tree-vect-loop.c: Fix documentation.
	(vect_get_cost): Start function name from new line.
	* tree-vect-data-refs.c: Fix documentation.
	* tree-vect-stmts.c: Likewise.
	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
	(vectorizable_store): Free vec_oprnds if allocated.
	(vectorizable_condition): Initialize several variables to avoid
	warnings.
	* tree-vect-slp.c: Fix documentation.

From-SVN: r164332

commit ff802fa1f3 (parent 6be14c0ebc): 11 changed files with 296 additions and 228 deletions.
@@ -1,3 +1,22 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* tree-vectorizer.c: Fix documentation.
+	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
+	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
+	is_pattern_stmt_p, is_loop_header_bb_p,
+	stmt_vinfo_set_inside_of_loop_cost,
+	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
+	known_alignment_for_access_p): Likewise.
+	* tree-vect-loop.c: Fix documentation.
+	(vect_get_cost): Start function name from new line.
+	* tree-vect-data-refs.c: Fix documentation.
+	* tree-vect-stmts.c: Likewise.
+	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
+	(vectorizable_store): Free vec_oprnds if allocated.
+	(vectorizable_condition): Initialize several variables to avoid
+	warnings.
+	* tree-vect-slp.c: Fix documentation.
+
 2010-09-16  Richard Guenther  <rguenther@suse.de>
 
 	* tree.c (tree_node_structure_for_code): TRANSLATION_UNIT_DECL
@@ -1,3 +1,9 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* gcc.dg/vect/bb-slp-8.c: Fix documentation, add space between function
+	name and parentheses.
+	* gcc.dg/vect/bb-slp-8a.c, gcc.dg/vect/bb-slp-8b.c: Likewise.
+
 2010-09-15  Jason Merrill  <jason@redhat.com>
 
 	* g++.dg/parse/parameter-declaration-2.C: New.
@@ -15,8 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
   int i;
   unsigned int a0, a1, a2, a3;
 
-  /* pin and pout may alias. But since all the loads are before the first store
-     the basic block is vectorizable.  */
+  /* pin and pout may alias.  But since all the loads are before the first
+     store the basic block is vectorizable.  */
   a0 = *pin++ + 23;
   a1 = *pin++ + 142;
   a2 = *pin++ + 2;
@@ -35,7 +35,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
@@ -15,7 +15,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
   int i;
   unsigned int a0, a1, a2, a3;
 
-  /* pin and pout may alias, and loads and stores are mixed. The basic block
+  /* pin and pout may alias, and loads and stores are mixed.  The basic block
      cannot be vectorized.  */
   a0 = *pin++ + 23;
   *pout++ = a0 * x;
@@ -34,7 +34,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
@@ -36,7 +36,7 @@ main1 (unsigned int x, unsigned int y)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
@@ -45,19 +45,19 @@ along with GCC; see the file COPYING3.  If not see
 #include "optabs.h"
 
 /* Return the smallest scalar part of STMT.
-   This is used to determine the vectype of the stmt. We generally set the
-   vectype according to the type of the result (lhs). For stmts whose
+   This is used to determine the vectype of the stmt.  We generally set the
+   vectype according to the type of the result (lhs).  For stmts whose
    result-type is different than the type of the arguments (e.g., demotion,
    promotion), vectype will be reset appropriately (later).  Note that we have
    to visit the smallest datatype in this function, because that determines the
-   VF. If the smallest datatype in the loop is present only as the rhs of a
+   VF.  If the smallest datatype in the loop is present only as the rhs of a
    promotion operation - we'd miss it.
    Such a case, where a variable of this datatype does not appear in the lhs
    anywhere in the loop, can only occur if it's an invariant: e.g.:
    'int_x = (int) short_inv', which we'd expect to have been optimized away by
-   invariant motion. However, we cannot rely on invariant motion to always take
-   invariants out of the loop, and so in the case of promotion we also have to
-   check the rhs.
+   invariant motion.  However, we cannot rely on invariant motion to always
+   take invariants out of the loop, and so in the case of promotion we also
+   have to check the rhs.
    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
    types.  */
 
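For illustration, a minimal standalone sketch of the situation that comment describes (the loop and variable names here are illustrative, not taken from the patch): the narrowest type in the loop, short, occurs only on the rhs of a promotion, so a vectype derived from lhs types alone would miss it.

    /* short_inv is loop-invariant; 'int_x = (int) short_inv' is a promotion
       whose result type (int) is wider than its argument (short).  If the
       cast is not hoisted by invariant motion, the only place a short value
       appears is on the rhs, which is why the rhs must be inspected when
       determining the smallest scalar type (and hence the VF).  */
    void
    foo (int *out, short short_inv, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        {
          int int_x = (int) short_inv;  /* promotion: short -> int  */
          out[i] = int_x + i;           /* all lhs types are int  */
        }
    }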
@@ -89,7 +89,7 @@ vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
 
 
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
-   from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
+   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
 
 int
 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
@@ -151,7 +151,7 @@ vect_insert_into_interleaving_chain (struct data_reference *dra,
 /* Function vect_update_interleaving_chain.
 
    For two data-refs DRA and DRB that are a part of a chain interleaved data
-   accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
+   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
 
    There are four possible cases:
    1. New stmts - both DRA and DRB are not a part of any chain:
@@ -211,7 +211,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
   if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
     {
       /* DRB's init is smaller than the init of the stmt previously marked
-         as the first stmt of the interleaving chain of DRA. Therefore, we
+         as the first stmt of the interleaving chain of DRA.  Therefore, we
          update FIRST_STMT and put DRB in the head of the list.  */
       DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
       DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
@ -323,7 +323,11 @@ vect_equal_offsets (tree offset1, tree offset2)
|
|||
}
|
||||
|
||||
|
||||
/* Check dependence between DRA and DRB for basic block vectorization. */
|
||||
/* Check dependence between DRA and DRB for basic block vectorization.
|
||||
If the accesses share same bases and offsets, we can compare their initial
|
||||
constant offsets to decide whether they differ or not. In case of a read-
|
||||
write dependence we check that the load is before the store to ensure that
|
||||
vectorization will not change the order of the accesses. */
|
||||
|
||||
static bool
|
||||
vect_drs_dependent_in_basic_block (struct data_reference *dra,
|
||||
|
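A minimal scalar model of that check, under the simplifying assumption that both references already share the same base and offset (struct and field names are illustrative, not GCC's):

    /* Refs with equal base and offset differ only in their constant inits;
       if the inits are equal we have a read-write dependence, and basic
       block vectorization is safe only if the load comes first.  */
    struct ref { long init; int is_read; int stmt_order; };

    static int
    drs_dependent (const struct ref *a, const struct ref *b)
    {
      if (a->init != b->init)
        return 0;                        /* distinct scalars, no dependence  */
      /* Read-write dependence: require the load before the store.  */
      const struct ref *earlier = a->stmt_order < b->stmt_order ? a : b;
      return !earlier->is_read;          /* dependent if the store is first  */
    }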
@@ -342,7 +346,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
       return true;
     }
 
-  /* Check that the data-refs have same bases and offsets. If not, we can't
+  /* Check that the data-refs have same bases and offsets.  If not, we can't
      determine if they are dependent.  */
   if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
        && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
@@ -368,10 +372,10 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
   if (init_a != init_b)
     return false;
 
-  /* We have a read-write dependence. Check that the load is before the store.
+  /* We have a read-write dependence.  Check that the load is before the store.
      When we vectorize basic blocks, vector load can be only before
      corresponding scalar load, and vector store can be only after its
-     corresponding scalar store. So the order of the acceses is preserved in
+     corresponding scalar store.  So the order of the acceses is preserved in
      case the load is before the store.  */
   earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
@@ -383,7 +387,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
 
 /* Function vect_check_interleaving.
 
-   Check if DRA and DRB are a part of interleaving. In case they are, insert
+   Check if DRA and DRB are a part of interleaving.  In case they are, insert
    DRA and DRB in an interleaving chain.  */
 
 static bool
@@ -813,7 +817,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 
   /* In case the dataref is in an inner-loop of the loop that is being
      vectorized (LOOP), we use the base and misalignment information
-     relative to the outer-loop (LOOP). This is ok only if the misalignment
+     relative to the outer-loop (LOOP).  This is ok only if the misalignment
      stays the same throughout the execution of the inner-loop, which is why
      we have to check that the stride of the dataref in the inner-loop evenly
      divides by the vector size.  */
@@ -1241,8 +1245,8 @@ vect_peeling_hash_get_most_frequent (void **slot, void *data)
 }
 
 
-/* Traverse peeling hash table and calculate cost for each peeling option. Find
-   one with the lowest cost.  */
+/* Traverse peeling hash table and calculate cost for each peeling option.
+   Find the one with the lowest cost.  */
 
 static int
 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
@@ -1326,15 +1330,15 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
    the alignment of data references in the loop.
 
    FOR NOW: we assume that whatever versioning/peeling takes place, only the
-   original loop is to be vectorized; Any other loops that are created by
+   original loop is to be vectorized.  Any other loops that are created by
    the transformations performed in this pass - are not supposed to be
-   vectorized. This restriction will be relaxed.
+   vectorized.  This restriction will be relaxed.
 
    This pass will require a cost model to guide it whether to apply peeling
-   or versioning or a combination of the two. For example, the scheme that
+   or versioning or a combination of the two.  For example, the scheme that
    intel uses when given a loop with several memory accesses, is as follows:
    choose one memory access ('p') which alignment you want to force by doing
-   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
+   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
    other accesses are not necessarily aligned, or (2) use loop versioning to
    generate one loop in which all accesses are aligned, and another loop in
    which only 'p' is necessarily aligned.
@@ -1343,9 +1347,9 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
 
-   Devising a cost model is the most critical aspect of this work. It will
+   Devising a cost model is the most critical aspect of this work.  It will
    guide us on which access to peel for, whether to use loop versioning, how
-   many versions to create, etc. The cost model will probably consist of
+   many versions to create, etc.  The cost model will probably consist of
    generic considerations as well as target specific considerations (on
    powerpc for example, misaligned stores are more painful than misaligned
    loads).
@@ -1406,7 +1410,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
      }
    }
 
-   These loops are later passed to loop_transform to be vectorized. The
+   These loops are later passed to loop_transform to be vectorized.  The
    vectorizer will use the alignment information to guide the transformation
    (whether to generate regular loads/stores, or with special handling for
    misalignment).  */
@@ -1500,11 +1504,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
           npeel_tmp = (nelements - mis) % vf;
 
           /* For multiple types, it is possible that the bigger type access
-             will have more than one peeling option. E.g., a loop with two
+             will have more than one peeling option.  E.g., a loop with two
              types: one of size (vector size / 4), and the other one of
-             size (vector size / 8). Vectorization factor will 8. If both
+             size (vector size / 8).  Vectorization factor will 8.  If both
              access are misaligned by 3, the first one needs one scalar
-             iteration to be aligned, and the second one needs 5. But the
+             iteration to be aligned, and the second one needs 5.  But the
              the first one will be aligned also by peeling 5 scalar
              iterations, and in that case both accesses will be aligned.
              Hence, except for the immediate peeling amount, we also want
@@ -1996,7 +2000,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
 
 
 /* Analyze groups of strided accesses: check that DR belongs to a group of
-   strided accesses of legal size, step, etc. Detect gaps, single element
+   strided accesses of legal size, step, etc.  Detect gaps, single element
    interleaving, and other special cases.  Set strided access info.
    Collect groups of strided stores for further use in SLP analysis.  */
 
@@ -2072,9 +2076,10 @@ vect_analyze_group_access (struct data_reference *dr)
 
   while (next)
     {
-      /* Skip same data-refs. In case that two or more stmts share data-ref
-         (supported only for loads), we vectorize only the first stmt, and
-         the rest get their vectorized loads from the first one. */
+      /* Skip same data-refs.  In case that two or more stmts share
+         data-ref (supported only for loads), we vectorize only the first
+         stmt, and the rest get their vectorized loads from the first
+         one.  */
       if (!tree_int_cst_compare (DR_INIT (data_ref),
                                  DR_INIT (STMT_VINFO_DATA_REF (
                                    vinfo_for_stmt (next)))))
@@ -2196,7 +2201,7 @@ vect_analyze_group_access (struct data_reference *dr)
 
   /* FORNOW: we handle only interleaving that is a power of 2.
      We don't fail here if it may be still possible to vectorize the
-     group using SLP. If not, the size of the group will be checked in
+     group using SLP.  If not, the size of the group will be checked in
      vect_analyze_operations, and the vectorization will fail.  */
   if (exact_log2 (stride) == -1)
     {
@@ -2483,8 +2488,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
     }
 
-  /* Go through the data-refs, check that the analysis succeeded. Update pointer
-     from stmt_vec_info struct to DR and vectype.  */
+  /* Go through the data-refs, check that the analysis succeeded.  Update
+     pointer from stmt_vec_info struct to DR and vectype.  */
 
   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
     {
@@ -2572,7 +2577,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 	  tree dinit;
 
 	  /* Build a reference to the first location accessed by the
-	     inner-loop: *(BASE+INIT). (The first location is actually
+	     inner-loop: *(BASE+INIT).  (The first location is actually
 	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
 	  tree inner_base = build_fold_indirect_ref
 	                    (fold_build2 (POINTER_PLUS_EXPR,
@@ -2712,7 +2717,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 
 /* Function vect_get_new_vect_var.
 
-   Returns a name for a new variable. The current naming scheme appends the
+   Returns a name for a new variable.  The current naming scheme appends the
    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
    the name of vectorizer generated variables, and appends that to NAME if
    provided.  */
@@ -2767,7 +2772,7 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    LOOP:    Specify relative to which loop-nest should the address be computed.
             For example, when the dataref is in an inner-loop nested in an
             outer-loop that is now being vectorized, LOOP can be either the
-            outer-loop, or the inner-loop. The first memory location accessed
+            outer-loop, or the inner-loop.  The first memory location accessed
             by the following dataref ('in' points to short):
 
                for (i=0; i<N; i++)
@@ -2937,7 +2942,7 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
       Return the increment stmt that updates the pointer in PTR_INCR.
 
    3. Set INV_P to true if the access pattern of the data reference in the
-      vectorized loop is invariant. Set it to false otherwise.
+      vectorized loop is invariant.  Set it to false otherwise.
 
    4. Return the pointer.  */
 
@@ -3017,7 +3022,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
       print_generic_expr (vect_dump, base_name, TDF_SLIM);
     }
 
-  /** (1) Create the new vector-pointer variable:  **/
+  /* (1) Create the new vector-pointer variable.  */
   vect_ptr_type = build_pointer_type (vectype);
   base = get_base_address (DR_REF (dr));
   if (base
@@ -3067,16 +3072,16 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 
   add_referenced_var (vect_ptr);
 
-  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
-      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
-      def-use update cycles for the pointer: One relative to the outer-loop
-      (LOOP), which is what steps (3) and (4) below do. The other is relative
-      to the inner-loop (which is the inner-most loop containing the dataref),
-      and this is done be step (5) below.
+  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
+     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
+     def-use update cycles for the pointer: one relative to the outer-loop
+     (LOOP), which is what steps (3) and (4) below do.  The other is relative
+     to the inner-loop (which is the inner-most loop containing the dataref),
+     and this is done be step (5) below.
 
-      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
-      inner-most loop, and so steps (3),(4) work the same, and step (5) is
-      redundant. Steps (3),(4) create the following:
+     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
+     inner-most loop, and so steps (3),(4) work the same, and step (5) is
+     redundant.  Steps (3),(4) create the following:
 
         vp0 = &base_addr;
         LOOP:  vp1 = phi(vp0,vp2)
@@ -3085,8 +3090,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
               vp2 = vp1 + step
               goto LOOP
 
-      If there is an inner-loop nested in loop, then step (5) will also be
-      applied, and an additional update in the inner-loop will be created:
+     If there is an inner-loop nested in loop, then step (5) will also be
+     applied, and an additional update in the inner-loop will be created:
 
         vp0 = &base_addr;
         LOOP:  vp1 = phi(vp0,vp2)
@@ -3098,8 +3103,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
               vp2 = vp1 + step
               if () goto LOOP   */
 
-  /** (3) Calculate the initial address the vector-pointer, and set
-          the vector-pointer to point to it before the loop:  **/
+  /* (2) Calculate the initial address the vector-pointer, and set
+     the vector-pointer to point to it before the loop.  */
 
   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
 
@@ -3140,10 +3145,9 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   else
     vect_ptr_init = new_temp;
 
-  /** (4) Handle the updating of the vector-pointer inside the loop.
-          This is needed when ONLY_INIT is false, and also when AT_LOOP
-          is the inner-loop nested in LOOP (during outer-loop vectorization).
-   **/
+  /* (3) Handle the updating of the vector-pointer inside the loop.
+     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
+     inner-loop nested in LOOP (during outer-loop vectorization).  */
 
   /* No update in loop is required.  */
   if (only_init && (!loop_vinfo || at_loop == loop))
@@ -3182,8 +3186,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
     return vptr;
 
 
-  /** (5) Handle the updating of the vector-pointer inside the inner-loop
-          nested in LOOP, if exists:  **/
+  /* (4) Handle the updating of the vector-pointer inside the inner-loop
+     nested in LOOP, if exists.  */
 
   gcc_assert (nested_in_vect_loop);
   if (!only_init)
@@ -3358,12 +3362,12 @@ vect_strided_store_supported (tree vectype)
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
    a power of 2, generate interleave_high/low stmts to reorder the data
-   correctly for the stores. Return the final references for stores in
+   correctly for the stores.  Return the final references for stores in
    RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements. We assign a number to each
-   element, the input sequence is:
+   The input is 4 vectors each containing 8 elements.  We assign a number to
+   each element, the input sequence is:
 
    1st vec:   0  1  2  3  4  5  6  7
    2nd vec:   8  9 10 11 12 13 14 15
@@ -3379,18 +3383,18 @@ vect_strided_store_supported (tree vectype)
 
    i.e., we interleave the contents of the four vectors in their order.
 
-   We use interleave_high/low instructions to create such output. The input of
+   We use interleave_high/low instructions to create such output.  The input of
    each interleave_high/low operation is two vectors:
    1st vec    2nd vec
    0 1 2 3    4 5 6 7
    the even elements of the result vector are obtained left-to-right from the
-   high/low elements of the first vector. The odd elements of the result are
+   high/low elements of the first vector.  The odd elements of the result are
    obtained left-to-right from the high/low elements of the second vector.
    The output of interleave_high will be:   0 4 1 5
    and of interleave_low:                   2 6 3 7
 
 
-   The permutation is done in log LENGTH stages. In each stage interleave_high
+   The permutation is done in log LENGTH stages.  In each stage interleave_high
    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
    where the first argument is taken from the first half of DR_CHAIN and the
    second argument from it's second half.
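A standalone scalar model of that store permutation (not part of the patch; it uses 4-element vectors rather than the comment's 8 for brevity): two interleave stages turn four vectors numbered 0..15 into the fully interleaved order 0 4 8 12 / 1 5 9 13 / 2 6 10 14 / 3 7 11 15.

    #include <stdio.h>
    #include <string.h>

    #define NUNITS 4   /* elements per vector  */
    #define LEN    4   /* vectors in the chain; must be a power of 2  */

    /* Scalar models of the two permutes on NUNITS-element vectors.  */
    static void
    interleave_high (const int *a, const int *b, int *out)
    {
      int i;
      for (i = 0; i < NUNITS / 2; i++)
        {
          out[2 * i] = a[i];            /* high halves, left-to-right  */
          out[2 * i + 1] = b[i];
        }
    }

    static void
    interleave_low (const int *a, const int *b, int *out)
    {
      int i;
      for (i = 0; i < NUNITS / 2; i++)
        {
          out[2 * i] = a[NUNITS / 2 + i];   /* low halves  */
          out[2 * i + 1] = b[NUNITS / 2 + i];
        }
    }

    int
    main (void)
    {
      int chain[LEN][NUNITS], result[LEN][NUNITS], stage, i, e;

      for (i = 0; i < LEN; i++)
        for (e = 0; e < NUNITS; e++)
          chain[i][e] = i * NUNITS + e;     /* number the elements 0..15  */

      /* log2 (LEN) = 2 stages; pair vector I with vector LEN/2 + I.  */
      for (stage = 0; stage < 2; stage++)
        {
          for (i = 0; i < LEN / 2; i++)
            {
              interleave_high (chain[i], chain[LEN / 2 + i], result[2 * i]);
              interleave_low (chain[i], chain[LEN / 2 + i], result[2 * i + 1]);
            }
          memcpy (chain, result, sizeof chain);
        }

      for (i = 0; i < LEN; i++)             /* prints 0 4 8 12, 1 5 9 13, ...  */
        {
          for (e = 0; e < NUNITS; e++)
            printf ("%d ", chain[i][e]);
          printf ("\n");
        }
      return 0;
    }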
@@ -3582,8 +3586,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
    1. the misalignment computation
    2. the extra vector load (for the optimized realignment scheme).
    3. the phi node for the two vectors from which the realignment is
-      done (for the optimized realignment scheme).
-   */
+      done (for the optimized realignment scheme).  */
 
   /* 1. Determine where to generate the misalignment computation.
 
@@ -3807,7 +3810,7 @@ vect_strided_load_supported (tree vectype)
 
    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
    a power of 2, generate extract_even/odd stmts to reorder the input data
-   correctly. Return the final references for loads in RESULT_CHAIN.
+   correctly.  Return the final references for loads in RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
    The input is 4 vectors each containing 8 elements. We assign a number to each
@@ -3828,19 +3831,19 @@ vect_strided_load_supported (tree vectype)
    i.e., the first output vector should contain the first elements of each
    interleaving group, etc.
 
-   We use extract_even/odd instructions to create such output. The input of each
-   extract_even/odd operation is two vectors
+   We use extract_even/odd instructions to create such output.  The input of
+   each extract_even/odd operation is two vectors
    1st vec    2nd vec
    0 1 2 3    4 5 6 7
 
-   and the output is the vector of extracted even/odd elements. The output of
+   and the output is the vector of extracted even/odd elements.  The output of
    extract_even will be:   0 2 4 6
    and of extract_odd:     1 3 5 7
 
 
-   The permutation is done in log LENGTH stages. In each stage extract_even and
-   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
-   order. In our example,
+   The permutation is done in log LENGTH stages.  In each stage extract_even
+   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
+   their order.  In our example,
 
    E1: extract_even (1st vec, 2nd vec)
    E2: extract_odd (1st vec, 2nd vec)
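The dual of the store sketch above, again as a standalone scalar model (not from the patch; 4-element vectors instead of 8): two extract stages de-interleave four vectors of consecutive loads 0..15 into strided groups 0 4 8 12 / 1 5 9 13 / 2 6 10 14 / 3 7 11 15, evens landing in the first half of the result chain and odds in the second.

    #include <stdio.h>
    #include <string.h>

    #define NUNITS 4   /* elements per vector  */
    #define LEN    4   /* vectors in the chain; must be a power of 2  */

    /* Even/odd elements of the concatenation of A and B.  */
    static void
    extract_even (const int *a, const int *b, int *out)
    {
      int i;
      for (i = 0; i < NUNITS; i++)
        out[i] = (2 * i < NUNITS) ? a[2 * i] : b[2 * i - NUNITS];
    }

    static void
    extract_odd (const int *a, const int *b, int *out)
    {
      int i;
      for (i = 0; i < NUNITS; i++)
        out[i] = (2 * i + 1 < NUNITS) ? a[2 * i + 1] : b[2 * i + 1 - NUNITS];
    }

    int
    main (void)
    {
      int chain[LEN][NUNITS], result[LEN][NUNITS], stage, i, e;

      for (i = 0; i < LEN; i++)
        for (e = 0; e < NUNITS; e++)
          chain[i][e] = i * NUNITS + e;     /* loads in memory order 0..15  */

      /* log2 (LEN) = 2 stages, pairs taken in their order.  */
      for (stage = 0; stage < 2; stage++)
        {
          for (i = 0; i < LEN / 2; i++)
            {
              extract_even (chain[2 * i], chain[2 * i + 1], result[i]);
              extract_odd (chain[2 * i], chain[2 * i + 1], result[i + LEN / 2]);
            }
          memcpy (chain, result, sizeof chain);
        }

      for (i = 0; i < LEN; i++)             /* prints 0 4 8 12, 1 5 9 13, ...  */
        {
          for (e = 0; e < NUNITS; e++)
            printf ("%d ", chain[i][e]);
          printf ("\n");
        }
      return 0;
    }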
@@ -3977,13 +3980,12 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
       if (!next_stmt)
         break;
 
-      /* Skip the gaps. Loads created for the gaps will be removed by dead
-         code elimination pass later. No need to check for the first stmt in
+      /* Skip the gaps.  Loads created for the gaps will be removed by dead
+         code elimination pass later.  No need to check for the first stmt in
          the group, since it always exists.
          DR_GROUP_GAP is the number of steps in elements from the previous
-         access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
-         correspond to the gaps.
-         */
+         access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
+         correspond to the gaps.  */
       if (next_stmt != first_stmt
           && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
         {
@@ -4088,8 +4090,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
 
   /* We can choose between using the implicit realignment scheme (generating
      a misaligned_move stmt) and the explicit realignment scheme (generating
-     aligned loads with a REALIGN_LOAD). There are two variants to the explicit
-     realignment scheme: optimized, and unoptimized.
+     aligned loads with a REALIGN_LOAD).  There are two variants to the
+     explicit realignment scheme: optimized, and unoptimized.
      We can optimize the realignment only if the step between consecutive
      vector loads is equal to the vector size.  Since the vector memory
      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
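A minimal scalar model of one explicitly realigned load (illustrative only; the helper name realign_load and the layout are assumptions, not GCC's API): two aligned loads bracket the misaligned address, and the realignment picks NUNITS consecutive elements starting at the misalignment offset. In the optimized variant the second aligned vector is reused as the first one of the next iteration.

    #include <stdio.h>

    #define NUNITS 4

    static void
    realign_load (const int *msq, const int *lsq, int ofs, int *out)
    {
      int i;
      for (i = 0; i < NUNITS; i++)
        out[i] = (ofs + i < NUNITS) ? msq[ofs + i] : lsq[ofs + i - NUNITS];
    }

    int
    main (void)
    {
      int mem[12], out[NUNITS], i;
      for (i = 0; i < 12; i++)
        mem[i] = i;

      int ofs = 3;                  /* misalignment, in elements        */
      const int *aligned = mem;     /* address floor-aligned to NUNITS  */
      /* msq = aligned load at floor (p), lsq = the next aligned load.  */
      realign_load (aligned, aligned + NUNITS, ofs, out);

      for (i = 0; i < NUNITS; i++)
        printf ("%d ", out[i]);     /* prints: 3 4 5 6  */
      printf ("\n");
      return 0;
    }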
@@ -76,7 +76,7 @@ along with GCC; see the file COPYING3.  If not see
    had successfully passed the analysis phase.
         Throughout this pass we make a distinction between two types of
    data: scalars (which are represented by SSA_NAMES), and memory references
-   ("data-refs"). These two types of data require different handling both
+   ("data-refs").  These two types of data require different handling both
    during analysis and transformation.  The types of data-refs that the
    vectorizer currently supports are ARRAY_REFS which base is an array DECL
    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
@@ -97,10 +97,10 @@ along with GCC; see the file COPYING3.  If not see
         =====================
         The loop transformation phase scans all the stmts in the loop, and
    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
-   the loop that needs to be vectorized. It inserts the vector code sequence
+   the loop that needs to be vectorized.  It inserts the vector code sequence
    just before the scalar stmt S, and records a pointer to the vector code
    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
-   attached to S). This pointer will be used for the vectorization of following
+   attached to S).  This pointer will be used for the vectorization of following
    stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
    otherwise, we rely on dead code elimination for removing it.
 
@@ -112,7 +112,7 @@ along with GCC; see the file COPYING3.  If not see
 
         To vectorize stmt S2, the vectorizer first finds the stmt that defines
    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
-   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
+   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
    resulting sequence would be:
 
       VS1: vb = px[i];
@@ -128,13 +128,13 @@ along with GCC; see the file COPYING3.  If not see
         Currently the only target specific information that is used is the
    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
    Targets that can support different sizes of vectors, for now will need
-   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
+   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
    flexibility will be added in the future.
 
         Since we only vectorize operations which vector form can be
    expressed using existing tree codes, to verify that an operation is
    supported, the vectorizer checks the relevant optab at the relevant
-   machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
+   machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
    the value found is CODE_FOR_nothing, then there's no target support, and
    we can't vectorize the stmt.
 
@@ -144,14 +144,14 @@ along with GCC; see the file COPYING3.  If not see
 
 /* Function vect_determine_vectorization_factor
 
-   Determine the vectorization factor (VF). VF is the number of data elements
+   Determine the vectorization factor (VF).  VF is the number of data elements
    that are operated upon in parallel in a single iteration of the vectorized
-   loop. For example, when vectorizing a loop that operates on 4byte elements,
+   loop.  For example, when vectorizing a loop that operates on 4byte elements,
    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
    elements can fit in a single vector register.
 
    We currently support vectorization of loops in which all types operated upon
-   are of the same size. Therefore this function currently sets VF according to
+   are of the same size.  Therefore this function currently sets VF according to
    the size of the types operated upon, and fails if there are multiple sizes
    in the loop.
 
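As a concrete instance of that computation (a sketch with made-up sizes, not code from the patch):

    #include <stdio.h>

    int
    main (void)
    {
      int vector_size = 16;                  /* VS: bytes per vector register  */
      int element_size = sizeof (int);       /* 4-byte elements                */
      int vf = vector_size / element_size;   /* VF = 16 / 4 = 4                */
      printf ("VF = %d\n", vf);
      /* A loop over 2-byte shorts on the same target would get VF = 8;
         mixing int and short in one loop is the multiple-size case this
         function currently rejects.  */
      return 0;
    }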
@@ -438,7 +438,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 /* Function vect_analyze_scalar_cycles_1.
 
    Examine the cross iteration def-use cycles of scalar variables
-   in LOOP. LOOP_VINFO represents the loop that is now being
+   in LOOP.  LOOP_VINFO represents the loop that is now being
    considered for vectorization (can be LOOP, or an outer-loop
    enclosing LOOP).  */
 
@@ -454,7 +454,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
 
-  /* First - identify all inductions. Reduction detection assumes that all the
+  /* First - identify all inductions.  Reduction detection assumes that all the
      inductions have been identified, therefore, this order must not be
      changed.  */
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -470,7 +470,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
         }
 
-      /* Skip virtual phi's. The data dependences that are associated with
+      /* Skip virtual phi's.  The data dependences that are associated with
          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
       if (!is_gimple_reg (SSA_NAME_VAR (def)))
         continue;
@@ -569,7 +569,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 /* Function vect_analyze_scalar_cycles.
 
    Examine the cross iteration def-use cycles of scalar variables, by
-   analyzing the loop-header PHIs of scalar variables; Classify each
+   analyzing the loop-header PHIs of scalar variables.  Classify each
    cycle as one of the following: invariant, induction, reduction, unknown.
    We do that for the loop represented by LOOP_VINFO, and also to its
    inner-loop, if exists.
@@ -1125,8 +1125,8 @@ vect_analyze_loop_form (struct loop *loop)
 
 /* Get cost by calling cost target builtin.  */
 
-static inline
-int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
+static inline int
+vect_get_cost (enum vect_cost_for_stmt type_of_cost)
 {
   tree dummy_type = NULL;
   int dummy = 0;
@@ -1301,7 +1301,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
       return false;
     }
 
-  /* Analyze cost. Decide if worth while to vectorize.  */
+  /* Analyze cost.  Decide if worth while to vectorize.  */
 
   /* Once VF is set, SLP costs should be updated since the number of created
      vector stmts depends on VF.  */
@@ -1374,7 +1374,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 /* Function vect_analyze_loop.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
-   for it. The different analyses will record information in the
+   for it.  The different analyses will record information in the
    loop_vec_info struct.  */
 
 loop_vec_info
 vect_analyze_loop (struct loop *loop)
@@ -1594,7 +1594,7 @@ reduction_code_for_scalar_code (enum tree_code code,
 }
 
 
-/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
+/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
    STMT is printed with a message MSG.  */
 
 static void
@@ -1608,7 +1608,7 @@ report_vect_op (gimple stmt, const char *msg)
 /* Function vect_is_simple_reduction_1
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
-   reduction computation. We look for the following pattern:
+   reduction computation.  We look for the following pattern:
 
    loop_header:
      a1 = phi < a0, a2 >
@@ -2023,7 +2023,7 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
   int innerloop_iters, i, stmt_cost;
 
-  /* Count statements in scalar loop. Using this as scalar cost for a single
+  /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
 
      TODO: Add outer loop support.
@@ -2308,7 +2308,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
      something more reasonable.  */
 
   /* If the number of iterations is known and we do not do versioning, we can
-     decide whether to vectorize at compile time. Hence the scalar version
+     decide whether to vectorize at compile time.  Hence the scalar version
      do not carry cost model guard costs.  */
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
@@ -2339,7 +2339,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
     }
 
   /* Calculate number of iterations required to make the vector version
-     profitable, relative to the loop bodies only. The following condition
+     profitable, relative to the loop bodies only.  The following condition
      must hold true:
      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
      where
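A standalone sketch that brute-forces the smallest niters satisfying exactly the condition quoted above (all cost numbers are invented for illustration; GCC solves this symbolically rather than by search):

    #include <stdio.h>

    int
    main (void)
    {
      int sic = 4, soc = 0;        /* scalar inside/outside cost  */
      int vic = 6, voc = 30;       /* vector inside/outside cost  */
      int vf = 4, pl_iters = 3, ep_iters = 3;
      int niters;

      for (niters = 1; niters < 1000; niters++)
        {
          int scalar_cost = sic * niters + soc;
          int vector_cost
            = vic * ((niters - pl_iters - ep_iters) / vf) + voc;
          if (scalar_cost > vector_cost)
            {
              printf ("vector version profitable from niters = %d\n", niters);
              break;
            }
        }
      return 0;
    }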
@@ -2556,7 +2556,7 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
 
    Output:
    Return a vector variable, initialized with the first VF values of
-   the induction variable. E.g., for an iv with IV_PHI='X' and
+   the induction variable.  E.g., for an iv with IV_PHI='X' and
    evolution S, for a vector of 4 units, we want to return:
    [X, X + S, X + 2*S, X + 3*S].  */
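A sketch of that initial vector definition with concrete (made-up) values for X and S:

    #include <stdio.h>

    int
    main (void)
    {
      int x = 10, s = 3, vf = 4;   /* IV start X, evolution S, VF = 4  */
      int vec_init[4], i;
      for (i = 0; i < vf; i++)
        vec_init[i] = x + i * s;   /* [X, X+S, X+2*S, X+3*S] = [10, 13, 16, 19]  */
      for (i = 0; i < vf; i++)
        printf ("%d ", vec_init[i]);
      printf ("\n");
      return 0;
    }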
@@ -2638,8 +2638,8 @@ get_initial_def_for_induction (gimple iv_phi)
   if (nested_in_vect_loop)
     {
       /* iv_loop is nested in the loop to be vectorized.  init_expr had already
-         been created during vectorization of previous stmts; We obtain it from
-         the STMT_VINFO_VEC_STMT of the defining stmt.  */
+         been created during vectorization of previous stmts.  We obtain it
+         from the STMT_VINFO_VEC_STMT of the defining stmt.  */
       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                            loop_preheader_edge (iv_loop));
       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
@@ -2905,7 +2905,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
   gcc_assert (loop == (gimple_bb (stmt))->loop_father);
 
   /* In case of double reduction we only create a vector variable to be put
-     in the reduction phi node. The actual statement creation is done in
+     in the reduction phi node.  The actual statement creation is done in
      vect_create_epilog_for_reduction.  */
   if (adjustment_def && nested_in_vect_loop
       && TREE_CODE (init_val) == SSA_NAME
@@ -3023,7 +3023,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
    reduction statements.
    STMT is the scalar reduction stmt that is being vectorized.
    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
-   number of elements that we can fit in a vectype (nunits). In this case
+   number of elements that we can fit in a vectype (nunits).  In this case
    we have to generate more than one vector stmt - i.e - we need to "unroll"
    the vector stmt by a factor VF/nunits.  For more details see documentation
    in vectorizable_operation.
@@ -3314,7 +3314,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
   /* In case this is a reduction in an inner-loop while vectorizing an outer
      loop - we don't need to extract a single scalar result at the end of the
      inner-loop (unless it is double reduction, i.e., the use of reduction is
-     outside the outer-loop). The final vector of partial results will be used
+     outside the outer-loop).  The final vector of partial results will be used
      in the vectorized outer-loop, or reduced to a scalar result at the end of
      the outer-loop.  */
   if (nested_in_vect_loop && !double_reduc)
@@ -3473,7 +3473,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
         }
 
       /* The only case where we need to reduce scalar results in SLP, is
-         unrolling. If the size of SCALAR_RESULTS is greater than
+         unrolling.  If the size of SCALAR_RESULTS is greater than
          GROUP_SIZE, we reduce them combining elements modulo
          GROUP_SIZE.  */
       if (slp_node)
@@ -3579,7 +3579,7 @@ vect_finalize_reduction:
       VEC_replace (gimple, new_phis, 0, epilog_stmt);
     }
 
-  /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
+  /* 2.6 Handle the loop-exit phis.  Replace the uses of scalar loop-exit
      phis with new adjusted scalar results, i.e., replace use <s_out0>
      with use <s_out4>.
 
@@ -3605,8 +3605,8 @@ vect_finalize_reduction:
           use <s_out4>  */
 
   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
-     case that GROUP_SIZE is greater than vectorization factor). Therefore, we
-     need to match SCALAR_RESULTS with corresponding statements. The first
+     case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
+     need to match SCALAR_RESULTS with corresponding statements.  The first
      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
      the first vector stmt, etc.
      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
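A sketch of that matching with made-up sizes (GROUP_SIZE 8, two new vector stmts, so RATIO = 4):

    #include <stdio.h>

    int
    main (void)
    {
      int group_size = 8, nvectors = 2, j;
      int ratio = group_size / nvectors;   /* RATIO = 4  */
      for (j = 0; j < group_size; j++)
        printf ("scalar result %d -> vector stmt %d\n", j, j / ratio);
      return 0;
    }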
@@ -3639,7 +3639,7 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
-         result. (The reduction result is expected to have two immediate uses -
+         result.  (The reduction result is expected to have two immediate uses -
          one at the latch block, and one at the loop exit).  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
@@ -3740,7 +3740,7 @@ vect_finalize_reduction:
                   vect_phi_res = PHI_RESULT (vect_phi);
 
                   /* Replace the use, i.e., set the correct vs1 in the regular
-                     reduction phi node. FORNOW, NCOPIES is always 1, so the
+                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
                      loop is redundant.  */
                   use = reduction_phi;
                   for (j = 0; j < ncopies; j++)
@@ -3764,8 +3764,8 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
-         result. (The reduction result is expected to have two immediate uses -
-         one at the latch block, and one at the loop exit). For double
+         result.  (The reduction result is expected to have two immediate uses,
+         one at the latch block, and one at the loop exit).  For double
          reductions we are looking for exit phis of the outer loop.  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         {
@@ -3814,7 +3814,7 @@ vect_finalize_reduction:
    Return FALSE if not a vectorizable STMT, TRUE otherwise.
 
    This function also handles reduction idioms (patterns) that have been
-   recognized in advance during vect_pattern_recog. In this case, STMT may be
+   recognized in advance during vect_pattern_recog.  In this case, STMT may be
    of this form:
      X = pattern_expr (arg0, arg1, ..., X)
    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
@@ -3835,9 +3835,9 @@ vect_finalize_reduction:
 
    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
    indicates what is the actual level of parallelism (V8HI in the example), so
-   that the right vectorization factor would be derived. This vectype
+   that the right vectorization factor would be derived.  This vectype
    corresponds to the type of arguments to the reduction stmt, and should *NOT*
-   be used to create the vectorized stmt. The right vectype for the vectorized
+   be used to create the vectorized stmt.  The right vectype for the vectorized
    stmt is obtained from the type of the result X:
      get_vectype_for_scalar_type (TREE_TYPE (X))
 
@@ -3934,7 +3934,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
     }
 
-  /* 3. Check the operands of the operation. The first operands are defined
+  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */
 
@@ -3979,7 +3979,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* All uses but the last are expected to be defined in the loop.
-     The last use is the reduction variable. In case of nested cycle this
+     The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
   for (i = 0; i < op_type-1; i++)
@@ -4110,7 +4110,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
           1. The tree-code that is used to create the vector operation in the
              epilog code (that reduces the partial results) is not the
              tree-code of STMT, but is rather the tree-code of the original
-             stmt from the pattern that STMT is replacing. I.e, in the example
+             stmt from the pattern that STMT is replacing.  I.e, in the example
              above we want to use 'widen_sum' in the loop, but 'plus' in the
              epilog.
           2. The type (mode) we use to check available target support
@@ -4513,7 +4513,7 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
 
 /* Function vectorizable_live_operation.
 
-   STMT computes a value that is used outside the loop. Check if
+   STMT computes a value that is used outside the loop.  Check if
    it can be supported.  */
 
 bool
@@ -4554,7 +4554,7 @@ vectorizable_live_operation (gimple stmt,
   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
 
-  /* FORNOW: support only if all uses are invariant. This means
+  /* FORNOW: support only if all uses are invariant.  This means
      that the scalar operations can remain in place, unvectorized.
      The original last scalar value that they compute will be used.  */
 
@@ -4665,7 +4665,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      compile time constant), or it is a constant that doesn't divide by the
      vectorization factor, then an epilog loop needs to be created.
      We therefore duplicate the loop: the original loop will be vectorized,
-     and will compute the first (n/VF) iterations. The second copy of the loop
+     and will compute the first (n/VF) iterations.  The second copy of the loop
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
@@ -147,7 +147,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
     }
 
   /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
-     from the pattern. Check that all the stmts of the node are in the
+     from the pattern.  Check that all the stmts of the node are in the
      pattern.  */
   if (loop && def_stmt && gimple_bb (def_stmt)
       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
@@ -299,7 +299,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
 /* Recursively build an SLP tree starting from NODE.
    Fail (and return FALSE) if def-stmts are not isomorphic, require data
-   permutation or are of unsupported types of operation. Otherwise, return
+   permutation or are of unsupported types of operation.  Otherwise, return
    TRUE.  */
 
 static bool
@@ -542,7 +542,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
               if (prev_first_load)
                 {
                   /* Check that there are no loads from different interleaving
-                     chains in the same node. The only exception is complex
+                     chains in the same node.  The only exception is complex
                      numbers.  */
                   if (prev_first_load != first_load
                       && rhs_code != REALPART_EXPR
@@ -582,7 +582,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                                        ncopies_for_cost, *node);
                 }
 
-              /* Store the place of this load in the interleaving chain. In
+              /* Store the place of this load in the interleaving chain.  In
                  case that permutation is needed we later decide if a specific
                  permutation is supported.  */
               load_place = vect_get_place_in_interleaving_chain (stmt,
@@ -729,7 +729,7 @@ vect_print_slp_tree (slp_tree node)
 
 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
-   J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
+   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
    stmts in NODE are to be marked.  */
 
 static void
@@ -897,7 +897,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
   /* In case of reduction every load permutation is allowed, since the order
      of the reduction statements is not important (as opposed to the case of
-     strided stores). The only condition we need to check is that all the
+     strided stores).  The only condition we need to check is that all the
      load nodes are of the same size and have the same permutation (and then
      rearrange all the nodes of the SLP instance according to this
      permutation).  */
@@ -920,7 +920,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
    i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
-   {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
+   {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
    chains are mixed, they match the above pattern.  */
   if (complex_numbers)
     {
@@ -969,7 +969,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
       stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
       /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
          instance, not all the loads belong to the same node or interleaving
-         group. Hence, we need to divide them into groups according to
+         group.  Hence, we need to divide them into groups according to
          GROUP_SIZE.  */
       number_of_groups = VEC_length (int, load_permutation) / group_size;
 
@@ -1002,7 +1002,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
       if (!bad_permutation)
         {
-          /* This permutaion is valid for reduction. Since the order of the
+          /* This permutaion is valid for reduction.  Since the order of the
              statements in the nodes is not important unless they are memory
              accesses, we can rearrange the statements in all the nodes
              according to the order of the loads.  */
@@ -1064,9 +1064,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 /* Find the first load in the loop that belongs to INSTANCE.
    When loads are in several SLP nodes, there can be a case in which the first
    load does not appear in the first SLP node to be transformed, causing
-   incorrect order of statements. Since we generate all the loads together,
+   incorrect order of statements.  Since we generate all the loads together,
    they must be inserted before the first load of the SLP instance and not
    before the first load of the first node of the instance.  */
+
 static gimple
 vect_find_first_load_in_slp_instance (slp_instance instance)
 {
@@ -1083,6 +1084,7 @@ vect_find_first_load_in_slp_instance (slp_instance instance)
 
 
 /* Find the last store in SLP INSTANCE.  */
+
 static gimple
 vect_find_last_store_in_slp_instance (slp_instance instance)
 {
@@ -1100,7 +1102,7 @@ vect_find_last_store_in_slp_instance (slp_instance instance)
 }
 
 
-/* Analyze an SLP instance starting from a group of strided stores. Call
+/* Analyze an SLP instance starting from a group of strided stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
 
@@ -1274,7 +1276,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 }
 
 
-/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
+/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
    trees of packed scalar stmts if SLP is possible.  */
 
 bool
@@ -1339,9 +1341,9 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
 
-      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
+      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
          call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
-         loop-based vectorization. Such stmts will be marked as HYBRID.  */
+         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
       decided_to_slp++;
     }
@@ -1355,7 +1357,7 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
 
 
 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
-   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
+   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
 
 static void
 vect_detect_hybrid_slp_stmts (slp_tree node)
@@ -1493,7 +1495,7 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
 }
 
 
-/* Analyze statements in SLP instances of the basic block. Return TRUE if the
+/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
    operations are supported.  */
 
 static bool
@@ -1523,7 +1525,7 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo)
 
 /* Check if loads and stores are mixed in the basic block (in that
    case if we are not sure that the accesses differ, we can't vectorize the
-   basic block). Also return FALSE in case that there is statement marked as
+   basic block).  Also return FALSE in case that there is statement marked as
    not vectorizable.  */
 
 static bool
@@ -1783,11 +1785,11 @@ vect_slp_analyze_bb (basic_block bb)


 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
-   the number of created vector stmts depends on the unrolling factor). However,
-   the actual number of vector stmts for every SLP node depends on VF which is
-   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
-   In this function we assume that the inside costs calculated in
-   vect_model_xxx_cost are linear in ncopies. */
+   the number of created vector stmts depends on the unrolling factor).
+   However, the actual number of vector stmts for every SLP node depends on
+   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
+   should be updated.  In this function we assume that the inside costs
+   calculated in vect_model_xxx_cost are linear in ncopies. */

 void
 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
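
Because the inside costs are assumed linear in ncopies, the update itself is a single rescaling once VF is known. A sketch with illustrative names (not the exact GCC accessors):

  /* Sketch: rescale an SLP instance's inside-of-loop cost once the real
     vectorization factor VF is known.  The cost was computed for
     UNROLLING_FACTOR copies; the number of copies scales by VF / UF
     (both are powers of two and UF divides VF).  */
  static void
  update_slp_cost_for_vf (int *inside_cost, int unrolling_factor, int vf)
  {
    *inside_cost *= vf / unrolling_factor;
  }
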
@@ -1846,7 +1848,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
       op_num = reduc_index - 1;
       op = gimple_op (stmt, op_num + 1);
       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
-         we need either neutral operands or the original operands. See
+         we need either neutral operands or the original operands.  See
          get_initial_def_for_reduction() for details. */
       switch (code)
         {
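
The "neutral operand" is just the identity element of the reduction operation. A simplified sketch of the usual mapping (the real get_initial_def_for_reduction works on trees and tree codes, not chars):

  /* Sketch: neutral (identity) element per reduction operation: 0 for
     addition, 1 for multiplication, all-ones for bitwise AND, ...  */
  static int
  neutral_element (char op)
  {
    switch (op)
      {
      case '+': return 0;
      case '*': return 1;
      case '&': return ~0;
      case '|': return 0;
      case '^': return 0;
      default:  return 0;  /* Otherwise fall back to the original operand.  */
      }
  }
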
@@ -2051,7 +2053,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
       number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
       /* Number of vector stmts was calculated according to LHS in
          vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
-         necessary. See vect_get_smallest_scalar_type() for details. */
+         necessary.  See vect_get_smallest_scalar_type () for details.  */
       vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                      &rhs_size_unit);
       if (rhs_size_unit != lhs_size_unit)
@@ -2065,7 +2067,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
   *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

   /* SLP_NODE corresponds either to a group of stores or to a group of
-     unary/binary operations. We don't call this function for loads.
+     unary/binary operations.  We don't call this function for loads.
      For reduction defs we call vect_get_constant_vectors(), since we are
      looking for initial loop invariant values. */
   if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
@@ -2167,7 +2169,7 @@ vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,

 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
    return in CURRENT_MASK_ELEMENT its equivalent in target specific
-   representation. Check that the mask is valid and return FALSE if not.
+   representation.  Check that the mask is valid and return FALSE if not.
    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
    the next vector, i.e., the current first vector is not needed. */
@@ -2321,8 +2323,8 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
    The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
    scpecific type, e.g., in bytes for Altivec.
    The last mask is illegal since we assume two operands for permute
-   operation, and the mask element values can't be outside that range. Hence,
-   the last mask must be converted into {2,5,5,5}.
+   operation, and the mask element values can't be outside that range.
+   Hence, the last mask must be converted into {2,5,5,5}.
    For the first two permutations we need the first and the second input
    vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
    we need the second and the third vectors: {b1,c1,a2,b2} and
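
The conversion in the example rebases every index against the vector that holds the mask's first element, so the mask fits the two input operands a permute can address. A rough standalone sketch reproducing the {6,9,9,9} -> {2,5,5,5} rewrite (simplified; not the actual vect_get_mask_element logic):

  #include <stdbool.h>

  /* Sketch: rebase a permute mask of NUNITS elements so that all indices
     fall within the two input vectors of a permute, i.e. in
     0 .. 2*NUNITS-1.  *FIRST_VEC_INDEX tells the caller which pair of
     input vectors to feed the permute.  */
  static bool
  rebase_permute_mask (int *mask, int nunits, int *first_vec_index)
  {
    *first_vec_index = mask[0] / nunits;  /* Vector holding element 0.  */
    for (int i = 0; i < nunits; i++)
      {
        mask[i] -= *first_vec_index * nunits;
        if (mask[i] < 0 || mask[i] >= 2 * nunits)
          return false;  /* Mask spans more than two input vectors.  */
      }
    return true;
  }

With nunits = 4 this leaves {0,0,0,3} and {3,3,6,6} untouched and turns {6,9,9,9} into {2,5,5,5}, as in the comment above.
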
@@ -2438,7 +2440,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   group_size = SLP_INSTANCE_GROUP_SIZE (instance);

   /* For each SLP instance calculate number of vector stmts to be created
-     for the scalar stmts in each node of the SLP tree. Number of vector
+     for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
      size. */
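
Stated as arithmetic, the comment boils down to one expression; a sketch with illustrative names:

  /* Sketch: vector stmts needed per SLP node -- GROUP_SIZE scalars per
     scalar iteration, VF scalar iterations covered per vector iteration,
     NUNITS scalar elements per vector.  */
  static int
  slp_vec_stmts_per_node (int group_size, int vf, int nunits)
  {
    return group_size * vf / nunits;
  }

For instance, a group of 8 stores with VF = 4 and 4 elements per vector needs 8 vector stmts per node.
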
@@ -2492,6 +2494,8 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
 }


 /* Generate vector code for all SLP instances in the loop/basic block. */

 bool
 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
gcc/tree-vect-stmts.c

@@ -166,7 +166,7 @@ vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,

 /* Function exist_non_indexing_operands_for_use_p

-   USE is one of the uses attached to STMT. Check if USE is
+   USE is one of the uses attached to STMT.  Check if USE is
    used in STMT for anything other than indexing an array. */

 static bool
@@ -175,7 +175,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
   tree operand;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

-  /* USE corresponds to some operand in STMT. If there is no data
+  /* USE corresponds to some operand in STMT.  If there is no data
      reference in STMT, then any operand that corresponds to USE
      is not indexing an array. */
   if (!STMT_VINFO_DATA_REF (stmt_info))
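
In other words: with no data reference every use is a "real" operand, and with one, only the value actually loaded or stored counts, while address operands are mere indexing. A much-simplified sketch of that decision (hypothetical structure, not the GCC types):

  #include <stdbool.h>

  struct stmt_sketch
  {
    bool has_data_ref;         /* Does the stmt access memory?  */
    const void *value_operand; /* The operand actually loaded/stored.  */
  };

  /* Sketch: does USE feed the computation rather than just the address
     arithmetic of a memory access?  */
  static bool
  use_is_non_indexing_p (const struct stmt_sketch *stmt, const void *use)
  {
    if (!stmt->has_data_ref)
      return true;
    return use == stmt->value_operand;
  }
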
@@ -215,7 +215,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
    Inputs:
    - a USE in STMT in a loop represented by LOOP_VINFO
    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
-     that defined USE. This is done by calling mark_relevant and passing it
+     that defined USE.  This is done by calling mark_relevant and passing it
      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

    Outputs:
@@ -466,7 +466,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
             relevant = vect_used_by_reduction
          This is because we distinguish between two kinds of relevant stmts -
          those that are used by a reduction computation, and those that are
-         (also) used by a regular computation. This allows us later on to
+         (also) used by a regular computation.  This allows us later on to
          identify stmts that are used solely by a reduction, and therefore the
          order of the results that they produce does not have to be kept. */
@@ -558,6 +558,9 @@ int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
                        dummy_type, dummy);
 }

+
+/* Get cost for STMT.  */
+
 int
 cost_for_stmt (gimple stmt)
 {
@@ -870,10 +873,10 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
                  "pipelined.");

       /* Unaligned software pipeline has a load of an address, an initial
-         load, and possibly a mask operation to "prime" the loop. However,
+         load, and possibly a mask operation to "prime" the loop.  However,
          if this is an access in a group of loads, which provide strided
          access, then the above cost should only be considered for one
-         access in the group. Inside the loop, there is a load op
+         access in the group.  Inside the loop, there is a load op
          and a realignment op. */

       if (add_realign_cost)
@@ -897,8 +900,8 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
 /* Function vect_init_vector.

    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
-   the vector elements of VECTOR_VAR. Place the initialization at BSI if it
-   is not NULL. Otherwise, place the initialization at the loop preheader.
+   the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
+   is not NULL.  Otherwise, place the initialization at the loop preheader.
    Return the DEF of INIT_STMT.
    It will be used in the vectorization of STMT. */
@@ -963,7 +966,7 @@ vect_init_vector (gimple stmt, tree vector_var, tree vector_type,

 /* Function vect_get_vec_def_for_operand.

-   OP is an operand in STMT. This function returns a (vector) def that will be
+   OP is an operand in STMT.  This function returns a (vector) def that will be
    used in the vectorized stmt for STMT.

    In the case that OP is an SSA_NAME which is defined in the loop, then
@@ -1117,10 +1120,10 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)

 /* Function vect_get_vec_def_for_stmt_copy

-   Return a vector-def for an operand. This function is used when the
+   Return a vector-def for an operand.  This function is used when the
    vectorized stmt to be created (by the caller to this function) is a "copy"
    created in case the vectorized result cannot fit in one vector, and several
-   copies of the vector-stmt are required. In this case the vector-def is
+   copies of the vector-stmt are required.  In this case the vector-def is
    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
    of the stmt that defines VEC_OPRND.
    DT is the type of the vector def VEC_OPRND.
@@ -1128,7 +1131,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
    Context:
         In case the vectorization factor (VF) is bigger than the number
         of elements that can fit in a vectype (nunits), we have to generate
-        more than one vector stmt to vectorize the scalar stmt. This situation
+        more than one vector stmt to vectorize the scalar stmt.  This situation
         arises when there are multiple data-types operated upon in the loop; the
         smallest data-type determines the VF, and as a result, when vectorizing
         stmts operating on wider types we need to create 'VF/nunits' "copies" of the
@@ -1153,7 +1156,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
    The vectorization of S2:
         To create the first vector-stmt out of the 4 copies - VSnew.0 -
         the function 'vect_get_vec_def_for_operand' is called to
-        get the relevant vector-def for each operand of S2. For operand x it
+        get the relevant vector-def for each operand of S2.  For operand x it
         returns the vector-def 'vx.0'.

         To create the remaining copies of the vector-stmt (VSnew.j), this
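
The division of labour between the two functions is the per-copy loop every vectorizable_* routine follows: copy 0 gets its def from the scalar operand, every later copy from the def made for the previous one. A schematic sketch (stub declarations stand in for the GCC types):

  typedef struct vec_def vec_def;                 /* Stand-in type.  */
  vec_def *get_def_for_operand (void *op);        /* First copy.  */
  vec_def *get_def_for_stmt_copy (vec_def *prev); /* Later copies.  */

  /* Sketch: chain the vector defs across the VF/nunits copies.  */
  static void
  vectorize_with_copies (void *op, int ncopies)
  {
    vec_def *vec_oprnd = 0;
    for (int j = 0; j < ncopies; j++)
      {
        vec_oprnd = (j == 0) ? get_def_for_operand (op)
                             : get_def_for_stmt_copy (vec_oprnd);
        /* ... emit copy j of the vector stmt using vec_oprnd ...  */
      }
  }
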
@@ -1196,7 +1199,7 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)


 /* Get vectorized definitions for the operands to create a copy of an original
-   stmt. See vect_get_vec_def_for_stmt_copy() for details. */
+   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */

 static void
 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
@@ -1217,7 +1220,8 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
 }


-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
+/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
+   NULL.  */

 static void
 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
@@ -1594,7 +1598,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)

    Create a vector stmt whose code, type, number of arguments, and result
    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
-   VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
+   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
    In the case that CODE is a CALL_EXPR, this means that a call to DECL
    needs to be created (DECL is a function-decl of a target-builtin).
    STMT is the original scalar stmt that we are vectorizing. */
@@ -1742,8 +1746,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;

-  /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
-     this, so we can safely override NCOPIES with 1 here. */
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
+     case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -1900,6 +1905,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,

   return true;
 }
+
+
 /* Function vectorizable_assignment.

    Check if STMT performs an assignment (copy) that can be vectorized.
@@ -2156,7 +2163,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     vf = 1;

   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP. */
   if (slp_node)
     ncopies = 1;
@@ -2243,7 +2250,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
         fprintf (vect_dump, "proceeding using word mode.");
     }

-  /* Worthwhile without SIMD support? Check only during analysis. */
+  /* Worthwhile without SIMD support?  Check only during analysis.  */
   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
       && vf < vect_min_worthwhile_factor (code)
       && !vec_stmt)
@@ -2270,12 +2277,12 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* Handle def. */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);

-  /* Allocate VECs for vector operands. In case of SLP, vector operands are
+  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
      created in the previous stages of the recursion, so no allocation is
-     needed, except for the case of shift with scalar shift argument. In that
+     needed, except for the case of shift with scalar shift argument.  In that
      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
-     In case of loop-based vectorization we allocate VECs of size 1. We
+     In case of loop-based vectorization we allocate VECs of size 1.  We
      allocate VEC_OPRNDS1 only in case of binary operation. */
   if (!slp_node)
     {
@@ -2289,13 +2296,13 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits. In doing so, we record a pointer
+     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
      from one copy of the vector stmt to the next, in the field
-     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
+     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
      stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt. The example below illustrates
-     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-     4 vectorized stmts):
+     stmts that use the defs of the current stmt.  The example below
+     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
+     we need to create 4 vectorized stmts):

      before vectorization:
         RELATED_STMT    VEC_STMT
@@ -2314,18 +2321,18 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,

      step2: vectorize stmt S2 (done here):
         To vectorize stmt S2 we first need to find the relevant vector
-        def for the first operand 'x'. This is, as usual, obtained from
+        def for the first operand 'x'.  This is, as usual, obtained from
         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
-        that defines 'x' (S1). This way we find the stmt VS1_0, and the
-        relevant vector def 'vx0'. Having found 'vx0' we can generate
+        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
+        relevant vector def 'vx0'.  Having found 'vx0' we can generate
         the vector stmt VS2_0, and as usual, record it in the
         STMT_VINFO_VEC_STMT of stmt S2.
         When creating the second copy (VS2_1), we obtain the relevant vector
         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
-        stmt VS1_0. This way we find the stmt VS1_1 and the relevant
-        vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
+        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
+        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
-        Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
+        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
         chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
         VS1_0:  vx0 = memref0      VS1_1       -
@@ -2348,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
       if (op_type == binary_op && scalar_shift_arg)
         {
           /* Vector shl and shr insn patterns can be defined with scalar
-             operand 2 (shift operand). In this case, use constant or loop
+             operand 2 (shift operand).  In this case, use constant or loop
              invariant op1 directly, without extending it to vector mode
              first. */
           optab_op2_mode = insn_data[icode].operand[2].mode;
@@ -2361,8 +2368,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
           if (slp_node)
             {
               /* Store vec_oprnd1 for every vector stmt to be created
-                 for SLP_NODE. We check during the analysis that all the
-                 shift arguments are the same.
+                 for SLP_NODE.  We check during the analysis that all
+                 the shift arguments are the same.
                  TODO: Allow different constants for different vector
                  stmts generated for an SLP instance. */
               for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
@@ -2415,7 +2422,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
         }


-      /* Get vectorized definitions for loop-based vectorization. For the first
+      /* Get vectorized definitions for loop-based vectorization.  For the first
          operand we call vect_get_vec_def_for_operand() (with OPRND containing
          scalar operand), and for the rest we get a copy with
          vect_get_vec_def_for_stmt_copy() using the previous vector definition
@@ -2612,7 +2619,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
     return false;

   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP. */
   if (slp_node)
     ncopies = 1;
@@ -2702,7 +2709,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,


 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
-   and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
+   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
    the resulting vectors and call the function recursively. */

 static void
@@ -2779,17 +2786,18 @@ vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
   if (multi_step_cvt)
     {
       /* For multi-step promotion operation we first generate we call the
-         function recurcively for every stage. We start from the input type,
+         function recurcively for every stage.  We start from the input type,
          create promotion operations to the intermediate types, and then
          create promotions to the output type. */
       *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
-      VEC_free (tree, heap, vec_tmp);
       vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                               multi_step_cvt - 1, stmt,
                                               vec_dsts, gsi, slp_node, code1,
                                               code2, decl2, decl2, op_type,
                                               prev_stmt_info);
     }
+
+  VEC_free (tree, heap, vec_tmp);
 }

@@ -2891,7 +2899,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
     return false;

   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP. */
   if (slp_node)
     ncopies = 1;
@@ -3259,7 +3267,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      the documentation of vect_permute_store_chain()).

      In case of both multiple types and interleaving, above vector stores and
-     permutation stmts are created for every copy. The result vector stmts are
+     permutation stmts are created for every copy.  The result vector stmts are
      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
      STMT_VINFO_RELATED_STMT for the next copies.
   */
@@ -3411,6 +3419,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   VEC_free (tree, heap, oprnds);
   if (result_chain)
     VEC_free (tree, heap, result_chain);
+  if (vec_oprnds)
+    VEC_free (tree, heap, vec_oprnds);

   return true;
 }
@@ -3476,7 +3486,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     vf = 1;

   /* Multiple types in SLP are handled by creating the appropriate number of
-     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP. */
   if (slp)
     ncopies = 1;
@@ -3603,13 +3613,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
-     vector stmt by a factor VF/nunits. In doing so, we record a pointer
+     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
      from one copy of the vector stmt to the next, in the field
-     STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
+     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
      stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt. The example below illustrates
-     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-     4 vectorized stmts):
+     stmts that use the defs of the current stmt.  The example below
+     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
+     need to create 4 vectorized stmts):

      before vectorization:
         RELATED_STMT    VEC_STMT
@@ -3621,7 +3631,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
      Next, we create the vector stmt VS1_1, and record a pointer to
      it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
-     Similarly, for VS1_2 and VS1_3. This is the resulting chain of
+     Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
      stmts and pointers:
         RELATED_STMT    VEC_STMT
         VS1_0:  vx0 = memref0      VS1_1       -
@@ -3664,9 +3674,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().

      In case of both multiple types and interleaving, the vector loads and
-     permutation stmts above are created for every copy. The result vector stmts
-     are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies. */
+     permutation stmts above are created for every copy.  The result vector
+     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
+     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

   /* If the data reference is aligned (dr_aligned) or potentially unaligned
      on a target that supports unaligned accesses (dr_unaligned_supported)
@@ -3699,7 +3709,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,

   /* If the misalignment remains the same throughout the execution of the
      loop, we can create the init_addr and permutation mask at the loop
-     preheader. Otherwise, it needs to be created inside the loop.
+     preheader.  Otherwise, it needs to be created inside the loop.
      This can only occur when vectorizing memory accesses in the inner-loop
      nested within an outer-loop that is being vectorized. */
@@ -3854,7 +3864,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
           vect_finish_stmt_generation (stmt, new_stmt, gsi);
           mark_symbols_for_renaming (new_stmt);

-          /* 3. Handle explicit realignment if necessary/supported. Create in
+          /* 3. Handle explicit realignment if necessary/supported.  Create in
                 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
           if (alignment_support_scheme == dr_explicit_realign_optimized
               || alignment_support_scheme == dr_explicit_realign)
@@ -4035,7 +4045,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
+  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
+  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;
   tree new_temp;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -4365,7 +4376,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
   if (!PURE_SLP_STMT (stmt_info))
     {
       /* Groups of strided accesses whose size is not a power of 2 are not
-         vectorizable yet using loop-vectorization. Therefore, if this stmt
+         vectorizable yet using loop-vectorization.  Therefore, if this stmt
          feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
          loop-based vectorized), the loop cannot be vectorized. */
       if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
@@ -4447,7 +4458,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
     {
       /* In case of interleaving, the whole chain is vectorized when the
-         last store in the chain is reached. Store stmts before the last
+         last store in the chain is reached.  Store stmts before the last
          one are skipped, and there vec_stmt_info shouldn't be freed
          meanwhile. */
       *strided_store = true;
@@ -4747,7 +4758,7 @@ get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)

    Returns whether a stmt with OPERAND can be vectorized.
    For loops, supportable operands are constants, loop invariants, and operands
-   that are defined by the current iteration of the loop. Unsupportable
+   that are defined by the current iteration of the loop.  Unsupportable
    operands are those that are defined by a previous iteration of the loop (as
    is the case in reduction/induction computations).
    For basic blocks, supportable operands are constants and bb invariants.
@@ -4929,7 +4940,7 @@ vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
    - CODE1 and CODE2 are codes of vector operations to be used when
      vectorizing the operation, if available.
    - DECL1 and DECL2 are decls of target builtin functions to be used
-     when vectorizing the operation, if available. In this case,
+     when vectorizing the operation, if available.  In this case,
      CODE1 and CODE2 are CALL_EXPR.
    - MULTI_STEP_CVT determines the number of required intermediate steps in
      case of multi-step conversion (like char->short->int - in that case
@@ -4973,7 +4984,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,

      When vectorizing outer-loops, we execute the inner-loop sequentially
      (each vectorized inner-loop iteration contributes to VF outer-loop
-     iterations in parallel). We therefore don't allow to change the order
+     iterations in parallel).  We therefore don't allow to change the order
      of the computation in the inner-loop during outer-loop vectorization. */

   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
@@ -5086,8 +5097,9 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
       *code2 = c2;

       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not. */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
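
The bounded search reads naturally as a loop over intermediate widths (char -> short -> int style); a sketch of the idea with hypothetical names:

  #include <stdbool.h>

  #define MAX_INTERM_CVT_STEPS 3

  /* Sketch: widen FROM_BITS step by step (8 -> 16 -> 32 ...) until
     TO_BITS is reached, failing when more than the bounded number of
     intermediate steps would be needed.  */
  static bool
  find_widening_steps (int from_bits, int to_bits,
                       int interm_bits[MAX_INTERM_CVT_STEPS], int *nsteps)
  {
    *nsteps = 0;
    while (from_bits < to_bits)
      {
        if (*nsteps == MAX_INTERM_CVT_STEPS)
          return false;
        from_bits *= 2;
        interm_bits[(*nsteps)++] = from_bits;
      }
    return from_bits == to_bits;
  }
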
@@ -5138,7 +5150,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
    and producing a result of type VECTYPE_OUT).

    Narrowing operations we currently support are NOP (CONVERT) and
-   FIX_TRUNC. This function checks if these operations are supported by
+   FIX_TRUNC.  This function checks if these operations are supported by
    the target platform directly via vector tree-codes.

    Output:
@@ -5206,8 +5218,9 @@ supportable_narrowing_operation (enum tree_code code,
       *code1 = c1;
       prev_type = vectype;
       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not. */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
gcc/tree-vectorizer.c

@@ -208,7 +208,7 @@ vectorize_loops (void)
   /* ----------- Analyze loops. ----------- */

   /* If some loop was duplicated, it gets bigger number
-     than all previously defined loops. This fact allows us to run
+     than all previously defined loops.  This fact allows us to run
      only over initial loops skipping newly generated ones. */
   FOR_EACH_LOOP (li, loop, 0)
     if (optimize_loop_nest_for_speed_p (loop))
gcc/tree-vectorizer.h

@@ -582,6 +582,8 @@ extern VEC(vec_void_p,heap) *stmt_vec_info_vec;
 void init_stmt_vec_info_vec (void);
 void free_stmt_vec_info_vec (void);

+/* Return a stmt_vec_info corresponding to STMT.  */
+
 static inline stmt_vec_info
 vinfo_for_stmt (gimple stmt)
 {
@@ -592,6 +594,8 @@ vinfo_for_stmt (gimple stmt)
   return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1);
 }

+/* Set vectorizer information INFO for STMT.  */
+
 static inline void
 set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
 {
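
The pair of accessors above implements a side table keyed by the statement's uid, with uid 0 reserved for "no entry" and slot uid-1 holding the data. The same pattern in a self-contained sketch (plain pointers instead of the GCC vector types):

  #include <stddef.h>

  #define MAX_STMTS 1024
  static void *stmt_info_table[MAX_STMTS]; /* Sketch of stmt_vec_info_vec.  */

  /* Sketch: uid 0 means "no info"; otherwise slot uid-1 holds it.  */
  static inline void *
  info_for_uid (unsigned uid)
  {
    return (uid == 0 || uid > MAX_STMTS) ? NULL : stmt_info_table[uid - 1];
  }

  static inline void
  set_info_for_uid (unsigned uid, void *info)
  {
    if (uid != 0 && uid <= MAX_STMTS)
      stmt_info_table[uid - 1] = info;
  }
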
@@ -607,6 +611,8 @@ set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
   VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info);
 }

+/* Return the earlier statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_earlier_stmt (gimple stmt1, gimple stmt2)
 {
@@ -633,6 +639,8 @@ get_earlier_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }

+/* Return the later statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_later_stmt (gimple stmt1, gimple stmt2)
 {
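
Both helpers presumably order statements by uid (assuming uids grow in emission order); a trivial sketch of the comparison:

  /* Sketch: the statement with the smaller uid was emitted earlier.  */
  static inline unsigned
  earlier_uid (unsigned uid1, unsigned uid2)
  {
    return uid1 < uid2 ? uid1 : uid2;
  }
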
@@ -659,6 +667,9 @@ get_later_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }

+/* Return TRUE if a statement represented by STMT_INFO is a part of a
+   pattern.  */
+
 static inline bool
 is_pattern_stmt_p (stmt_vec_info stmt_info)
 {
@@ -674,6 +685,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)
   return false;
 }

+/* Return true if BB is a loop header.  */
+
 static inline bool
 is_loop_header_bb_p (basic_block bb)
 {
@@ -683,6 +696,8 @@ is_loop_header_bb_p (basic_block bb)
   return false;
 }

+/* Set inside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                     int cost)
@@ -693,6 +708,8 @@ stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
 }

+/* Set outside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                      int cost)
@@ -703,6 +720,8 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
 }

+/* Return pow2 (X).  */
+
 static inline int
 vect_pow2 (int x)
 {
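
The body of this helper lies outside the hunk; a plausible implementation of such a pow2 helper, as a sketch rather than the confirmed GCC code:

  /* Sketch: compute 2**X by repeated doubling; X is small in practice,
     so overflow is not a concern.  */
  static inline int
  pow2_sketch (int x)
  {
    int res = 1;
    for (int i = 0; i < x; i++)
      res *= 2;
    return res;
  }
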
@@ -723,12 +742,17 @@ vect_pow2 (int x)
 #define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux)
 #define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL))

+/* Return TRUE if the data access is aligned, and FALSE otherwise.  */
+
 static inline bool
 aligned_access_p (struct data_reference *data_ref_info)
 {
   return (DR_MISALIGNMENT (data_ref_info) == 0);
 }

+/* Return TRUE if the alignment of the data access is known, and FALSE
+   otherwise.  */
+
 static inline bool
 known_alignment_for_access_p (struct data_reference *data_ref_info)
 {
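
By analogy with aligned_access_p just above, the body (outside this hunk) presumably compares the recorded misalignment against a sentinel; a sketch assuming -1 encodes "unknown" (an assumption, not shown by the diff):

  #include <stdbool.h>

  /* Sketch: alignment is known whenever the stored misalignment is not
     the assumed -1 "unknown" sentinel.  */
  static inline bool
  known_alignment_sketch (int misalignment)
  {
    return misalignment != -1;
  }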