tree-vectorizer.h (get_later_stmt): New function.
* tree-vectorizer.h (get_later_stmt): New function. (vect_analyze_data_ref_dependences): Add argument. * tree-vect-loop.c (vect_analyze_loop): Update call to vect_analyze_data_ref_dependences. * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): New function. (vect_analyze_data_ref_dependence): Add argument for basic block dependencies. Check dependencies in basic block vectorization. (vect_analyze_data_ref_dependences): Add argument and update call to vect_analyze_data_ref_dependences. * tree-vect-slp.c (vect_find_last_store_in_slp_instance): New. (vect_bb_vectorizable_with_dependencies): New. (vect_slp_analyze_bb): Check dependencies in basic block. (vect_schedule_slp_instance): Insert stores before the last store in SLP instance. From-SVN: r163757
This commit is contained in:
parent
282ac51f24
commit
e4a707c492
9 changed files with 315 additions and 14 deletions
|
@ -1,3 +1,21 @@
|
|||
2010-09-02 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* tree-vectorizer.h (get_later_stmt): New function.
|
||||
(vect_analyze_data_ref_dependences): Add argument.
|
||||
* tree-vect-loop.c (vect_analyze_loop): Update call to
|
||||
vect_analyze_data_ref_dependences.
|
||||
* tree-vect-data-refs.c (vect_drs_dependent_in_basic_block):
|
||||
New function.
|
||||
(vect_analyze_data_ref_dependence): Add argument for basic block
|
||||
dependencies. Check dependencies in basic block vectorization.
|
||||
(vect_analyze_data_ref_dependences): Add argument and update call to
|
||||
vect_analyze_data_ref_dependences.
|
||||
* tree-vect-slp.c (vect_find_last_store_in_slp_instance): New.
|
||||
(vect_bb_vectorizable_with_dependencies): New.
|
||||
(vect_slp_analyze_bb): Check dependencies in basic block.
|
||||
(vect_schedule_slp_instance): Insert stores before the last store in
|
||||
SLP instance.
|
||||
|
||||
2010-09-02 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
PR target/45476
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
2010-09-02 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the
|
||||
check into different basic blocks. Expect vectorization if misaligned
|
||||
stores are supported.
|
||||
* gcc.dg/vect/bb-slp-8a.c: New test.
|
||||
* gcc.dg/vect/bb-slp-8b.c: New test.
|
||||
|
||||
2010-09-01 Steve Ellcey <sje@cup.hp.com>
|
||||
|
||||
* gfortran.dg/vect/fast-math-pr38969.f90: Skip if not vectorizing.
|
||||
|
|
|
@ -15,7 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
|
|||
int i;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
/* pin and pout may alias. */
|
||||
/* pin and pout may alias. But since all the loads are before the first store
|
||||
the basic block is vectorizable. */
|
||||
a0 = *pin++ + 23;
|
||||
a1 = *pin++ + 142;
|
||||
a2 = *pin++ + 2;
|
||||
|
@ -26,6 +27,9 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
|
|||
*pout++ = a2 * x;
|
||||
*pout++ = a3 * y;
|
||||
|
||||
if (i)
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
|
||||
/* Check results. */
|
||||
if (out[0] != (in[0] + 23) * x
|
||||
|| out[1] != (in[1] + 142) * y
|
||||
|
@ -45,6 +49,6 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */
|
||||
/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
|
||||
|
|
53
gcc/testsuite/gcc.dg/vect/bb-slp-8a.c
Normal file
53
gcc/testsuite/gcc.dg/vect/bb-slp-8a.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
unsigned int out[N];
|
||||
unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
|
||||
{
|
||||
int i;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
|
||||
/* pin and pout may alias, and loads and stores are mixed. The basic block
|
||||
cannot be vectorized. */
|
||||
a0 = *pin++ + 23;
|
||||
*pout++ = a0 * x;
|
||||
a1 = *pin++ + 142;
|
||||
*pout++ = a1 * y;
|
||||
a2 = *pin++ + 2;
|
||||
*pout++ = a2 * x;
|
||||
a3 = *pin++ + 31;
|
||||
*pout++ = a3 * y;
|
||||
|
||||
if (i)
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
|
||||
/* Check results. */
|
||||
if (out[0] != (in[0] + 23) * x
|
||||
|| out[1] != (in[1] + 142) * y
|
||||
|| out[2] != (in[2] + 2) * x
|
||||
|| out[3] != (in[3] + 31) * y)
|
||||
abort();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (2, 3, &in[0], &out[0]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
|
||||
/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
|
55
gcc/testsuite/gcc.dg/vect/bb-slp-8b.c
Normal file
55
gcc/testsuite/gcc.dg/vect/bb-slp-8b.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
unsigned int out[N];
|
||||
unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 (unsigned int x, unsigned int y)
|
||||
{
|
||||
int i;
|
||||
unsigned int a0, a1, a2, a3;
|
||||
unsigned int *pin = &in[0];
|
||||
unsigned int *pout = &out[0];
|
||||
|
||||
/* pin and pout are different, so despite the fact that loads and stores
|
||||
are mixed the basic block is vectorizable. */
|
||||
a0 = *pin++ + 23;
|
||||
*pout++ = a0 * x;
|
||||
a1 = *pin++ + 142;
|
||||
*pout++ = a1 * y;
|
||||
a2 = *pin++ + 2;
|
||||
*pout++ = a2 * x;
|
||||
a3 = *pin++ + 31;
|
||||
*pout++ = a3 * y;
|
||||
|
||||
if (i)
|
||||
__asm__ volatile ("" : : : "memory");
|
||||
|
||||
/* Check results. */
|
||||
if (out[0] != (in[0] + 23) * x
|
||||
|| out[1] != (in[1] + 142) * y
|
||||
|| out[2] != (in[2] + 2) * x
|
||||
|| out[3] != (in[3] + 31) * y)
|
||||
abort();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (2, 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */
|
||||
/* { dg-final { cleanup-tree-dump "slp" } } */
|
||||
|
|
@ -322,6 +322,64 @@ vect_equal_offsets (tree offset1, tree offset2)
|
|||
}
|
||||
|
||||
|
||||
/* Check dependence between DRA and DRB for basic block vectorization. */
|
||||
|
||||
static bool
|
||||
vect_drs_dependent_in_basic_block (struct data_reference *dra,
|
||||
struct data_reference *drb)
|
||||
{
|
||||
HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
|
||||
gimple earlier_stmt;
|
||||
|
||||
/* We only call this function for pairs of loads and stores, but we verify
|
||||
it here. */
|
||||
if (DR_IS_READ (dra) == DR_IS_READ (drb))
|
||||
{
|
||||
if (DR_IS_READ (dra))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check that the data-refs have same bases and offsets. If not, we can't
|
||||
determine if they are dependent. */
|
||||
if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
|
||||
&& (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
|
||||
|| TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
|
||||
|| TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
|
||||
!= TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
|
||||
|| !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
|
||||
return true;
|
||||
|
||||
/* Check the types. */
|
||||
type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
|
||||
type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
|
||||
|
||||
if (type_size_a != type_size_b
|
||||
|| !types_compatible_p (TREE_TYPE (DR_REF (dra)),
|
||||
TREE_TYPE (DR_REF (drb))))
|
||||
return true;
|
||||
|
||||
init_a = TREE_INT_CST_LOW (DR_INIT (dra));
|
||||
init_b = TREE_INT_CST_LOW (DR_INIT (drb));
|
||||
|
||||
/* Two different locations - no dependence. */
|
||||
if (init_a != init_b)
|
||||
return false;
|
||||
|
||||
/* We have a read-write dependence. Check that the load is before the store.
|
||||
When we vectorize basic blocks, vector load can be only before
|
||||
corresponding scalar load, and vector store can be only after its
|
||||
corresponding scalar store. So the order of the acceses is preserved in
|
||||
case the load is before the store. */
|
||||
earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
|
||||
if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_check_interleaving.
|
||||
|
||||
Check if DRA and DRB are a part of interleaving. In case they are, insert
|
||||
|
@ -495,7 +553,8 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
|
|||
|
||||
static bool
|
||||
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
||||
loop_vec_info loop_vinfo, int *max_vf)
|
||||
loop_vec_info loop_vinfo, int *max_vf,
|
||||
bool *data_dependence_in_bb)
|
||||
{
|
||||
unsigned int i;
|
||||
struct loop *loop = NULL;
|
||||
|
@ -554,10 +613,14 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
|||
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
|
||||
}
|
||||
|
||||
/* Mark the statements as unvectorizable. */
|
||||
STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
|
||||
STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
|
||||
/* We do not vectorize basic blocks with write-write dependencies. */
|
||||
if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
|
||||
return true;
|
||||
|
||||
/* We deal with read-write dependencies in basic blocks later (by
|
||||
verifying that all the loads in the basic block are before all the
|
||||
stores). */
|
||||
*data_dependence_in_bb = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -577,7 +640,12 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
|||
print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
|
||||
}
|
||||
|
||||
return true;
|
||||
/* Do not vectorize basic blcoks with write-write dependences. */
|
||||
if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
|
||||
return true;
|
||||
|
||||
/* Check if this dependence is allowed in basic block vectorization. */
|
||||
return vect_drs_dependent_in_basic_block (dra, drb);
|
||||
}
|
||||
|
||||
/* Loop-based vectorization and known data dependence. */
|
||||
|
@ -678,7 +746,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
|||
|
||||
bool
|
||||
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
|
||||
bb_vec_info bb_vinfo, int *max_vf)
|
||||
bb_vec_info bb_vinfo, int *max_vf,
|
||||
bool *data_dependence_in_bb)
|
||||
{
|
||||
unsigned int i;
|
||||
VEC (ddr_p, heap) *ddrs = NULL;
|
||||
|
@ -693,7 +762,8 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
|
|||
ddrs = BB_VINFO_DDRS (bb_vinfo);
|
||||
|
||||
FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
|
||||
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
|
||||
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
|
||||
data_dependence_in_bb))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
|
@ -1378,7 +1378,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
|
|||
loop_vec_info
|
||||
vect_analyze_loop (struct loop *loop)
|
||||
{
|
||||
bool ok;
|
||||
bool ok, dummy;
|
||||
loop_vec_info loop_vinfo;
|
||||
int max_vf = MAX_VECTORIZATION_FACTOR;
|
||||
int min_vf = 2;
|
||||
|
@ -1444,7 +1444,7 @@ vect_analyze_loop (struct loop *loop)
|
|||
the dependences.
|
||||
FORNOW: fail at the first data dependence that we encounter. */
|
||||
|
||||
ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf);
|
||||
ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
|
||||
if (!ok
|
||||
|| max_vf < min_vf)
|
||||
{
|
||||
|
|
|
@ -1082,6 +1082,24 @@ vect_find_first_load_in_slp_instance (slp_instance instance)
|
|||
}
|
||||
|
||||
|
||||
/* Find the last store in SLP INSTANCE. */
|
||||
static gimple
|
||||
vect_find_last_store_in_slp_instance (slp_instance instance)
|
||||
{
|
||||
int i;
|
||||
slp_tree node;
|
||||
gimple last_store = NULL, store;
|
||||
|
||||
node = SLP_INSTANCE_TREE (instance);
|
||||
for (i = 0;
|
||||
VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
|
||||
i++)
|
||||
last_store = get_later_stmt (store, last_store);
|
||||
|
||||
return last_store;
|
||||
}
|
||||
|
||||
|
||||
/* Analyze an SLP instance starting from a group of strided stores. Call
|
||||
vect_build_slp_tree to build a tree of packed stmts if possible.
|
||||
Return FALSE if it's impossible to SLP any stmt in the loop. */
|
||||
|
@ -1503,6 +1521,42 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Check if loads and stores are mixed in the basic block (in that
|
||||
case if we are not sure that the accesses differ, we can't vectorize the
|
||||
basic block). Also return FALSE in case that there is statement marked as
|
||||
not vectorizable. */
|
||||
|
||||
static bool
|
||||
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
|
||||
{
|
||||
basic_block bb = BB_VINFO_BB (bb_vinfo);
|
||||
gimple_stmt_iterator si;
|
||||
bool detected_store = false;
|
||||
gimple stmt;
|
||||
struct data_reference *dr;
|
||||
|
||||
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
||||
{
|
||||
stmt = gsi_stmt (si);
|
||||
|
||||
/* We can't allow not analyzed statements, since they may contain data
|
||||
accesses. */
|
||||
if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
|
||||
return false;
|
||||
|
||||
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
|
||||
continue;
|
||||
|
||||
dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
|
||||
if (DR_IS_READ (dr) && detected_store)
|
||||
return false;
|
||||
|
||||
if (!DR_IS_READ (dr))
|
||||
detected_store = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if vectorization of the basic block is profitable. */
|
||||
|
||||
|
@ -1585,6 +1639,8 @@ vect_slp_analyze_bb (basic_block bb)
|
|||
gimple_stmt_iterator gsi;
|
||||
int min_vf = 2;
|
||||
int max_vf = MAX_VECTORIZATION_FACTOR;
|
||||
bool data_dependence_in_bb = false;
|
||||
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
|
||||
|
@ -1632,8 +1688,11 @@ vect_slp_analyze_bb (basic_block bb)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
|
||||
|| min_vf > max_vf)
|
||||
if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
|
||||
&data_dependence_in_bb)
|
||||
|| min_vf > max_vf
|
||||
|| (data_dependence_in_bb
|
||||
&& !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
|
||||
fprintf (vect_dump, "not vectorized: unhandled data dependence "
|
||||
|
@ -2420,6 +2479,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
|||
else
|
||||
si = gsi_for_stmt (stmt);
|
||||
|
||||
/* Stores should be inserted just before the last store. */
|
||||
if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
|
||||
&& REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
|
||||
{
|
||||
gimple last_store = vect_find_last_store_in_slp_instance (instance);
|
||||
si = gsi_for_stmt (last_store);
|
||||
}
|
||||
|
||||
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
|
||||
return is_store;
|
||||
}
|
||||
|
|
|
@ -633,6 +633,32 @@ get_earlier_stmt (gimple stmt1, gimple stmt2)
|
|||
return stmt2;
|
||||
}
|
||||
|
||||
static inline gimple
|
||||
get_later_stmt (gimple stmt1, gimple stmt2)
|
||||
{
|
||||
unsigned int uid1, uid2;
|
||||
|
||||
if (stmt1 == NULL)
|
||||
return stmt2;
|
||||
|
||||
if (stmt2 == NULL)
|
||||
return stmt1;
|
||||
|
||||
uid1 = gimple_uid (stmt1);
|
||||
uid2 = gimple_uid (stmt2);
|
||||
|
||||
if (uid1 == 0 || uid2 == 0)
|
||||
return NULL;
|
||||
|
||||
gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec));
|
||||
gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec));
|
||||
|
||||
if (uid1 > uid2)
|
||||
return stmt1;
|
||||
else
|
||||
return stmt2;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_pattern_stmt_p (stmt_vec_info stmt_info)
|
||||
{
|
||||
|
@ -776,7 +802,7 @@ extern enum dr_alignment_support vect_supportable_dr_alignment
|
|||
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
|
||||
HOST_WIDE_INT *);
|
||||
extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
|
||||
int *);
|
||||
int *, bool *);
|
||||
extern bool vect_enhance_data_refs_alignment (loop_vec_info);
|
||||
extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info);
|
||||
extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);
|
||||
|
|
Loading…
Add table
Reference in a new issue