From e4a707c4920ed5cd0bd960a5dc6af93d9f65015c Mon Sep 17 00:00:00 2001 From: Ira Rosen Date: Thu, 2 Sep 2010 06:04:34 +0000 Subject: [PATCH] tree-vectorizer.h (get_later_stmt): New function. * tree-vectorizer.h (get_later_stmt): New function. (vect_analyze_data_ref_dependences): Add argument. * tree-vect-loop.c (vect_analyze_loop): Update call to vect_analyze_data_ref_dependences. * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): New function. (vect_analyze_data_ref_dependence): Add argument for basic block dependencies. Check dependencies in basic block vectorization. (vect_analyze_data_ref_dependences): Add argument and update call to vect_analyze_data_ref_dependences. * tree-vect-slp.c (vect_find_last_store_in_slp_instance): New. (vect_bb_vectorizable_with_dependencies): New. (vect_slp_analyze_bb): Check dependencies in basic block. (vect_schedule_slp_instance): Insert stores before the last store in SLP instance. From-SVN: r163757 --- gcc/ChangeLog | 18 ++++++ gcc/testsuite/ChangeLog | 8 +++ gcc/testsuite/gcc.dg/vect/bb-slp-8.c | 8 ++- gcc/testsuite/gcc.dg/vect/bb-slp-8a.c | 53 +++++++++++++++++ gcc/testsuite/gcc.dg/vect/bb-slp-8b.c | 55 ++++++++++++++++++ gcc/tree-vect-data-refs.c | 84 ++++++++++++++++++++++++--- gcc/tree-vect-loop.c | 4 +- gcc/tree-vect-slp.c | 71 +++++++++++++++++++++- gcc/tree-vectorizer.h | 28 ++++++++- 9 files changed, 315 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-8a.c create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-8b.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0d239d04891..c54610f1ac9 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2010-09-02 Ira Rosen + + * tree-vectorizer.h (get_later_stmt): New function. + (vect_analyze_data_ref_dependences): Add argument. + * tree-vect-loop.c (vect_analyze_loop): Update call to + vect_analyze_data_ref_dependences. + * tree-vect-data-refs.c (vect_drs_dependent_in_basic_block): + New function. + (vect_analyze_data_ref_dependence): Add argument for basic block + dependencies. Check dependencies in basic block vectorization. + (vect_analyze_data_ref_dependences): Add argument and update call to + vect_analyze_data_ref_dependences. + * tree-vect-slp.c (vect_find_last_store_in_slp_instance): New. + (vect_bb_vectorizable_with_dependencies): New. + (vect_slp_analyze_bb): Check dependencies in basic block. + (vect_schedule_slp_instance): Insert stores before the last store in + SLP instance. + 2010-09-02 Uros Bizjak PR target/45476 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5c9fa627aea..03c55d00c40 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2010-09-02 Ira Rosen + + * gcc.dg/vect/bb-slp-8.c: Separate the interesting part and the + check into different basic blocks. Expect vectorization if misaligned + stores are supported. + * gcc.dg/vect/bb-slp-8a.c: New test. + * gcc.dg/vect/bb-slp-8b.c: New test. + 2010-09-01 Steve Ellcey * gfortran.dg/vect/fast-math-pr38969.f90: Skip if not vectorizing. diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-8.c b/gcc/testsuite/gcc.dg/vect/bb-slp-8.c index b0c1be77a3e..e45c0b8e388 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-8.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-8.c @@ -15,7 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout) int i; unsigned int a0, a1, a2, a3; - /* pin and pout may alias. */ + /* pin and pout may alias. But since all the loads are before the first store + the basic block is vectorizable. */ a0 = *pin++ + 23; a1 = *pin++ + 142; a2 = *pin++ + 2; @@ -26,6 +27,9 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout) *pout++ = a2 * x; *pout++ = a3 * y; + if (i) + __asm__ volatile ("" : : : "memory"); + /* Check results. */ if (out[0] != (in[0] + 23) * x || out[1] != (in[1] + 142) * y @@ -45,6 +49,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */ /* { dg-final { cleanup-tree-dump "slp" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-8a.c b/gcc/testsuite/gcc.dg/vect/bb-slp-8a.c new file mode 100644 index 00000000000..e46e931bcd1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-8a.c @@ -0,0 +1,53 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +#define N 16 + +unsigned int out[N]; +unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +__attribute__ ((noinline)) int +main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout) +{ + int i; + unsigned int a0, a1, a2, a3; + + /* pin and pout may alias, and loads and stores are mixed. The basic block + cannot be vectorized. */ + a0 = *pin++ + 23; + *pout++ = a0 * x; + a1 = *pin++ + 142; + *pout++ = a1 * y; + a2 = *pin++ + 2; + *pout++ = a2 * x; + a3 = *pin++ + 31; + *pout++ = a3 * y; + + if (i) + __asm__ volatile ("" : : : "memory"); + + /* Check results. */ + if (out[0] != (in[0] + 23) * x + || out[1] != (in[1] + 142) * y + || out[2] != (in[2] + 2) * x + || out[3] != (in[3] + 31) * y) + abort(); + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (2, 3, &in[0], &out[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-8b.c b/gcc/testsuite/gcc.dg/vect/bb-slp-8b.c new file mode 100644 index 00000000000..384acd7241a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-8b.c @@ -0,0 +1,55 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include +#include "tree-vect.h" + +#define N 16 + +unsigned int out[N]; +unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + +__attribute__ ((noinline)) int +main1 (unsigned int x, unsigned int y) +{ + int i; + unsigned int a0, a1, a2, a3; + unsigned int *pin = &in[0]; + unsigned int *pout = &out[0]; + + /* pin and pout are different, so despite the fact that loads and stores + are mixed the basic block is vectorizable. */ + a0 = *pin++ + 23; + *pout++ = a0 * x; + a1 = *pin++ + 142; + *pout++ = a1 * y; + a2 = *pin++ + 2; + *pout++ = a2 * x; + a3 = *pin++ + 31; + *pout++ = a3 * y; + + if (i) + __asm__ volatile ("" : : : "memory"); + + /* Check results. */ + if (out[0] != (in[0] + 23) * x + || out[1] != (in[1] + 142) * y + || out[2] != (in[2] + 2) * x + || out[3] != (in[3] + 31) * y) + abort(); + + return 0; +} + +int main (void) +{ + check_vect (); + + main1 (2, 3); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */ +/* { dg-final { cleanup-tree-dump "slp" } } */ + diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 2408b45535e..c76c60bd346 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -322,6 +322,64 @@ vect_equal_offsets (tree offset1, tree offset2) } +/* Check dependence between DRA and DRB for basic block vectorization. */ + +static bool +vect_drs_dependent_in_basic_block (struct data_reference *dra, + struct data_reference *drb) +{ + HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b; + gimple earlier_stmt; + + /* We only call this function for pairs of loads and stores, but we verify + it here. */ + if (DR_IS_READ (dra) == DR_IS_READ (drb)) + { + if (DR_IS_READ (dra)) + return false; + else + return true; + } + + /* Check that the data-refs have same bases and offsets. If not, we can't + determine if they are dependent. */ + if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) + && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR + || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR + || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) + != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) + || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))) + return true; + + /* Check the types. */ + type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)))); + type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); + + if (type_size_a != type_size_b + || !types_compatible_p (TREE_TYPE (DR_REF (dra)), + TREE_TYPE (DR_REF (drb)))) + return true; + + init_a = TREE_INT_CST_LOW (DR_INIT (dra)); + init_b = TREE_INT_CST_LOW (DR_INIT (drb)); + + /* Two different locations - no dependence. */ + if (init_a != init_b) + return false; + + /* We have a read-write dependence. Check that the load is before the store. + When we vectorize basic blocks, vector load can be only before + corresponding scalar load, and vector store can be only after its + corresponding scalar store. So the order of the acceses is preserved in + case the load is before the store. */ + earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb)); + if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)))) + return false; + + return true; +} + + /* Function vect_check_interleaving. Check if DRA and DRB are a part of interleaving. In case they are, insert @@ -495,7 +553,8 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) static bool vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, - loop_vec_info loop_vinfo, int *max_vf) + loop_vec_info loop_vinfo, int *max_vf, + bool *data_dependence_in_bb) { unsigned int i; struct loop *loop = NULL; @@ -554,10 +613,14 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } - /* Mark the statements as unvectorizable. */ - STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false; - STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false; + /* We do not vectorize basic blocks with write-write dependencies. */ + if (!DR_IS_READ (dra) && !DR_IS_READ (drb)) + return true; + /* We deal with read-write dependencies in basic blocks later (by + verifying that all the loads in the basic block are before all the + stores). */ + *data_dependence_in_bb = true; return false; } @@ -577,7 +640,12 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); } - return true; + /* Do not vectorize basic blcoks with write-write dependences. */ + if (!DR_IS_READ (dra) && !DR_IS_READ (drb)) + return true; + + /* Check if this dependence is allowed in basic block vectorization. */ + return vect_drs_dependent_in_basic_block (dra, drb); } /* Loop-based vectorization and known data dependence. */ @@ -678,7 +746,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, bool vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, - bb_vec_info bb_vinfo, int *max_vf) + bb_vec_info bb_vinfo, int *max_vf, + bool *data_dependence_in_bb) { unsigned int i; VEC (ddr_p, heap) *ddrs = NULL; @@ -693,7 +762,8 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, ddrs = BB_VINFO_DDRS (bb_vinfo); FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr) - if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) + if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf, + data_dependence_in_bb)) return false; return true; diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 21dca3a4e0d..2ed8c7663ef 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1378,7 +1378,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) loop_vec_info vect_analyze_loop (struct loop *loop) { - bool ok; + bool ok, dummy; loop_vec_info loop_vinfo; int max_vf = MAX_VECTORIZATION_FACTOR; int min_vf = 2; @@ -1444,7 +1444,7 @@ vect_analyze_loop (struct loop *loop) the dependences. FORNOW: fail at the first data dependence that we encounter. */ - ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf); + ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy); if (!ok || max_vf < min_vf) { diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 135e1cbcaf1..f560ac29e42 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1082,6 +1082,24 @@ vect_find_first_load_in_slp_instance (slp_instance instance) } +/* Find the last store in SLP INSTANCE. */ +static gimple +vect_find_last_store_in_slp_instance (slp_instance instance) +{ + int i; + slp_tree node; + gimple last_store = NULL, store; + + node = SLP_INSTANCE_TREE (instance); + for (i = 0; + VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store); + i++) + last_store = get_later_stmt (store, last_store); + + return last_store; +} + + /* Analyze an SLP instance starting from a group of strided stores. Call vect_build_slp_tree to build a tree of packed stmts if possible. Return FALSE if it's impossible to SLP any stmt in the loop. */ @@ -1503,6 +1521,42 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo) return true; } +/* Check if loads and stores are mixed in the basic block (in that + case if we are not sure that the accesses differ, we can't vectorize the + basic block). Also return FALSE in case that there is statement marked as + not vectorizable. */ + +static bool +vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo) +{ + basic_block bb = BB_VINFO_BB (bb_vinfo); + gimple_stmt_iterator si; + bool detected_store = false; + gimple stmt; + struct data_reference *dr; + + for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + { + stmt = gsi_stmt (si); + + /* We can't allow not analyzed statements, since they may contain data + accesses. */ + if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) + return false; + + if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) + continue; + + dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); + if (DR_IS_READ (dr) && detected_store) + return false; + + if (!DR_IS_READ (dr)) + detected_store = true; + } + + return true; +} /* Check if vectorization of the basic block is profitable. */ @@ -1585,6 +1639,8 @@ vect_slp_analyze_bb (basic_block bb) gimple_stmt_iterator gsi; int min_vf = 2; int max_vf = MAX_VECTORIZATION_FACTOR; + bool data_dependence_in_bb = false; + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "===vect_slp_analyze_bb===\n"); @@ -1632,8 +1688,11 @@ vect_slp_analyze_bb (basic_block bb) return NULL; } - if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf) - || min_vf > max_vf) + if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf, + &data_dependence_in_bb) + || min_vf > max_vf + || (data_dependence_in_bb + && !vect_bb_vectorizable_with_dependencies (bb_vinfo))) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) fprintf (vect_dump, "not vectorized: unhandled data dependence " @@ -2420,6 +2479,14 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, else si = gsi_for_stmt (stmt); + /* Stores should be inserted just before the last store. */ + if (STMT_VINFO_STRIDED_ACCESS (stmt_info) + && REFERENCE_CLASS_P (gimple_get_lhs (stmt))) + { + gimple last_store = vect_find_last_store_in_slp_instance (instance); + si = gsi_for_stmt (last_store); + } + is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance); return is_store; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index ed8ff58312a..9d6e0acb57a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -633,6 +633,32 @@ get_earlier_stmt (gimple stmt1, gimple stmt2) return stmt2; } +static inline gimple +get_later_stmt (gimple stmt1, gimple stmt2) +{ + unsigned int uid1, uid2; + + if (stmt1 == NULL) + return stmt2; + + if (stmt2 == NULL) + return stmt1; + + uid1 = gimple_uid (stmt1); + uid2 = gimple_uid (stmt2); + + if (uid1 == 0 || uid2 == 0) + return NULL; + + gcc_assert (uid1 <= VEC_length (vec_void_p, stmt_vec_info_vec)); + gcc_assert (uid2 <= VEC_length (vec_void_p, stmt_vec_info_vec)); + + if (uid1 > uid2) + return stmt1; + else + return stmt2; +} + static inline bool is_pattern_stmt_p (stmt_vec_info stmt_info) { @@ -776,7 +802,7 @@ extern enum dr_alignment_support vect_supportable_dr_alignment extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *, HOST_WIDE_INT *); extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info, - int *); + int *, bool *); extern bool vect_enhance_data_refs_alignment (loop_vec_info); extern bool vect_analyze_data_refs_alignment (loop_vec_info, bb_vec_info); extern bool vect_verify_datarefs_alignment (loop_vec_info, bb_vec_info);