From 32ff12435e068632cc28b1521b087e019ecf6717 Mon Sep 17 00:00:00 2001 From: Dorit Nuzman Date: Thu, 14 Aug 2008 12:47:56 +0000 Subject: [PATCH] tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an additional argument. 2008-08-14 Dorit Nuzman * tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an additional argument. Support reduction when duplication is needed due to data-types of different sizes in the loop. (get_initial_def_for_induction): Fix printout. (vect_get_vec_def_for_stmt_copy): Support case where the vec_stmt_for_operand is a phi node. (vectorizable_reduction): Support reduction when duplication is needed due to data-types of different sizes in the loop. (vectorizable_call): Remove restriction to not vectorize in case we have data-types of different sizes in the loop. (vectorizable_conversion): Likewise. (vectorizable_operation): Likewise. (vectorizable_type_demotion): Likewise. (vectorizable_type_promotion): Likewise. (vectorizable_induction): Add restriction to not vectorize in case we have data-types of different sizes in the loop. From-SVN: r139096 --- gcc/ChangeLog | 19 ++ gcc/testsuite/ChangeLog | 11 ++ gcc/testsuite/gcc.dg/vect/vect-outer-4a.c | 9 +- gcc/testsuite/gcc.dg/vect/vect-outer-4b.c | 9 +- gcc/testsuite/gcc.dg/vect/vect-outer-4f.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-4g.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-4i.c | 29 ++- gcc/testsuite/gcc.dg/vect/vect-outer-4j.c | 5 +- gcc/testsuite/gcc.dg/vect/vect-outer-4k.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-outer-4l.c | 2 +- gcc/tree-vect-transform.c | 222 +++++++++++++--------- 11 files changed, 194 insertions(+), 118 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 50a535546b4..77796d5674e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2008-08-14 Dorit Nuzman + + * tree-vect-transform.c (vect_create_epilog_for_reduction): Takes an + additional argument. 
Support reduction when duplication is needed due + to data-types of different sizes in the loop. + (get_initial_def_for_induction): Fix printout. + (vect_get_vec_def_for_stmt_copy): Support case where the + vec_stmt_for_operand is a phi node. + (vectorizable_reduction): Support reduction when duplication is needed + due to data-types of different sizes in the loop. + (vectorizable_call): Remove restriction to not vectorize in case we + have data-types of different sizes in the loop. + (vectorizable_conversion): Likewise. + (vectorizable_operation): Likewise. + (vectorizable_type_demotion): Likewise. + (vectorizable_type_promotion): Likewise. + (vectorizable_induction): Add restriction to not vectorize in case + we have data-types of different sizes in the loop. + 2008-08-14 Christophe Saout Uros Bizjak diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 14304344221..095d0a6450e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2008-08-14 Dorit Nuzman + + * gcc.dg/vect/vect-outer-4g.c: Change loop bound. + * gcc.dg/vect/vect-outer-4k.c: Likewise. + * gcc.dg/vect/vect-outer-4l.c: Likewise. + * gcc.dg/vect/vect-outer-4f.c: Likewise. + * gcc.dg/vect/vect-outer-4a.c: Vectorizable. Remove obsolete comment. + * gcc.dg/vect/vect-outer-4i.c: Likewise. + * gcc.dg/vect/vect-outer-4b.c: Likewise. + * gcc.dg/vect/vect-outer-4j.c: Likewise. + 2008-08-14 Uros Bizjak PR target/37101 diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4a.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4a.c index 8fd1a03db14..d7bcc9a2e8c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4a.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4a.c @@ -6,8 +6,7 @@ signed short in[N+M]; signed short coeff[M]; signed short out[N]; -/* Outer-loop vectorization. - Currently not vectorized because of multiple-data-types in the inner-loop. */ +/* Outer-loop vectorization. 
*/ void foo (){ @@ -23,9 +22,5 @@ foo (){ } } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ -/* FORNOW. not vectorized until we support 0-stride acceses like coeff[j]. should be: - { scan-tree-dump-not "multiple types in nested loop." "vect" { xfail *-*-* } } } */ - -/* { dg-final { scan-tree-dump-times "zero step in outer loop." 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target { vect_widen_mult_hi_to_si && vect_pack_trunc } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4b.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4b.c index ba2f7b4d0df..407315a8dc3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4b.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4b.c @@ -6,8 +6,7 @@ signed short in[N+M]; signed short coeff[M]; int out[N]; -/* Outer-loop vectorization. - Currently not vectorized because of multiple-data-types in the inner-loop. */ +/* Outer-loop vectorization. */ void foo (){ @@ -23,9 +22,5 @@ foo (){ } } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ -/* FORNOW. not vectorized until we support 0-stride acceses like coeff[j]. should be: - { scan-tree-dump-not "multiple types in nested loop." "vect" { xfail *-*-* } } } */ - -/* { dg-final { scan-tree-dump-times "zero step in outer loop." 
1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c index cef37c42321..88d6b7abccd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c @@ -2,7 +2,7 @@ #include #include "tree-vect.h" -#define N 40 +#define N 96 #define M 128 unsigned short in[N+M]; unsigned int out[N]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c index cef37c42321..88d6b7abccd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c @@ -2,7 +2,7 @@ #include #include "tree-vect.h" -#define N 40 +#define N 96 #define M 128 unsigned short in[N+M]; unsigned int out[N]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4i.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4i.c index bc43c5bc6d5..a244ac20ac2 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4i.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4i.c @@ -1,13 +1,17 @@ -/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ -#define N 40 +#include +#include "tree-vect.h" + +#define N 96 #define M 128 unsigned char in[N+M]; unsigned short out[N]; /* Outer-loop vectorization. */ -/* Not vectorized due to multiple-types in the inner-loop. */ +/* Multiple-types in the inner-loop. 
*/ +__attribute__ ((noinline)) unsigned short foo (){ int i,j; @@ -24,5 +28,22 @@ foo (){ return s; } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +int main (void) +{ + check_vect (); + int i; + unsigned short s; + + for (i = 0; i < N+M; i++) + in[i] = (unsigned char)i; + + s = foo (); + + if (s != 34048) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_unpack } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4j.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4j.c index 7e1b7ec81ee..db8f61c5e37 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4j.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4j.c @@ -1,12 +1,11 @@ /* { dg-do compile } */ -#define N 40 +#define N 96 #define M 128 unsigned char in[N+M]; unsigned short out[N]; /* Outer-loop vectorization. */ -/* Not vectorized due to multiple-types in the inner-loop. */ void foo (){ @@ -22,5 +21,5 @@ foo (){ } } -/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { target vect_unpack } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c index cef37c42321..88d6b7abccd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c @@ -2,7 +2,7 @@ #include #include "tree-vect.h" -#define N 40 +#define N 96 #define M 128 unsigned short in[N+M]; unsigned int out[N]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c index cef37c42321..88d6b7abccd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c @@ -2,7 +2,7 @@ #include #include "tree-vect.h" -#define N 40 +#define N 96 #define M 128 unsigned short in[N+M]; 
unsigned int out[N]; diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index a1bc2b1bd08..eb370775cba 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -59,8 +59,8 @@ static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *); static void vect_finish_stmt_generation (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *); static bool vect_is_simple_cond (tree, loop_vec_info); -static void vect_create_epilog_for_reduction (tree, gimple, enum tree_code, - gimple); +static void vect_create_epilog_for_reduction + (tree, gimple, int, enum tree_code, gimple); static tree get_initial_def_for_reduction (gimple, tree, tree *); /* Utility function dealing with loop peeling (not peeling itself). */ @@ -1824,7 +1824,7 @@ get_initial_def_for_induction (gimple iv_phi) if (vect_print_dump_info (REPORT_DETAILS)) { - fprintf (vect_dump, "transform induction: created def-use cycle:"); + fprintf (vect_dump, "transform induction: created def-use cycle: "); print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM); fprintf (vect_dump, "\n"); print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM); @@ -2056,6 +2056,10 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd) vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info); gcc_assert (vec_stmt_for_operand); vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); + if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI) + vec_oprnd = PHI_RESULT (vec_stmt_for_operand); + else + vec_oprnd = gimple_get_lhs (vec_stmt_for_operand); return vec_oprnd; } @@ -2251,6 +2255,11 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) VECT_DEF is a vector of partial results. REDUC_CODE is the tree-code for the epilog reduction. + NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the + number of elements that we can fit in a vectype (nunits). 
In this case + we have to generate more than one vector stmt - i.e - we need to "unroll" + the vector stmt by a factor VF/nunits. For more details see documentation + in vectorizable_operation. STMT is the scalar reduction stmt that is being vectorized. REDUCTION_PHI is the phi-node that carries the reduction computation. @@ -2294,10 +2303,12 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def) static void vect_create_epilog_for_reduction (tree vect_def, gimple stmt, + int ncopies, enum tree_code reduc_code, gimple reduction_phi) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + stmt_vec_info prev_phi_info; tree vectype; enum machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); @@ -2305,7 +2316,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, basic_block exit_bb; tree scalar_dest; tree scalar_type; - gimple new_phi; + gimple new_phi = NULL, phi; gimple_stmt_iterator exit_gsi; tree vec_dest; tree new_temp = NULL_TREE; @@ -2316,7 +2327,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, tree bitsize, bitpos, bytesize; enum tree_code code = gimple_assign_rhs_code (stmt); tree adjustment_def; - tree vec_initial_def; + tree vec_initial_def, def; tree orig_name; imm_use_iterator imm_iter; use_operand_p use_p; @@ -2326,7 +2337,8 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, gimple use_stmt; bool nested_in_vect_loop = false; VEC(gimple,heap) *phis = NULL; - int i; + enum vect_def_type dt = vect_unknown_def_type; + int j, i; if (nested_in_vect_loop_p (loop, stmt)) { @@ -2356,25 +2368,34 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, /*** 1. Create the reduction def-use cycle ***/ - /* 1.1 set the loop-entry arg of the reduction-phi: */ /* For the case of reduction, vect_get_vec_def_for_operand returns the scalar def before the loop, that defines the initial value of the reduction variable. 
*/ vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt, &adjustment_def); - add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); - /* 1.2 set the loop-latch arg for the reduction-phi: */ - add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); - - if (vect_print_dump_info (REPORT_DETAILS)) + phi = reduction_phi; + def = vect_def; + for (j = 0; j < ncopies; j++) { - fprintf (vect_dump, "transform reduction: created def-use cycle:"); - print_gimple_stmt (vect_dump, reduction_phi, 0, TDF_SLIM); - fprintf (vect_dump, "\n"); - print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vect_def), 0, TDF_SLIM); - } + /* 1.1 set the loop-entry arg of the reduction-phi: */ + add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop)); + /* 1.2 set the loop-latch arg for the reduction-phi: */ + if (j > 0) + def = vect_get_vec_def_for_stmt_copy (dt, def); + add_phi_arg (phi, def, loop_latch_edge (loop)); + + if (vect_print_dump_info (REPORT_DETAILS)) + { + fprintf (vect_dump, "transform reduction: created def-use cycle: "); + print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM); + } + + phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); + } /*** 2. Create epilog code The reduction epilog code operates across the elements of the vector @@ -2398,7 +2419,7 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, s_out3 = extract_field # step 2 s_out4 = adjust_result # step 3 - (step 3 is optional, and step2 1 and 2 may be combined). + (step 3 is optional, and steps 1 and 2 may be combined). Lastly, the uses of s_out0 are replaced by s_out4. 
***/ @@ -2407,8 +2428,22 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, v_out1 = phi */ exit_bb = single_exit (loop)->dest; - new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); - SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def); + def = vect_def; + prev_phi_info = NULL; + for (j = 0; j < ncopies; j++) + { + phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); + set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo)); + if (j == 0) + new_phi = phi; + else + { + def = vect_get_vec_def_for_stmt_copy (dt, def); + STMT_VINFO_RELATED_STMT (prev_phi_info) = phi; + } + SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); + prev_phi_info = vinfo_for_stmt (phi); + } exit_gsi = gsi_after_labels (exit_bb); /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 @@ -2449,6 +2484,9 @@ vect_create_epilog_for_reduction (tree vect_def, gimple stmt, if (nested_in_vect_loop) goto vect_finalize_reduction; + /* FORNOW: only a single copy is handled past this point (compare, do not assign). */ + gcc_assert (ncopies == 1); + /* 2.3 Create the reduction code, using one of the three schemes described above. */ @@ -2664,15 +2702,19 @@ vect_finalize_reduction: { stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); - /* FORNOW. Currently not supporting the case that an inner-loop reduction - is not used in the outer-loop (but only outside the outer-loop). */ + /* FORNOW. Currently not supporting the case that an inner-loop + reduction is not used in the outer-loop (but only outside the + outer-loop). */ gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) && !STMT_VINFO_LIVE_P (stmt_vinfo)); - epilog_stmt = adjustment_def ? epilog_stmt : new_phi; + epilog_stmt = adjustment_def ? 
epilog_stmt : new_phi; STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt; - set_vinfo_for_stmt (epilog_stmt, + set_vinfo_for_stmt (epilog_stmt, new_stmt_vec_info (epilog_stmt, loop_vinfo)); + if (adjustment_def) + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) = + STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi)); continue; } @@ -2745,7 +2787,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, tree def; gimple def_stmt; enum vect_def_type dt; - gimple new_phi; + gimple new_phi = NULL; tree scalar_type; bool is_simple_use; gimple orig_stmt; @@ -2754,23 +2796,17 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, int i; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; - stmt_vec_info prev_stmt_info; + int epilog_copies; + stmt_vec_info prev_stmt_info, prev_phi_info; + gimple first_phi = NULL; + bool single_defuse_cycle = false; tree reduc_def; gimple new_stmt = NULL; int j; tree ops[3]; if (nested_in_vect_loop_p (loop, stmt)) - { - loop = loop->inner; - /* FORNOW. This restriction should be relaxed. */ - if (ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } - } + loop = loop->inner; gcc_assert (ncopies >= 1); @@ -3004,18 +3040,52 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, /* Create the destination vector */ vec_dest = vect_create_destination_var (scalar_dest, vectype); - /* Create the reduction-phi that defines the reduction-operand. */ - new_phi = create_phi_node (vec_dest, loop->header); - /* In case the vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate more than one vector stmt - i.e - we need to "unroll" the vector stmt by a factor VF/nunits. For more details see documentation in vectorizable_operation. 
*/ + /* If the reduction is used in an outer loop we need to generate + VF intermediate results, like so (e.g. for ncopies=2): + r0 = phi (init, r0) + r1 = phi (init, r1) + r0 = x0 + r0; + r1 = x1 + r1; + (i.e. we generate VF results in 2 registers). + In this case we have a separate def-use cycle for each copy, and therefore + for each copy we get the vector def for the reduction variable from the + respective phi node created for this copy. + + Otherwise (the reduction is unused in the loop nest), we can combine + together intermediate results, like so (e.g. for ncopies=2): + r = phi (init, r) + r = x0 + r; + r = x1 + r; + (i.e. we generate VF/2 results in a single register). + In this case for each copy we get the vector def for the reduction variable + from the vectorized reduction operation generated in the previous iteration. + */ + + if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop) + { + single_defuse_cycle = true; + epilog_copies = 1; + } + else + epilog_copies = ncopies; + prev_stmt_info = NULL; + prev_phi_info = NULL; for (j = 0; j < ncopies; j++) { + if (j == 0 || !single_defuse_cycle) + { + /* Create the reduction-phi that defines the reduction-operand. */ + new_phi = create_phi_node (vec_dest, loop->header); + set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo)); + } + /* Handle uses. 
*/ if (j == 0) { @@ -3027,6 +3097,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, /* Get the vector def for the reduction variable from the phi node */ reduc_def = PHI_RESULT (new_phi); + first_phi = new_phi; } else { @@ -3035,9 +3106,12 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, if (op_type == ternary_op) loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1); - /* Get the vector def for the reduction variable from the vectorized - reduction operation generated in the previous iteration (j-1) */ - reduc_def = gimple_assign_lhs (new_stmt); + if (single_defuse_cycle) + reduc_def = gimple_assign_lhs (new_stmt); + else + reduc_def = PHI_RESULT (new_phi); + + STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; } /* Arguments are ready. create the new vector stmt. */ @@ -3056,11 +3130,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi, else STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; prev_stmt_info = vinfo_for_stmt (new_stmt); + prev_phi_info = vinfo_for_stmt (new_phi); } /* Finalize the reduction-phi (set it's arguments) and create the epilog reduction code. */ - vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); + if (!single_defuse_cycle) + new_temp = gimple_assign_lhs (*vec_stmt); + vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies, + epilog_reduc_code, first_phi); return true; } @@ -3108,7 +3186,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) int nunits_in; int nunits_out; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree fndecl, new_temp, def, rhs_type, lhs_type; gimple def_stmt; enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; @@ -3212,14 +3289,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) needs to be generated. */ gcc_assert (ncopies >= 1); - /* FORNOW. 
This restriction should be relaxed. */ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } - if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; @@ -3234,14 +3303,6 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt) if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform operation."); - /* FORNOW. This restriction should be relaxed. */ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } - /* Handle def. */ scalar_dest = gimple_call_lhs (stmt); vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -3436,7 +3497,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree new_temp; @@ -3527,14 +3587,6 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi, needs to be generated. */ gcc_assert (ncopies >= 1); - /* FORNOW. This restriction should be relaxed. */ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } - /* Check the operands of the operation. 
*/ if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { @@ -3847,11 +3899,19 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, stmt_vec_info stmt_info = vinfo_for_stmt (phi); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; tree vec_def; gcc_assert (ncopies >= 1); + /* FORNOW. This restriction should be relaxed. */ + if (nested_in_vect_loop_p (loop, phi) && ncopies > 1) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "multiple types in nested loop."); + return false; + } if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -3903,7 +3963,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code; enum machine_mode vec_mode; tree new_temp; @@ -3932,13 +3991,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, if (slp_node) ncopies = 1; gcc_assert (ncopies >= 1); - /* FORNOW. This restriction should be relaxed. 
*/ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -4257,7 +4309,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, tree vec_oprnd0=NULL, vec_oprnd1=NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK; tree new_temp; tree def; @@ -4310,13 +4361,6 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi, ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; gcc_assert (ncopies >= 1); - /* FORNOW. This restriction should be relaxed. */ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0))) @@ -4458,7 +4502,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, tree vec_oprnd0=NULL, vec_oprnd1=NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; tree decl1 = NULL_TREE, decl2 = NULL_TREE; int op_type; @@ -4511,13 +4554,6 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi, ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; gcc_assert (ncopies >= 1); - /* FORNOW. This restriction should be relaxed. */ - if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1) - { - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "multiple types in nested loop."); - return false; - } if (! 
((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest)) && INTEGRAL_TYPE_P (TREE_TYPE (op0)))