diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8b4baa1cab2..d558a925059 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2004-11-03 Dorit Naishlos + + PR tree-optimization/18009 + * tree-vectorizer.h (enum dr_alignment_support): New type. + (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): Removed. + (LOOP_UNALIGNED_DR): replaced with LOOP_VINFO_UNALIGNED_DR and holds a + single data_reference (instead of a varray of references). + * tree-vectorizer.c (new_loop_vec_info): Likewise. + (vect_gen_niters_for_prolog_loop): Likewise. + (vect_update_inits_of_drs): Likewise. + + (vect_update_inits_of_drs): Setting of DR_MISALIGNMENT moved to + vect_enhance_data_refs_alignment. + (vect_do_peeling_for_alignment): Likewise. + (vect_enhance_data_refs_alignment): Decide if and by how much to peel; + this functionality used to be in vect_analyze_data_refs_alignment. + Also update DR_MISALIGNMENT due to peeling; this functionality used to + be in vect_update_inits_of_drs and vect_do_peeling_for_alignment). + (vect_analyze_data_refs_alignment): Decision on whether and by how much + to peel moved to vect_enhance_data_refs_alignment. Call + vect_supportable_dr_alignment. + + (vect_compute_data_ref_alignment): Set STMT_VINFO_VECTYPE. + (vect_compute_data_refs_alignment): Return bool. Consider return value + of vect_compute_data_ref_alignment and return true/false accordingly. + (vect_enhance_data_refs_alignment): Consider return value of + vect_compute_data_refs_alignment and return true/false accordingly. + + (vect_supportable_dr_alignment): New function. + (vectorizable_store): Call vect_supportable_dr_alignment. + (vectorizable_load): Call vect_supportable_dr_alignment. Alignment + support checks moved from here to vect_supportable_dr_alignment. + + (vect_transform_loop): Avoid 80 columns overflow. 
+ 2004-11-03 Andrew Pinski * timevar.c (timevar_enable): Change from diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 03dac2ddf38..2d75a557c9c 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -154,7 +154,7 @@ static bool vect_mark_stmts_to_be_vectorized (loop_vec_info); static bool vect_analyze_scalar_cycles (loop_vec_info); static bool vect_analyze_data_ref_accesses (loop_vec_info); static bool vect_analyze_data_refs_alignment (loop_vec_info); -static void vect_compute_data_refs_alignment (loop_vec_info); +static bool vect_compute_data_refs_alignment (loop_vec_info); static bool vect_analyze_operations (loop_vec_info); /* Main code transformation functions. */ @@ -165,6 +165,8 @@ static bool vectorizable_load (tree, block_stmt_iterator *, tree *); static bool vectorizable_store (tree, block_stmt_iterator *, tree *); static bool vectorizable_operation (tree, block_stmt_iterator *, tree *); static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); +static enum dr_alignment_support vect_supportable_dr_alignment + (struct data_reference *); static void vect_align_data_ref (tree); static void vect_enhance_data_refs_alignment (loop_vec_info); @@ -1118,9 +1120,8 @@ new_loop_vec_info (struct loop *loop) "loop_write_datarefs"); VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20, "loop_read_datarefs"); + LOOP_VINFO_UNALIGNED_DR (res) = NULL; - for (i=0; ihandlers[mode].insn_code != CODE_FOR_nothing - && (!targetm.vectorize.builtin_mask_for_load - || targetm.vectorize.builtin_mask_for_load ())) - software_pipeline_loads_p = true; - else if (!targetm.vectorize.misaligned_mem_ok (mode)) - { - /* Possibly unaligned access, and can't software pipeline the loads. - */ - if (vect_debug_details (loop)) - fprintf (dump_file, "Arbitrary load not supported."); - return false; - } - } - if (!vec_stmt) /* transformation not required. 
*/ { STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; @@ -2470,7 +2461,11 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) if (vect_debug_details (NULL)) fprintf (dump_file, "transform load."); - if (!software_pipeline_loads_p) + alignment_support_cheme = vect_supportable_dr_alignment (dr); + gcc_assert (alignment_support_cheme); + + if (alignment_support_cheme == dr_aligned + || alignment_support_cheme == dr_unaligned_supported) { /* Create: p = initial_addr; @@ -2500,7 +2495,7 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) TREE_OPERAND (new_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, new_stmt, bsi); } - else /* software-pipeline the loads */ + else if (alignment_support_cheme == dr_unaligned_software_pipeline) { /* Create: p1 = initial_addr; @@ -2593,12 +2588,47 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) TREE_OPERAND (new_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, new_stmt, bsi); } + else + gcc_unreachable (); *vec_stmt = new_stmt; return true; } +/* Function vect_supportable_dr_alignment + + Return whether the data reference DR is supported with respect to its + alignment. */ + +static enum dr_alignment_support +vect_supportable_dr_alignment (struct data_reference *dr) +{ + tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); + enum machine_mode mode = (int) TYPE_MODE (vectype); + + if (aligned_access_p (dr)) + return dr_aligned; + + /* Possibly unaligned access. */ + + if (DR_IS_READ (dr)) + { + if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing + && (!targetm.vectorize.builtin_mask_for_load + || targetm.vectorize.builtin_mask_for_load ())) + return dr_unaligned_software_pipeline; + + if (targetm.vectorize.misaligned_mem_ok (mode)) + /* Can't software pipeline the loads. */ + return dr_unaligned_supported; + } + + /* Unsupported. */ + return dr_unaligned_unsupported; +} + + /* Function vect_transform_stmt. 
Create a vectorized stmt to replace STMT, and insert it at BSI. */ @@ -3013,14 +3043,14 @@ vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio, Set the number of iterations for the loop represented by LOOP_VINFO to the minimum between NITERS (the original iteration count of the loop) - and the misalignment DR - the first data reference in the list - LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this - loop, the data reference DR will refer to an aligned location. */ + and the misalignment of DR - the first data reference recorded in + LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of + this loop, the data reference DR will refer to an aligned location. */ static tree vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters) { - struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0); + struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree var, stmt; @@ -3146,7 +3176,6 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); vect_update_inits_of_dr (dr, loop, niters); - DR_MISALIGNMENT (dr) = -1; } } @@ -3157,14 +3186,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) 'niters' is set to the misalignment of one of the data references in the loop, thereby forcing it to refer to an aligned location at the beginning of the execution of this loop. The data reference for which we are - peeling is chosen from LOOP_UNALIGNED_DR. */ + peeling is recorded in LOOP_VINFO_UNALIGNED_DR. 
*/ static void vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); tree niters_of_prolog_loop, ni_name; - struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0); if (vect_debug_details (NULL)) fprintf (dump_file, "\n<>\n"); @@ -3177,10 +3205,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop), niters_of_prolog_loop, ni_name, false); - - /* Update stmt info of dr according to which we peeled. */ - DR_MISALIGNMENT (dr) = 0; - /* Update number of times loop executes. */ vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop); @@ -3284,8 +3308,9 @@ vect_transform_loop (loop_vec_info loop_vinfo, #ifdef ENABLE_CHECKING /* FORNOW: Verify that all stmts operate on the same number of units and no inner unrolling is necessary. */ - gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) - == vectorization_factor); + gcc_assert + (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) + == vectorization_factor); #endif /* -------- vectorize statement ------------ */ if (vect_debug_details (NULL)) @@ -4073,6 +4098,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr, /* It is not possible to vectorize this data reference. */ return false; } + STMT_VINFO_VECTYPE (stmt_info) = vectype; gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF); if (TREE_CODE (ref) == ARRAY_REF) @@ -4261,7 +4287,7 @@ vect_compute_array_ref_alignment (struct data_reference *dr, FOR NOW: No analysis is actually performed. Misalignment is calculated only for trivial cases. TODO. 
*/ -static void +static bool vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) { varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); @@ -4271,14 +4297,18 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - vect_compute_data_ref_alignment (dr, loop_vinfo); + if (!vect_compute_data_ref_alignment (dr, loop_vinfo)) + return false; } for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - vect_compute_data_ref_alignment (dr, loop_vinfo); + if (!vect_compute_data_ref_alignment (dr, loop_vinfo)) + return false; } + + return true; } @@ -4295,8 +4325,13 @@ vect_compute_data_refs_alignment (loop_vec_info loop_vinfo) FOR NOW: No transformation is actually performed. TODO. */ static void -vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED) +vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) { + varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); + varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + unsigned int i; + /* This pass will require a cost model to guide it whether to apply peeling or versioning or a combination of the two. For example, the scheme that @@ -4379,6 +4414,76 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED) (whether to generate regular loads/stores, or with special handling for misalignment). */ + + /* (1) Peeling to force alignment. */ + + /* (1.1) Decide whether to perform peeling, and how many iterations to peel: + Considerations: + + How many accesses will become aligned due to the peeling + - How many accesses will become unaligned due to the peeling, + and the cost of misaligned accesses. 
+ - The cost of peeling (the extra runtime checks, the increase + in code size). + + The scheme we use FORNOW: peel to force the alignment of the first + misaligned store in the loop. + Rationale: misaligned stores are not yet supported. + + TODO: Use a better cost model. */ + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + if (!aligned_access_p (dr)) + { + LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr; + LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; + break; + } + } + + if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) + { + if (vect_debug_details (loop)) + fprintf (dump_file, "Peeling for alignment will not be applied."); + return; + } + else + if (vect_debug_details (loop)) + fprintf (dump_file, "Peeling for alignment will be applied."); + + + /* (1.2) Update the alignment info according to the peeling factor. + If the misalignment of the DR we peel for is M, then the + peeling factor is VF - M, and the misalignment of each access DR_i + in the loop is DR_MISALIGNMENT (DR_i) + VF - M. + If the misalignment of the DR we peel for is unknown, then the + misalignment of each access DR_i in the loop is also unknown. + + FORNOW: set the misalignment of the accesses to unknown even + if the peeling factor is known at compile time. + + TODO: - if the peeling factor is known at compile time, use that + when updating the misalignment info of the loop DRs. + - consider accesses that are known to have the same + alignment, even if that alignment is unknown. 
*/ + + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) + DR_MISALIGNMENT (dr) = 0; + else + DR_MISALIGNMENT (dr) = -1; + } + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); + if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo)) + DR_MISALIGNMENT (dr) = 0; + else + DR_MISALIGNMENT (dr) = -1; + } } @@ -4392,12 +4497,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED) static bool vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) { + varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo); varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/ - + enum dr_alignment_support supportable_dr_alignment; unsigned int i; - unsigned int decide_peeling_count = 0; if (vect_debug_details (NULL)) fprintf (dump_file, "\n<>\n"); @@ -4406,67 +4510,46 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) /* This pass may take place at function granularity instead of at loop granularity. */ - vect_compute_data_refs_alignment (loop_vinfo); + if (!vect_compute_data_refs_alignment (loop_vinfo)) + { + if (vect_debug_details (loop) || vect_debug_stats (loop)) + fprintf (dump_file, + "not vectorized: can't calculate alignment for data ref."); + return false; + } - /* This pass will use loop versioning and loop peeling in order to enhance - the alignment of data references in the loop. - FOR NOW: we assume that whatever versioning/peeling took place, the - original loop is to be vectorized. Any other loops that were created by - the transformations performed in this pass - are not supposed to be - vectorized. This restriction will be relaxed. 
*/ + /* This pass will decide on using loop versioning and/or loop peeling in + order to enhance the alignment of data references in the loop. */ vect_enhance_data_refs_alignment (loop_vinfo); - /* Finally, check that loop can be vectorized. - FOR NOW: Until support for misaligned stores is in place, only if all - stores are aligned can the loop be vectorized. This restriction will be - relaxed. In the meantime, we can force the alignment of on of the - data-references in the loop using peeling. We currently use a heuristic - that peels the first misaligned store, but we plan to develop a - better cost model to guide the decision on which data-access to peel for. - */ + /* Finally, check that all the data references in the loop can be + handled with respect to their alignment. */ - for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) - { - struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); - if (!aligned_access_p (dr)) - { - /* Decide here whether we need peeling for alignment. */ - decide_peeling_count++; - if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS) - { - if (vect_debug_stats (loop) || vect_debug_details (loop)) - fprintf (dump_file, - "not vectorized: multiple misaligned stores."); - return false; - } - else - { - LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr; - LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true; - } - } - } - - /* The vectorizer now supports misaligned loads, so we don't fail anymore - in the presence of a misaligned read dataref. For some targets however - it may be preferable not to vectorize in such a case as misaligned - accesses are very costly. This should be considered in the future. 
*/ -/* for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) { struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); - if (!aligned_access_p (dr)) + supportable_dr_alignment = vect_supportable_dr_alignment (dr); + if (!supportable_dr_alignment) { - if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo)) - || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo))) - fprintf (dump_file, "not vectorized: unaligned load."); + if (vect_debug_details (loop) || vect_debug_stats (loop)) + fprintf (dump_file, "not vectorized: unsupported unaligned load."); + return false; + } + } + for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) + { + struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); + supportable_dr_alignment = vect_supportable_dr_alignment (dr); + if (!supportable_dr_alignment) + { + if (vect_debug_details (loop) || vect_debug_stats (loop)) + fprintf (dump_file, "not vectorized: unsupported unaligned store."); return false; } } -*/ return true; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 8ec9576544a..20563afc625 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -34,6 +34,14 @@ enum operation_type { binary_op }; +/* Define type of available alignment support. */ +enum dr_alignment_support { + dr_unaligned_unsupported, + dr_unaligned_supported, + dr_unaligned_software_pipeline, + dr_aligned +}; + /*-----------------------------------------------------------------*/ /* Info on vectorized defs. */ /*-----------------------------------------------------------------*/ @@ -121,7 +129,6 @@ vinfo_for_stmt (tree stmt) /* The misalignment of the memory access in bytes. */ #define DR_MISALIGNMENT(DR) (DR)->aux -#define MAX_NUMBER_OF_UNALIGNED_DATA_REFS 1 static inline bool aligned_access_p (struct data_reference *data_ref_info) @@ -163,7 +170,7 @@ typedef struct _loop_vec_info { int vectorization_factor; /* Unknown DRs according to which loop was peeled. 
*/ - struct data_reference *unaligned_drs [MAX_NUMBER_OF_UNALIGNED_DATA_REFS]; + struct data_reference *unaligned_dr; /* If true, loop is peeled. unaligned_drs show in this case DRs used for peeling. */ @@ -187,7 +194,7 @@ typedef struct _loop_vec_info { #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) #define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment -#define LOOP_UNALIGNED_DR(L, I) (L)->unaligned_drs[(I)] +#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_NITERS_KNOWN_P(L) \