passes.texi: Document predictive commoning.
* doc/passes.texi: Document predictive commoning. * doc/invoke.texi (-fpredictive-commoning): Document. * opts.c (decode_options): Enable flag_predictive_commoning on -O3. * tree-ssa-loop-im.c (get_lsm_tmp_name): Export. Allow adding indices to the generated name. (schedule_sm): Pass 0 to get_lsm_tmp_name. * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Export. * tree-pretty-print.c (op_symbol_1): Renamed to ... (op_symbol_code): ... and exported. (dump_omp_clause, op_symbol): Use op_symbol_code instead of op_symbol_1. * tree-pass.h (pass_predcom): Declare. * timevar.def (TV_PREDCOM): New timevar. * tree-ssa-loop.c (run_tree_predictive_commoning, gate_tree_predictive_commoning, pass_predcom): New. * tree-data-ref.c (find_data_references_in_loop): Find the references in dominance order. (canonicalize_base_object_address): Ensure that the result has pointer type. (dr_analyze_innermost): Export. (create_data_ref): Code to fail for references with invariant address moved ... (find_data_references_in_stmt): ... here. * tree-data-ref.h (dr_analyze_innermost): Declare. * tree-affine.c: Include tree-gimple.h and hashtab.h. (aff_combination_find_elt, name_expansion_hash, name_expansion_eq, tree_to_aff_combination_expand, double_int_constant_multiple_p, aff_combination_constant_multiple_p): New functions. * tree-affine.h (aff_combination_constant_multiple_p, tree_to_aff_combination_expand): Declare. * tree-predcom.c: New file. * common.opt (fpredictive-commoning): New option. * tree-flow.h (op_symbol_code, tree_predictive_commoning, stmt_dominates_stmt_p, get_lsm_tmp_name): Declare. * Makefile.in (tree-predcom.o): Add. (tree-affine.o): Add TREE_GIMPLE_H dependency. * passes.c (init_optimization_passes): Add dceloop after copy propagation in loop optimizer. Add predictive commoning to loop optimizer passes. * gcc.dg/tree-ssa/predcom-1.c: New test. * gcc.dg/tree-ssa/predcom-2.c: New test. * gcc.dg/tree-ssa/predcom-3.c: New test. * gcc.dg/tree-ssa/predcom-4.c: New test. 
* gcc.dg/tree-ssa/predcom-5.c: New test. * gcc.dg/vect/dump-tree-dceloop-pr26359.c: Test dceloop2 dumps. From-SVN: r125030
This commit is contained in:
parent
956741d54f
commit
bbc8a8dc0d
26 changed files with 3125 additions and 32 deletions
|
@ -1,3 +1,46 @@
|
|||
2007-05-24 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* doc/passes.texi: Document predictive commoning.
|
||||
* doc/invoke.texi (-fpredictive-commoning): Document.
|
||||
* opts.c (decode_options): Enable flag_predictive_commoning on -O3.
|
||||
* tree-ssa-loop-im.c (get_lsm_tmp_name): Export. Allow
|
||||
adding indices to the generated name.
|
||||
(schedule_sm): Pass 0 to get_lsm_tmp_name.
|
||||
* tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Export.
|
||||
* tree-pretty-print.c (op_symbol_1): Renamed to ...
|
||||
(op_symbol_code): ... and exported.
|
||||
(dump_omp_clause, op_symbol): Use op_symbol_code
|
||||
instead of op_symbol_1.
|
||||
* tree-pass.h (pass_predcom): Declare.
|
||||
* timevar.def (TV_PREDCOM): New timevar.
|
||||
* tree-ssa-loop.c (run_tree_predictive_commoning,
|
||||
gate_tree_predictive_commoning, pass_predcom): New.
|
||||
* tree-data-ref.c (find_data_references_in_loop): Find the
|
||||
references in dominance order.
|
||||
(canonicalize_base_object_address): Ensure that the result has
|
||||
pointer type.
|
||||
(dr_analyze_innermost): Export.
|
||||
(create_data_ref): Code to fail for references with invariant
|
||||
address moved ...
|
||||
(find_data_references_in_stmt): ... here.
|
||||
* tree-data-ref.h (dr_analyze_innermost): Declare.
|
||||
* tree-affine.c: Include tree-gimple.h and hashtab.h.
|
||||
(aff_combination_find_elt, name_expansion_hash,
|
||||
name_expansion_eq, tree_to_aff_combination_expand,
|
||||
double_int_constant_multiple_p, aff_combination_constant_multiple_p):
|
||||
New functions.
|
||||
* tree-affine.h (aff_combination_constant_multiple_p,
|
||||
tree_to_aff_combination_expand): Declare.
|
||||
* tree-predcom.c: New file.
|
||||
* common.opt (fpredictive-commoning): New option.
|
||||
* tree-flow.h (op_symbol_code, tree_predictive_commoning,
|
||||
stmt_dominates_stmt_p, get_lsm_tmp_name): Declare.
|
||||
* Makefile.in (tree-predcom.o): Add.
|
||||
(tree-affine.o): Add TREE_GIMPLE_H dependency.
|
||||
* passes.c (init_optimization_passes): Add dceloop after
|
||||
copy propagation in loop optimizer. Add predictive commoning
|
||||
to loop optimizer passes.
|
||||
|
||||
2007-05-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* target-def.h (TARGET_MANGLE_DECL_ASSEMBLER_NAME): Correct
|
||||
|
|
|
@ -1100,6 +1100,7 @@ OBJS-common = \
|
|||
tree-optimize.o \
|
||||
tree-outof-ssa.o \
|
||||
tree-phinodes.o \
|
||||
tree-predcom.o \
|
||||
tree-pretty-print.o \
|
||||
tree-profile.o \
|
||||
tree-scalar-evolution.o \
|
||||
|
@ -2083,14 +2084,18 @@ tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
|
|||
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
|
||||
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
|
||||
tree-chrec.h toplev.h langhooks.h $(TREE_INLINE_H)
|
||||
tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(TM_P_H) \
|
||||
$(CFGLOOP_H) $(TREE_FLOW_H) $(GGC_H) $(TREE_DATA_REF_H) $(SCEV_H) \
|
||||
$(PARAMS_H) $(DIAGNOSTIC_H) tree-pass.h $(TM_H) coretypes.h tree-affine.h \
|
||||
tree-inline.h
|
||||
tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
|
||||
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
|
||||
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
|
||||
tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
|
||||
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) \
|
||||
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(TREE_GIMPLE_H) \
|
||||
output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H)
|
||||
tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
|
||||
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
|
||||
|
|
|
@ -719,6 +719,10 @@ fpie
|
|||
Common Report Var(flag_pie,1) VarExists
|
||||
Generate position-independent code for executables if possible (small mode)
|
||||
|
||||
fpredictive-commoning
|
||||
Common Report Var(flag_predictive_commoning)
|
||||
Run predictive commoning optimization.
|
||||
|
||||
fprefetch-loop-arrays
|
||||
Common Report Var(flag_prefetch_loop_arrays) Optimization
|
||||
Generate prefetch instructions, if available, for arrays in loops
|
||||
|
|
|
@ -334,7 +334,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-ffinite-math-only -fno-signed-zeros @gol
|
||||
-fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
|
||||
-fomit-frame-pointer -foptimize-register-move @gol
|
||||
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
|
||||
-foptimize-sibling-calls -fpredictive-commoning -fprefetch-loop-arrays @gol
|
||||
-fprofile-generate -fprofile-use @gol
|
||||
-fregmove -frename-registers @gol
|
||||
-freorder-blocks -freorder-blocks-and-partition -freorder-functions @gol
|
||||
|
@ -5001,7 +5001,8 @@ invoking @option{-O2} on programs that use computed gotos.
|
|||
@opindex O3
|
||||
Optimize yet more. @option{-O3} turns on all optimizations specified by
|
||||
@option{-O2} and also turns on the @option{-finline-functions},
|
||||
@option{-funswitch-loops} and @option{-fgcse-after-reload} options.
|
||||
@option{-funswitch-loops}, @option{-fpredictive-commoning} and
|
||||
@option{-fgcse-after-reload} options.
|
||||
|
||||
@item -O0
|
||||
@opindex O0
|
||||
|
@ -5712,6 +5713,14 @@ This optimization is enabled by default.
|
|||
With this option, the compiler will create multiple copies of some
|
||||
local variables when unrolling a loop which can result in superior code.
|
||||
|
||||
@item -fpredictive-commoning
|
||||
@opindex fpredictive-commoning
|
||||
Perform predictive commoning optimization, i.e., reusing computations
|
||||
(especially memory loads and stores) performed in previous
|
||||
iterations of loops.
|
||||
|
||||
This option is enabled at level @option{-O3}.
|
||||
|
||||
@item -fprefetch-loop-arrays
|
||||
@opindex fprefetch-loop-arrays
|
||||
If supported by the target machine, generate instructions to prefetch
|
||||
|
|
|
@ -602,6 +602,17 @@ This pass completely unrolls loops with few iterations. The pass
|
|||
is located in @file{tree-ssa-loop-ivcanon.c} and described by
|
||||
@code{pass_complete_unroll}.
|
||||
|
||||
@item Predictive commoning
|
||||
|
||||
This pass makes the code reuse the computations from the previous
|
||||
iterations of the loops, especially loads and stores to memory.
|
||||
It does so by storing the values of these computations to a bank
|
||||
of temporary variables that are rotated at the end of the loop. To avoid
|
||||
the need for this rotation, the loop is then unrolled and the copies
|
||||
of the loop body are rewritten to use the appropriate version of
|
||||
the temporary variable. This pass is located in @file{tree-predcom.c}
|
||||
and described by @code{pass_predcom}.
|
||||
|
||||
@item Array prefetching
|
||||
|
||||
This pass issues prefetch instructions for array references inside
|
||||
|
|
|
@ -767,6 +767,7 @@ decode_options (unsigned int argc, const char **argv)
|
|||
|
||||
if (optimize >= 3)
|
||||
{
|
||||
flag_predictive_commoning = 1;
|
||||
flag_inline_functions = 1;
|
||||
flag_unswitch_loops = 1;
|
||||
flag_gcse_after_reload = 1;
|
||||
|
|
|
@ -590,7 +590,9 @@ init_optimization_passes (void)
|
|||
struct tree_opt_pass **p = &pass_tree_loop.sub;
|
||||
NEXT_PASS (pass_tree_loop_init);
|
||||
NEXT_PASS (pass_copy_prop);
|
||||
NEXT_PASS (pass_dce_loop);
|
||||
NEXT_PASS (pass_lim);
|
||||
NEXT_PASS (pass_predcom);
|
||||
NEXT_PASS (pass_tree_unswitch);
|
||||
NEXT_PASS (pass_scev_cprop);
|
||||
NEXT_PASS (pass_empty_loop);
|
||||
|
|
|
@ -1,3 +1,12 @@
|
|||
2007-05-24 Zdenek Dvorak <dvorakz@suse.cz>
|
||||
|
||||
* gcc.dg/tree-ssa/predcom-1.c: New test.
|
||||
* gcc.dg/tree-ssa/predcom-2.c: New test.
|
||||
* gcc.dg/tree-ssa/predcom-3.c: New test.
|
||||
* gcc.dg/tree-ssa/predcom-4.c: New test.
|
||||
* gcc.dg/tree-ssa/predcom-5.c: New test.
|
||||
* gcc.dg/vect/dump-tree-dceloop-pr26359.c: Test dceloop2 dumps.
|
||||
|
||||
2007-05-24 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR testsuite/32062
|
||||
|
|
49
gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
Normal file
49
gcc/testsuite/gcc.dg/tree-ssa/predcom-1.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fpredictive-commoning -fdump-tree-pcom-details" } */
|
||||
|
||||
void abort (void);
|
||||
|
||||
unsigned fib[1000];
|
||||
|
||||
/* Fills fib[] with a Fibonacci-like sequence: fib[0] = 0, fib[1] = 1 and
   every later element is the sum of the two preceding ones masked with
   0xffff.  */
void count_fib(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
fib[0] = 0;
|
||||
fib[1] = 1;
|
||||
/* Each iteration reads the two elements stored just before fib[i].  */
for (i = 2; i < 1000; i++)
|
||||
fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
|
||||
}
|
||||
|
||||
unsigned avg[1000];
|
||||
|
||||
/* For every I with 1 <= I < N, stores to avg[I] the sum of fib[I - 1],
   fib[I] and fib[I + 1] divided by 3 and masked with 0xffff.  */
void count_averages(int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < n; i++)
|
||||
avg[i] = ((fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff;
|
||||
}
|
||||
|
||||
/* Runs count_fib and count_averages (999), and aborts unless the checked
   elements of fib[] and avg[] have the expected values.  */
int main(void)
|
||||
{
|
||||
count_fib ();
|
||||
count_averages (999);
|
||||
|
||||
if (fib[19] != 4181 || avg[19] != 4510)
|
||||
abort ();
|
||||
|
||||
if (fib[999] != 162 || avg[998] != 21953)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Verify that both loops were transformed and unrolled. */
|
||||
/* { dg-final { scan-tree-dump-times "Unrolling 2 times." 2 "pcom"} } */
|
||||
|
||||
/* Also check that we undid the transformation previously made by PRE. */
|
||||
/* { dg-final { scan-tree-dump-times "looparound ref" 1 "pcom"} } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "pcom" } } */
|
45
gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
Normal file
45
gcc/testsuite/gcc.dg/tree-ssa/predcom-2.c
Normal file
|
@ -0,0 +1,45 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fpredictive-commoning -fdump-tree-pcom-details" } */
|
||||
|
||||
void abort (void);
|
||||
|
||||
int fib[1000];
|
||||
|
||||
/* Fills fib[] with a Fibonacci-like sequence: fib[0] = 0, fib[1] = 1 and
   every later element is the sum of the two preceding ones masked with
   0xffff.  */
void count_fib(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
fib[0] = 0;
|
||||
fib[1] = 1;
|
||||
/* Each iteration reads the two elements stored just before fib[i].  */
for (i = 2; i < 1000; i++)
|
||||
fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff;
|
||||
}
|
||||
|
||||
int avg[1000];
|
||||
|
||||
/* For every I with 1 <= I < 999, stores to avg[I] the sum of fib[I - 1],
   fib[I] and fib[I + 1] divided by 3 and masked with 0xffff.  The loop
   bound is a compile-time constant here.  */
void count_averages(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < 999; i++)
|
||||
avg[i] = ((fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff;
|
||||
}
|
||||
|
||||
/* Runs count_fib and count_averages, and aborts unless the checked
   elements of fib[] and avg[] have the expected values.  */
int main(void)
|
||||
{
|
||||
count_fib ();
|
||||
count_averages ();
|
||||
|
||||
if (fib[19] != 4181 || avg[19] != 4510)
|
||||
abort ();
|
||||
|
||||
if (fib[999] != 162 || avg[998] != 21953)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Verify that both loops were transformed and unrolled. */
|
||||
/* { dg-final { scan-tree-dump-times "Unrolling 2 times." 2 "pcom"} } */
|
||||
/* { dg-final { cleanup-tree-dump "pcom" } } */
|
16
gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
Normal file
16
gcc/testsuite/gcc.dg/tree-ssa/predcom-3.c
Normal file
|
@ -0,0 +1,16 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fpredictive-commoning -fdump-tree-pcom-details" } */
|
||||
|
||||
int a[1000], b[1000];
|
||||
|
||||
/* For every I with 1 <= I < 999, stores to b[I] the sum of a[I + 1], a[I]
   and a[I - 1] divided by 3.  Each iteration reads three consecutive
   elements of a[].  */
void test(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < 999; i++)
|
||||
b[i] = (a[i + 1] + a[i] + a[i - 1]) / 3;
|
||||
}
|
||||
|
||||
/* Verify that we used 3 temporary variables for the loop. */
|
||||
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
|
||||
/* { dg-final { cleanup-tree-dump "pcom" } } */
|
30
gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
Normal file
30
gcc/testsuite/gcc.dg/tree-ssa/predcom-4.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fpredictive-commoning -fdump-tree-pcom-details" } */
|
||||
|
||||
/* Test for predictive commoning of expressions, without reassociation. */
|
||||
|
||||
void abort (void);
|
||||
|
||||
int a[1000], b[1000], c[1000];
|
||||
|
||||
/* Initializes a[I] = b[I] = I, computes
   c[I] = a[I + 2] * b[I + 1] - b[I - 1] * a[I] for 1 <= I < 998, and aborts
   if any computed c[I] differs from 4 * I + 2.  */
int main(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 1000; i++)
|
||||
a[i] = b[i] = i;
|
||||
|
||||
/* The second product of one iteration equals the first product computed
   two iterations earlier.  */
for (i = 1; i < 998; i++)
|
||||
c[i] = a[i + 2] * b[i + 1] - b[i - 1] * a[i];
|
||||
|
||||
for (i = 1; i < 998; i++)
|
||||
if (c[i] != 4 * i + 2)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Combination" 1 "pcom"} } */
|
||||
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
|
||||
/* { dg-final { cleanup-tree-dump "pcom" } } */
|
30
gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
Normal file
30
gcc/testsuite/gcc.dg/tree-ssa/predcom-5.c
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -fpredictive-commoning -fdump-tree-pcom-details" } */
|
||||
|
||||
/* Test for predictive commoning of expressions, with reassociation. */
|
||||
|
||||
void abort (void);
|
||||
|
||||
unsigned a[1000], b[1000], c[1000], d[1000];
|
||||
|
||||
/* Initializes a[I] = b[I] = d[I] = I, computes
   c[I] = d[I + 1] * a[I + 2] * b[I + 1] - b[I - 1] * a[I] * d[I - 1]
   for 1 <= I < 998, and aborts if any computed c[I] differs from the same
   expression evaluated directly on I.  */
int main(void)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 1000; i++)
|
||||
a[i] = b[i] = d[i] = i;
|
||||
|
||||
/* The operands of the two triple products are written in different
   orders.  */
for (i = 1; i < 998; i++)
|
||||
c[i] = d[i + 1] * a[i + 2] * b[i + 1] - b[i - 1] * a[i] * d[i - 1];
|
||||
|
||||
for (i = 1; i < 998; i++)
|
||||
if (c[i] != (i+1)*(i+2)*(i+1) - (i - 1) * i * (i - 1))
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Combination" 2 "pcom"} } */
|
||||
/* { dg-final { scan-tree-dump-times "Unrolling 3 times." 1 "pcom"} } */
|
||||
/* { dg-final { cleanup-tree-dump "pcom" } } */
|
|
@ -11,6 +11,6 @@ foo () {
|
|||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Deleting : vect_" 0 "dceloop" } } */
|
||||
/* { dg-final { cleanup-tree-dump "dceloop" } } */
|
||||
/* { dg-final { scan-tree-dump-times "Deleting : vect_" 0 "dceloop2" } } */
|
||||
/* { dg-final { cleanup-tree-dump "dceloop2" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -112,6 +112,7 @@ DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
|
|||
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
|
||||
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
|
||||
DEFTIMEVAR (TV_PREDCOM , "predictive commoning")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_INIT , "tree loop init")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_FINI , "tree loop fini")
|
||||
DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
|
||||
|
|
|
@ -29,7 +29,9 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|||
#include "output.h"
|
||||
#include "diagnostic.h"
|
||||
#include "tree-dump.h"
|
||||
#include "pointer-set.h"
|
||||
#include "tree-affine.h"
|
||||
#include "tree-gimple.h"
|
||||
|
||||
/* Extends CST as appropriate for the affine combinations COMB. */
|
||||
|
||||
|
@ -493,3 +495,212 @@ aff_combination_mult (aff_tree *c1, aff_tree *c2, aff_tree *r)
|
|||
aff_combination_add_product (c1, double_int_one, c2->rest, r);
|
||||
aff_combination_add_product (c1, c2->offset, NULL, r);
|
||||
}
|
||||
|
||||
/* Returns the element of COMB whose value is VAL, or NULL if no such
|
||||
element exists. If IDX is not NULL, it is set to the index of VAL in
|
||||
COMB. */
|
||||
|
||||
static struct aff_comb_elt *
|
||||
aff_combination_find_elt (aff_tree *comb, tree val, unsigned *idx)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < comb->n; i++)
|
||||
if (operand_equal_p (comb->elts[i].val, val, 0))
|
||||
{
|
||||
if (idx)
|
||||
*idx = i;
|
||||
|
||||
return &comb->elts[i];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Element of the cache that maps ssa name NAME to its expanded form
|
||||
as an affine expression EXPANSION. */
|
||||
|
||||
struct name_expansion
|
||||
{
|
||||
aff_tree expansion;
|
||||
|
||||
/* True if the expansion for the name is just being generated. */
|
||||
unsigned in_progress : 1;
|
||||
};
|
||||
|
||||
/* Similar to tree_to_aff_combination, but follows SSA name definitions
|
||||
and expands them recursively. CACHE is used to cache the expansions
|
||||
of the ssa names, to avoid exponential time complexity for cases
|
||||
like
|
||||
|
||||
a1 = a0 + a0;
|
||||
a2 = a1 + a1;
|
||||
a3 = a2 + a2;
|
||||
... */
|
||||
|
||||
void
|
||||
tree_to_aff_combination_expand (tree expr, tree type, aff_tree *comb,
|
||||
struct pointer_map_t **cache)
|
||||
{
|
||||
unsigned i;
|
||||
aff_tree to_add, current, curre;
|
||||
tree e, def, rhs;
|
||||
double_int scale;
|
||||
void **slot;
|
||||
struct name_expansion *exp;
|
||||
|
||||
tree_to_aff_combination (expr, type, comb);
|
||||
aff_combination_zero (&to_add, type);
|
||||
for (i = 0; i < comb->n; i++)
|
||||
{
|
||||
e = comb->elts[i].val;
|
||||
if (TREE_CODE (e) != SSA_NAME)
|
||||
continue;
|
||||
def = SSA_NAME_DEF_STMT (e);
|
||||
if (TREE_CODE (def) != GIMPLE_MODIFY_STMT
|
||||
|| GIMPLE_STMT_OPERAND (def, 0) != e)
|
||||
continue;
|
||||
|
||||
rhs = GIMPLE_STMT_OPERAND (def, 1);
|
||||
if (TREE_CODE (rhs) != SSA_NAME
|
||||
&& !EXPR_P (rhs)
|
||||
&& !is_gimple_min_invariant (rhs))
|
||||
continue;
|
||||
|
||||
/* We do not know whether the reference retains its value at the
|
||||
place where the expansion is used. */
|
||||
if (REFERENCE_CLASS_P (rhs))
|
||||
continue;
|
||||
|
||||
if (!*cache)
|
||||
*cache = pointer_map_create ();
|
||||
slot = pointer_map_insert (*cache, e);
|
||||
exp = *slot;
|
||||
|
||||
if (!exp)
|
||||
{
|
||||
exp = XNEW (struct name_expansion);
|
||||
exp->in_progress = 1;
|
||||
*slot = exp;
|
||||
tree_to_aff_combination_expand (rhs, type, ¤t, cache);
|
||||
exp->expansion = current;
|
||||
exp->in_progress = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Since we follow the definitions in the SSA form, we should not
|
||||
enter a cycle unless we pass through a phi node. */
|
||||
gcc_assert (!exp->in_progress);
|
||||
current = exp->expansion;
|
||||
}
|
||||
|
||||
/* Accumulate the new terms to TO_ADD, so that we do not modify
|
||||
COMB while traversing it; include the term -coef * E, to remove
|
||||
it from COMB. */
|
||||
scale = comb->elts[i].coef;
|
||||
aff_combination_zero (&curre, type);
|
||||
aff_combination_add_elt (&curre, e, double_int_neg (scale));
|
||||
aff_combination_scale (¤t, scale);
|
||||
aff_combination_add (&to_add, ¤t);
|
||||
aff_combination_add (&to_add, &curre);
|
||||
}
|
||||
aff_combination_add (comb, &to_add);
|
||||
}
|
||||
|
||||
/* Frees memory occupied by struct name_expansion in *VALUE. Callback for
|
||||
pointer_map_traverse. */
|
||||
|
||||
static bool
|
||||
free_name_expansion (void *key ATTRIBUTE_UNUSED, void **value,
|
||||
void *data ATTRIBUTE_UNUSED)
|
||||
{
|
||||
struct name_expansion *exp = *value;
|
||||
|
||||
free (exp);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Frees memory allocated for the CACHE used by
|
||||
tree_to_aff_combination_expand. */
|
||||
|
||||
void
|
||||
free_affine_expand_cache (struct pointer_map_t **cache)
|
||||
{
|
||||
if (!*cache)
|
||||
return;
|
||||
|
||||
pointer_map_traverse (*cache, free_name_expansion, NULL);
|
||||
pointer_map_destroy (*cache);
|
||||
*cache = NULL;
|
||||
}
|
||||
|
||||
/* If VAL != CST * DIV for any constant CST, returns false.
|
||||
Otherwise, if VAL != 0 (and hence CST != 0), and *MULT_SET is true,
|
||||
additionally compares CST and MULT, and if they are different,
|
||||
returns false. Finally, if neither of these two cases occcur,
|
||||
true is returned, and if CST != 0, CST is stored to MULT and
|
||||
MULT_SET is set to true. */
|
||||
|
||||
static bool
|
||||
double_int_constant_multiple_p (double_int val, double_int div,
|
||||
bool *mult_set, double_int *mult)
|
||||
{
|
||||
double_int rem, cst;
|
||||
|
||||
if (double_int_zero_p (val))
|
||||
return true;
|
||||
|
||||
if (double_int_zero_p (div))
|
||||
return false;
|
||||
|
||||
cst = double_int_sdivmod (val, div, FLOOR_DIV_EXPR, &rem);
|
||||
if (!double_int_zero_p (rem))
|
||||
return false;
|
||||
|
||||
if (*mult_set && !double_int_equal_p (*mult, cst))
|
||||
return false;
|
||||
|
||||
*mult_set = true;
|
||||
*mult = cst;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Returns true if VAL = X * DIV for some constant X. If this is the case,
|
||||
X is stored to MULT. */
|
||||
|
||||
bool
|
||||
aff_combination_constant_multiple_p (aff_tree *val, aff_tree *div,
|
||||
double_int *mult)
|
||||
{
|
||||
bool mult_set = false;
|
||||
unsigned i;
|
||||
|
||||
if (val->n == 0 && double_int_zero_p (val->offset))
|
||||
{
|
||||
*mult = double_int_zero;
|
||||
return true;
|
||||
}
|
||||
if (val->n != div->n)
|
||||
return false;
|
||||
|
||||
if (val->rest || div->rest)
|
||||
return false;
|
||||
|
||||
if (!double_int_constant_multiple_p (val->offset, div->offset,
|
||||
&mult_set, mult))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < div->n; i++)
|
||||
{
|
||||
struct aff_comb_elt *elt
|
||||
= aff_combination_find_elt (val, div->elts[i].val, NULL);
|
||||
if (!elt)
|
||||
return false;
|
||||
if (!double_int_constant_multiple_p (elt->coef, div->elts[i].coef,
|
||||
&mult_set, mult))
|
||||
return false;
|
||||
}
|
||||
|
||||
gcc_assert (mult_set);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -70,3 +70,7 @@ void aff_combination_convert (aff_tree *, tree);
|
|||
void tree_to_aff_combination (tree, tree, aff_tree *);
|
||||
tree aff_combination_to_tree (aff_tree *);
|
||||
void unshare_aff_combination (aff_tree *);
|
||||
bool aff_combination_constant_multiple_p (aff_tree *, aff_tree *, double_int *);
|
||||
void tree_to_aff_combination_expand (tree, tree, aff_tree *,
|
||||
struct pointer_map_t **);
|
||||
void free_affine_expand_cache (struct pointer_map_t **);
|
||||
|
|
|
@ -573,8 +573,15 @@ split_constant_offset (tree exp, tree *var, tree *off)
|
|||
static tree
|
||||
canonicalize_base_object_address (tree addr)
|
||||
{
|
||||
tree orig = addr;
|
||||
|
||||
STRIP_NOPS (addr);
|
||||
|
||||
/* The base address may be obtained by casting from integer, in that case
|
||||
keep the cast. */
|
||||
if (!POINTER_TYPE_P (TREE_TYPE (addr)))
|
||||
return orig;
|
||||
|
||||
if (TREE_CODE (addr) != ADDR_EXPR)
|
||||
return addr;
|
||||
|
||||
|
@ -584,7 +591,7 @@ canonicalize_base_object_address (tree addr)
|
|||
/* Analyzes the behavior of the memory reference DR in the innermost loop that
|
||||
contains it. */
|
||||
|
||||
static void
|
||||
void
|
||||
dr_analyze_innermost (struct data_reference *dr)
|
||||
{
|
||||
tree stmt = DR_STMT (dr);
|
||||
|
@ -804,16 +811,6 @@ create_data_ref (struct loop *nest, tree memref, tree stmt, bool is_read)
|
|||
fprintf (dump_file, "\n");
|
||||
}
|
||||
|
||||
/* FIXME -- data dependence analysis does not work correctly for objects with
|
||||
invariant addresses. Let us fail here until the problem is fixed. */
|
||||
if (dr_address_invariant_p (dr))
|
||||
{
|
||||
free_data_ref (dr);
|
||||
dr = NULL;
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "\tFAILED as dr address is invariant\n");
|
||||
}
|
||||
|
||||
return dr;
|
||||
}
|
||||
|
||||
|
@ -3965,13 +3962,20 @@ find_data_references_in_stmt (struct loop *nest, tree stmt,
|
|||
for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
|
||||
{
|
||||
dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
|
||||
if (dr)
|
||||
VEC_safe_push (data_reference_p, heap, *datarefs, dr);
|
||||
else
|
||||
gcc_assert (dr != NULL);
|
||||
|
||||
/* FIXME -- data dependence analysis does not work correctly for objects with
|
||||
invariant addresses. Let us fail here until the problem is fixed. */
|
||||
if (dr_address_invariant_p (dr))
|
||||
{
|
||||
free_data_ref (dr);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "\tFAILED as dr address is invariant\n");
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
|
||||
VEC_safe_push (data_reference_p, heap, *datarefs, dr);
|
||||
}
|
||||
VEC_free (data_ref_loc, heap, references);
|
||||
return ret;
|
||||
|
@ -3992,7 +3996,7 @@ find_data_references_in_loop (struct loop *loop,
|
|||
unsigned int i;
|
||||
block_stmt_iterator bsi;
|
||||
|
||||
bbs = get_loop_body (loop);
|
||||
bbs = get_loop_body_in_dom_order (loop);
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
|
|
|
@ -299,6 +299,7 @@ DEF_VEC_O (data_ref_loc);
|
|||
DEF_VEC_ALLOC_O (data_ref_loc, heap);
|
||||
|
||||
bool get_references_in_stmt (tree, VEC (data_ref_loc, heap) **);
|
||||
void dr_analyze_innermost (struct data_reference *);
|
||||
extern void compute_data_dependences_for_loop (struct loop *, bool,
|
||||
VEC (data_reference_p, heap) **,
|
||||
VEC (ddr_p, heap) **);
|
||||
|
|
|
@ -786,6 +786,7 @@ extern bool cleanup_tree_cfg_loop (void);
|
|||
|
||||
/* In tree-pretty-print.c. */
|
||||
extern void dump_generic_bb (FILE *, basic_block, int, int);
|
||||
extern const char *op_symbol_code (enum tree_code);
|
||||
|
||||
/* In tree-dfa.c */
|
||||
extern var_ann_t create_var_ann (tree);
|
||||
|
@ -972,6 +973,7 @@ unsigned int tree_unroll_loops_completely (bool);
|
|||
unsigned int tree_ssa_prefetch_arrays (void);
|
||||
unsigned int remove_empty_loops (void);
|
||||
void tree_ssa_iv_optimize (void);
|
||||
void tree_predictive_commoning (void);
|
||||
|
||||
bool number_of_iterations_exit (struct loop *, edge,
|
||||
struct tree_niter_desc *niter, bool);
|
||||
|
@ -1017,6 +1019,7 @@ void tree_transform_and_unroll_loop (struct loop *, unsigned,
|
|||
edge, struct tree_niter_desc *,
|
||||
transform_callback, void *);
|
||||
bool contains_abnormal_ssa_name_p (tree);
|
||||
bool stmt_dominates_stmt_p (tree, tree);
|
||||
|
||||
/* In tree-ssa-threadedge.c */
|
||||
extern bool potentially_threadable_block (basic_block);
|
||||
|
@ -1034,6 +1037,7 @@ enum move_pos
|
|||
MOVE_POSSIBLE /* Unlimited movement. */
|
||||
};
|
||||
extern enum move_pos movement_possibility (tree);
|
||||
char *get_lsm_tmp_name (tree, unsigned);
|
||||
|
||||
/* The reasons a variable may escape a function. */
|
||||
enum escape_type
|
||||
|
|
|
@ -250,6 +250,7 @@ extern struct tree_opt_pass pass_tree_loop;
|
|||
extern struct tree_opt_pass pass_tree_loop_init;
|
||||
extern struct tree_opt_pass pass_lim;
|
||||
extern struct tree_opt_pass pass_tree_unswitch;
|
||||
extern struct tree_opt_pass pass_predcom;
|
||||
extern struct tree_opt_pass pass_iv_canon;
|
||||
extern struct tree_opt_pass pass_scev_cprop;
|
||||
extern struct tree_opt_pass pass_empty_loop;
|
||||
|
|
2567
gcc/tree-predcom.c
Normal file
2567
gcc/tree-predcom.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -37,7 +37,6 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
|||
|
||||
/* Local functions, macros and variables. */
|
||||
static int op_prio (tree);
|
||||
static const char *op_symbol_1 (enum tree_code);
|
||||
static const char *op_symbol (tree);
|
||||
static void pretty_print_string (pretty_printer *, const char*);
|
||||
static void print_call_name (pretty_printer *, tree);
|
||||
|
@ -296,7 +295,7 @@ dump_omp_clause (pretty_printer *buffer, tree clause, int spc, int flags)
|
|||
|
||||
case OMP_CLAUSE_REDUCTION:
|
||||
pp_string (buffer, "reduction(");
|
||||
pp_string (buffer, op_symbol_1 (OMP_CLAUSE_REDUCTION_CODE (clause)));
|
||||
pp_string (buffer, op_symbol_code (OMP_CLAUSE_REDUCTION_CODE (clause)));
|
||||
pp_character (buffer, ':');
|
||||
dump_generic_node (buffer, OMP_CLAUSE_DECL (clause),
|
||||
spc, flags, false);
|
||||
|
@ -2390,10 +2389,10 @@ op_prio (tree op)
|
|||
}
|
||||
|
||||
|
||||
/* Return the symbol associated with operator OP. */
|
||||
/* Return the symbol associated with operator CODE. */
|
||||
|
||||
static const char *
|
||||
op_symbol_1 (enum tree_code code)
|
||||
const char *
|
||||
op_symbol_code (enum tree_code code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
|
@ -2557,10 +2556,12 @@ op_symbol_1 (enum tree_code code)
|
|||
}
|
||||
}
|
||||
|
||||
/* Return the symbol associated with operator OP. */
|
||||
|
||||
static const char *
|
||||
op_symbol (tree op)
|
||||
{
|
||||
return op_symbol_1 (TREE_CODE (op));
|
||||
return op_symbol_code (TREE_CODE (op));
|
||||
}
|
||||
|
||||
/* Prints the name of a CALL_EXPR. */
|
||||
|
|
|
@ -1116,14 +1116,23 @@ gen_lsm_tmp_name (tree ref)
|
|||
}
|
||||
|
||||
/* Determines name for temporary variable that replaces REF.
|
||||
The name is accumulated into the lsm_tmp_name variable. */
|
||||
The name is accumulated into the lsm_tmp_name variable.
|
||||
N is added to the name of the temporary. */
|
||||
|
||||
static char *
|
||||
get_lsm_tmp_name (tree ref)
|
||||
char *
|
||||
get_lsm_tmp_name (tree ref, unsigned n)
|
||||
{
|
||||
char ns[2];
|
||||
|
||||
lsm_tmp_name_length = 0;
|
||||
gen_lsm_tmp_name (ref);
|
||||
lsm_tmp_name_add ("_lsm");
|
||||
if (n < 10)
|
||||
{
|
||||
ns[0] = '0' + n;
|
||||
ns[1] = 0;
|
||||
lsm_tmp_name_add (ns);
|
||||
}
|
||||
return lsm_tmp_name;
|
||||
}
|
||||
|
||||
|
@ -1153,7 +1162,7 @@ schedule_sm (struct loop *loop, VEC (edge, heap) *exits, tree ref,
|
|||
}
|
||||
|
||||
tmp_var = make_rename_temp (TREE_TYPE (ref),
|
||||
get_lsm_tmp_name (ref));
|
||||
get_lsm_tmp_name (ref, ~0));
|
||||
|
||||
fmt_data.loop = loop;
|
||||
fmt_data.orig_loop = loop;
|
||||
|
|
|
@ -2816,7 +2816,7 @@ estimate_numbers_of_iterations (void)
|
|||
|
||||
/* Returns true if statement S1 dominates statement S2. */
|
||||
|
||||
static bool
|
||||
bool
|
||||
stmt_dominates_stmt_p (tree s1, tree s2)
|
||||
{
|
||||
basic_block bb1 = bb_for_stmt (s1), bb2 = bb_for_stmt (s2);
|
||||
|
|
|
@ -176,6 +176,42 @@ struct tree_opt_pass pass_tree_unswitch =
|
|||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Predictive commoning. */
|
||||
|
||||
static unsigned
|
||||
run_tree_predictive_commoning (void)
|
||||
{
|
||||
if (!current_loops)
|
||||
return 0;
|
||||
|
||||
tree_predictive_commoning ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
gate_tree_predictive_commoning (void)
|
||||
{
|
||||
return flag_predictive_commoning != 0;
|
||||
}
|
||||
|
||||
struct tree_opt_pass pass_predcom =
|
||||
{
|
||||
"pcom", /* name */
|
||||
gate_tree_predictive_commoning, /* gate */
|
||||
run_tree_predictive_commoning, /* execute */
|
||||
NULL, /* sub */
|
||||
NULL, /* next */
|
||||
0, /* static_pass_number */
|
||||
TV_PREDCOM, /* tv_id */
|
||||
PROP_cfg, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
TODO_dump_func | TODO_verify_loops
|
||||
| TODO_update_ssa_only_virtuals, /* todo_flags_finish */
|
||||
0 /* letter */
|
||||
};
|
||||
|
||||
/* Loop autovectorization. */
|
||||
|
||||
static unsigned int
|
||||
|
|
Loading…
Add table
Reference in a new issue