diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 914801a36a2..713c23fcb83 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2012-04-28 Joern Rennecke + Steven Bosscher + Maxim Kuvyrkov + + PR tree-optimization/38785 + * common.opt (ftree-partial-pre): New option. + * doc/invoke.texi: Document it. + * opts.c (default_options_table): Initialize flag_tree_partial_pre. + * tree-ssa-pre.c (do_partial_partial_insertion): Insert only if it will + benefit speed path. + (execute_pre): Use flag_tree_partial_pre. + 2012-04-27 John David Anglin PR target/52999 diff --git a/gcc/common.opt b/gcc/common.opt index 37e806a1f2b..14c88bf656c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2033,6 +2033,10 @@ ftree-pre Common Report Var(flag_tree_pre) Optimization Enable SSA-PRE optimization on trees +ftree-partial-pre +Common Report Var(flag_tree_partial_pre) Optimization +In SSA-PRE optimization on trees, enable partial-partial redundancy elimination + ftree-pta Common Report Var(flag_tree_pta) Init(1) Optimization Perform function-local points-to analysis on trees. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 280fac3b04b..bd7ea3b9480 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -410,7 +410,8 @@ Objective-C and Objective-C++ Dialects}. -ftree-loop-if-convert-stores -ftree-loop-im @gol -ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol --ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol +-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol +-ftree-reassoc @gol -ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol -ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol -funit-at-a-time -funroll-all-loops -funroll-loops @gol @@ -6294,8 +6295,8 @@ invoking @option{-O2} on programs that use computed gotos. Optimize yet more. @option{-O3} turns on all optimizations specified by @option{-O2} and also turns on the @option{-finline-functions}, @option{-funswitch-loops}, @option{-fpredictive-commoning}, -@option{-fgcse-after-reload}, @option{-ftree-vectorize} and -@option{-fipa-cp-clone} options. +@option{-fgcse-after-reload}, @option{-ftree-vectorize}, +@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options. @item -O0 @opindex O0 @@ -7090,6 +7091,11 @@ at @option{-O} and higher. Perform partial redundancy elimination (PRE) on trees. This flag is enabled by default at @option{-O2} and @option{-O3}. +@item -ftree-partial-pre +@opindex ftree-partial-pre +Make partial redundancy elimination (PRE) more aggressive. This flag is +enabled by default at @option{-O3}. + @item -ftree-forwprop @opindex ftree-forwprop Perform forward propagation on trees. This flag is enabled by default diff --git a/gcc/opts.c b/gcc/opts.c index 4e8b3c033fe..ab2de8fbcd1 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -499,6 +499,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, /* -Ofast adds optimizations to -O3. */ { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index e3e55ef48a8..a89856aa959 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -3774,20 +3774,51 @@ do_partial_partial_insertion (basic_block block, basic_block dom) } else avail[bprime->index] = edoubleprime; - } /* If we can insert it, it's not the same value already existing along every predecessor, and it's defined by some predecessor, it is partially redundant. */ - if (!cant_insert && by_all && dbg_cnt (treepre_insert)) + if (!cant_insert && by_all) { - pre_stats.pa_insert++; - if (insert_into_preds_of_block (block, get_expression_id (expr), - avail)) - new_stuff = true; - } + edge succ; + bool do_insertion = false; + + /* Insert only if we can remove a later expression on a path + that we want to optimize for speed. + The phi node that we will be inserting in BLOCK is not free, + and inserting it for the sake of !optimize_for_speed successor + may cause regressions on the speed path. */ + FOR_EACH_EDGE (succ, ei, block->succs) + { + if (bitmap_set_contains_value (PA_IN (succ->dest), val)) + { + if (optimize_edge_for_speed_p (succ)) + do_insertion = true; + } + } + + if (!do_insertion) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Skipping partial partial redundancy " + "for expression "); + print_pre_expr (dump_file, expr); + fprintf (dump_file, " (%04d), not partially anticipated " + "on any to be optimized for speed edges\n", val); + } + } + else if (dbg_cnt (treepre_insert)) + { + pre_stats.pa_insert++; + if (insert_into_preds_of_block (block, + get_expression_id (expr), + avail)) + new_stuff = true; + } + } free (avail); } } @@ -4948,7 +4979,8 @@ execute_pre (bool do_fre) { unsigned int todo = 0; - do_partial_partial = optimize > 2 && optimize_function_for_speed_p (cfun); + do_partial_partial = + flag_tree_partial_pre && optimize_function_for_speed_p (cfun); /* This has to happen before SCCVN runs because loop_optimizer_init may create new phis, etc. */