re PR tree-optimization/38785 (huge performance regression on EEMBC bitmnp01)
PR tree-optimization/38785 * common.opt (ftree-partial-pre): New option. * doc/invoke.texi: Document it. * opts.c (default_options_table): Initialize flag_tree_partial_pre. * tree-ssa-pre.c (do_partial_partial_insertion): Insert only if it will benefit speed path. (execute_pre): Use flag_tree_partial_pre. Co-Authored-By: Maxim Kuvyrkov <maxim@codesourcery.com> Co-Authored-By: Steven Bosscher <steven@gcc.gnu.org> From-SVN: r186928
This commit is contained in:
parent
52556f0477
commit
fa06ad0d58
5 changed files with 66 additions and 11 deletions
|
@ -1,3 +1,15 @@
|
|||
2012-04-28 Joern Rennecke <joern.rennecke@embecosm.com>
|
||||
Steven Bosscher <steven@gcc.gnu.org>
|
||||
Maxim Kuvyrkov <maxim@codesourcery.com>
|
||||
|
||||
PR tree-optimization/38785
|
||||
* common.opt (ftree-partial-pre): New option.
|
||||
* doc/invoke.texi: Document it.
|
||||
* opts.c (default_options_table): Initialize flag_tree_partial_pre.
|
||||
* tree-ssa-pre.c (do_partial_partial_insertion): Insert only if it will
|
||||
benefit speed path.
|
||||
(execute_pre): Use flag_tree_partial_pre.
|
||||
|
||||
2012-04-27 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
PR target/52999
|
||||
|
|
|
@ -2033,6 +2033,10 @@ ftree-pre
|
|||
Common Report Var(flag_tree_pre) Optimization
|
||||
Enable SSA-PRE optimization on trees
|
||||
|
||||
ftree-partial-pre
|
||||
Common Report Var(flag_tree_partial_pre) Optimization
|
||||
In SSA-PRE optimization on trees, enable partial-partial redundancy elimination
|
||||
|
||||
ftree-pta
|
||||
Common Report Var(flag_tree_pta) Init(1) Optimization
|
||||
Perform function-local points-to analysis on trees.
|
||||
|
|
|
@ -410,7 +410,8 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-ftree-loop-if-convert-stores -ftree-loop-im @gol
|
||||
-ftree-phiprop -ftree-loop-distribution -ftree-loop-distribute-patterns @gol
|
||||
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
|
||||
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-pta -ftree-reassoc @gol
|
||||
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
|
||||
-ftree-reassoc @gol
|
||||
-ftree-sink -ftree-sra -ftree-switch-conversion -ftree-tail-merge @gol
|
||||
-ftree-ter -ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
|
||||
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
|
||||
|
@ -6294,8 +6295,8 @@ invoking @option{-O2} on programs that use computed gotos.
|
|||
Optimize yet more. @option{-O3} turns on all optimizations specified
|
||||
by @option{-O2} and also turns on the @option{-finline-functions},
|
||||
@option{-funswitch-loops}, @option{-fpredictive-commoning},
|
||||
@option{-fgcse-after-reload}, @option{-ftree-vectorize} and
|
||||
@option{-fipa-cp-clone} options.
|
||||
@option{-fgcse-after-reload}, @option{-ftree-vectorize},
|
||||
@option{-ftree-partial-pre} and @option{-fipa-cp-clone} options.
|
||||
|
||||
@item -O0
|
||||
@opindex O0
|
||||
|
@ -7090,6 +7091,11 @@ at @option{-O} and higher.
|
|||
Perform partial redundancy elimination (PRE) on trees. This flag is
|
||||
enabled by default at @option{-O2} and @option{-O3}.
|
||||
|
||||
@item -ftree-partial-pre
|
||||
@opindex ftree-partial-pre
|
||||
Make partial redundancy elimination (PRE) more aggressive by also enabling
partial-partial redundancy elimination. This flag is
|
||||
enabled by default at @option{-O3}.
|
||||
|
||||
@item -ftree-forwprop
|
||||
@opindex ftree-forwprop
|
||||
Perform forward propagation on trees. This flag is enabled by default
|
||||
|
|
|
@ -499,6 +499,7 @@ static const struct default_options default_options_table[] =
|
|||
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_vectorize, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
|
||||
|
||||
/* -Ofast adds optimizations to -O3. */
|
||||
{ OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
|
||||
|
|
|
@ -3774,20 +3774,51 @@ do_partial_partial_insertion (basic_block block, basic_block dom)
|
|||
}
|
||||
else
|
||||
avail[bprime->index] = edoubleprime;
|
||||
|
||||
}
|
||||
|
||||
/* If we can insert it, it's not the same value
|
||||
already existing along every predecessor, and
|
||||
it's defined by some predecessor, it is
|
||||
partially redundant. */
|
||||
if (!cant_insert && by_all && dbg_cnt (treepre_insert))
|
||||
if (!cant_insert && by_all)
|
||||
{
|
||||
pre_stats.pa_insert++;
|
||||
if (insert_into_preds_of_block (block, get_expression_id (expr),
|
||||
avail))
|
||||
new_stuff = true;
|
||||
}
|
||||
edge succ;
|
||||
bool do_insertion = false;
|
||||
|
||||
/* Insert only if we can remove a later expression on a path
|
||||
that we want to optimize for speed.
|
||||
The phi node that we will be inserting in BLOCK is not free,
|
||||
and inserting it for the sake of a !optimize_for_speed successor
|
||||
may cause regressions on the speed path. */
|
||||
FOR_EACH_EDGE (succ, ei, block->succs)
|
||||
{
|
||||
if (bitmap_set_contains_value (PA_IN (succ->dest), val))
|
||||
{
|
||||
if (optimize_edge_for_speed_p (succ))
|
||||
do_insertion = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!do_insertion)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Skipping partial partial redundancy "
|
||||
"for expression ");
|
||||
print_pre_expr (dump_file, expr);
|
||||
fprintf (dump_file, " (%04d), not partially anticipated "
|
||||
"on any to be optimized for speed edges\n", val);
|
||||
}
|
||||
}
|
||||
else if (dbg_cnt (treepre_insert))
|
||||
{
|
||||
pre_stats.pa_insert++;
|
||||
if (insert_into_preds_of_block (block,
|
||||
get_expression_id (expr),
|
||||
avail))
|
||||
new_stuff = true;
|
||||
}
|
||||
}
|
||||
free (avail);
|
||||
}
|
||||
}
|
||||
|
@ -4948,7 +4979,8 @@ execute_pre (bool do_fre)
|
|||
{
|
||||
unsigned int todo = 0;
|
||||
|
||||
do_partial_partial = optimize > 2 && optimize_function_for_speed_p (cfun);
|
||||
do_partial_partial =
|
||||
flag_tree_partial_pre && optimize_function_for_speed_p (cfun);
|
||||
|
||||
/* This has to happen before SCCVN runs because
|
||||
loop_optimizer_init may create new phis, etc. */
|
||||
|
|
Loading…
Add table
Reference in a new issue