re PR tree-optimization/26854 (Inordinate compile times on large routines)
2008-01-19 Kenneth Zadeck <zadeck@naturalbridge.com> PR rtl-optimization/26854 PR rtl-optimization/34400 * ddg.c (create_ddg_dep_from_intra_loop_link): Do not use DF_RD->gen. * df.h (df_changeable_flags.DF_RD_NO_TRIM): New. (df_rd_bb_info.expanded_lr_out): New. * loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag. * loop_iv.c (iv_analysis_loop_init): Ditto. * df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n, df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free): Added code to allocate, initialize or free expanded_lr_out. (df_rd_bb_local_compute_process_def): Restructured to make more understandable. (df_rd_confluence_n): Add code to do nothing with fake edges and code to no apply invalidate_by_call sets if the sets are being trimmed. (df_lr_local_finalize): Renamed to df_lr_finalize. (df_live_local_finalize): Renamed to df_live_finalize. From-SVN: r131670
This commit is contained in:
parent
1828e8e9bf
commit
2b49e1a09d
6 changed files with 160 additions and 71 deletions
|
@ -1,3 +1,23 @@
|
|||
2008-01-19 Kenneth Zadeck <zadeck@naturalbridge.com>
|
||||
|
||||
PR rtl-optimization/26854
|
||||
PR rtl-optimization/34400
|
||||
* ddg.c (create_ddg_dep_from_intra_loop_link): Do not use
|
||||
DF_RD->gen.
|
||||
* df.h (df_changeable_flags.DF_RD_NO_TRIM): New.
|
||||
(df_rd_bb_info.expanded_lr_out): New.
|
||||
* loop_invariant.c (find_defs): Added DF_RD_NO_TRIM flag.
|
||||
* loop_iv.c (iv_analysis_loop_init): Ditto.
|
||||
* df-problems.c (df_rd_free_bb_info, df_rd_alloc, df_rd_confluence_n,
|
||||
df_rd_bb_local_compute, df_rd_transfer_function, df_rd_free):
|
||||
Added code to allocate, initialize or free expanded_lr_out.
|
||||
(df_rd_bb_local_compute_process_def): Restructured to make
|
||||
more understandable.
|
||||
(df_rd_confluence_n): Add code to do nothing with fake edges and
|
||||
code to no apply invalidate_by_call sets if the sets are being trimmed.
|
||||
(df_lr_local_finalize): Renamed to df_lr_finalize.
|
||||
(df_live_local_finalize): Renamed to df_live_finalize.
|
||||
|
||||
2008-01-20 Richard Sandiford <rsandifo@nildram.co.uk>
|
||||
|
||||
PR target/34831
|
||||
|
|
|
@ -184,12 +184,13 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
|
|||
{
|
||||
int regno = REGNO (SET_DEST (set));
|
||||
struct df_ref *first_def;
|
||||
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
|
||||
struct df_ref *last_def;
|
||||
|
||||
first_def = df_bb_regno_first_def_find (g->bb, regno);
|
||||
gcc_assert (first_def);
|
||||
|
||||
if (bitmap_bit_p (bb_info->gen, first_def->id))
|
||||
last_def = df_bb_regno_last_def_find (g->bb, regno);
|
||||
if (first_def == last_def)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -245,6 +245,8 @@ df_rd_free_bb_info (basic_block bb ATTRIBUTE_UNUSED,
|
|||
struct df_rd_bb_info *bb_info = (struct df_rd_bb_info *) vbb_info;
|
||||
if (bb_info)
|
||||
{
|
||||
if (bb_info->expanded_lr_out)
|
||||
BITMAP_FREE (bb_info->expanded_lr_out);
|
||||
BITMAP_FREE (bb_info->kill);
|
||||
BITMAP_FREE (bb_info->sparse_kill);
|
||||
BITMAP_FREE (bb_info->gen);
|
||||
|
@ -298,6 +300,8 @@ df_rd_alloc (bitmap all_blocks)
|
|||
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
|
||||
if (bb_info)
|
||||
{
|
||||
if (bb_info->expanded_lr_out)
|
||||
bitmap_clear (bb_info->expanded_lr_out);
|
||||
bitmap_clear (bb_info->kill);
|
||||
bitmap_clear (bb_info->sparse_kill);
|
||||
bitmap_clear (bb_info->gen);
|
||||
|
@ -306,6 +310,10 @@ df_rd_alloc (bitmap all_blocks)
|
|||
{
|
||||
bb_info = (struct df_rd_bb_info *) pool_alloc (df_rd->block_pool);
|
||||
df_rd_set_bb_info (bb_index, bb_info);
|
||||
if (df->changeable_flags & DF_RD_NO_TRIM)
|
||||
bb_info->expanded_lr_out = NULL;
|
||||
else
|
||||
bb_info->expanded_lr_out = BITMAP_ALLOC (&problem_data->rd_bitmaps);
|
||||
bb_info->kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
|
||||
bb_info->sparse_kill = BITMAP_ALLOC (&problem_data->rd_bitmaps);
|
||||
bb_info->gen = BITMAP_ALLOC (&problem_data->rd_bitmaps);
|
||||
|
@ -320,56 +328,53 @@ df_rd_alloc (bitmap all_blocks)
|
|||
/* Process a list of DEFs for df_rd_bb_local_compute. */
|
||||
|
||||
static void
|
||||
df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
|
||||
df_rd_bb_local_compute_process_def (struct df_rd_bb_info *bb_info,
|
||||
struct df_ref **def_rec,
|
||||
enum df_ref_flags top_flag)
|
||||
{
|
||||
while (*def_rec)
|
||||
for (; *def_rec; def_rec++)
|
||||
{
|
||||
struct df_ref *def = *def_rec;
|
||||
if (top_flag == (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
|
||||
unsigned int regno = DF_REF_REGNO (def);
|
||||
|
||||
/* This makes sure we do the artificial defs in the right order
|
||||
since they are all in the same list. */
|
||||
if (top_flag != (DF_REF_FLAGS (def) & DF_REF_AT_TOP))
|
||||
continue;
|
||||
|
||||
/* Skip over the hard regs if we do not care about them. */
|
||||
if ((df->changeable_flags & DF_NO_HARD_REGS) &&
|
||||
(regno < FIRST_PSEUDO_REGISTER))
|
||||
continue;
|
||||
|
||||
/* Only the last def(s) for a regno in the block has any
|
||||
effect. */
|
||||
if (bitmap_bit_p (seen_in_block, regno))
|
||||
continue;
|
||||
|
||||
/* The first def for regno in insn gets to knock out the
|
||||
defs from other instructions. */
|
||||
if ((!bitmap_bit_p (seen_in_insn, regno))
|
||||
/* If the def is to only part of the reg, it does
|
||||
not kill the other defs that reach here. */
|
||||
&& (!(DF_REF_FLAGS (def) &
|
||||
(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
|
||||
{
|
||||
unsigned int regno = DF_REF_REGNO (def);
|
||||
unsigned int begin = DF_DEFS_BEGIN (regno);
|
||||
unsigned int n_defs = DF_DEFS_COUNT (regno);
|
||||
|
||||
if ((!(df->changeable_flags & DF_NO_HARD_REGS))
|
||||
|| (regno >= FIRST_PSEUDO_REGISTER))
|
||||
{
|
||||
/* Only the last def(s) for a regno in the block has any
|
||||
effect. */
|
||||
if (!bitmap_bit_p (seen_in_block, regno))
|
||||
{
|
||||
/* The first def for regno in insn gets to knock out the
|
||||
defs from other instructions. */
|
||||
if ((!bitmap_bit_p (seen_in_insn, regno))
|
||||
/* If the def is to only part of the reg, it does
|
||||
not kill the other defs that reach here. */
|
||||
&& (!(DF_REF_FLAGS (def) &
|
||||
(DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER))))
|
||||
{
|
||||
if (n_defs > DF_SPARSE_THRESHOLD)
|
||||
{
|
||||
bitmap_set_bit (bb_info->sparse_kill, regno);
|
||||
bitmap_clear_range(bb_info->gen, begin, n_defs);
|
||||
}
|
||||
else
|
||||
{
|
||||
bitmap_set_range (bb_info->kill, begin, n_defs);
|
||||
bitmap_clear_range (bb_info->gen, begin, n_defs);
|
||||
}
|
||||
}
|
||||
|
||||
bitmap_set_bit (seen_in_insn, regno);
|
||||
/* All defs for regno in the instruction may be put into
|
||||
the gen set. */
|
||||
if (!(DF_REF_FLAGS (def)
|
||||
& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
|
||||
bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
|
||||
}
|
||||
}
|
||||
if (n_defs > DF_SPARSE_THRESHOLD)
|
||||
bitmap_set_bit (bb_info->sparse_kill, regno);
|
||||
else
|
||||
bitmap_set_range (bb_info->kill, begin, n_defs);
|
||||
bitmap_clear_range(bb_info->gen, begin, n_defs);
|
||||
}
|
||||
def_rec++;
|
||||
|
||||
bitmap_set_bit (seen_in_insn, regno);
|
||||
/* All defs for regno in the instruction may be put into
|
||||
the gen set. */
|
||||
if (!(DF_REF_FLAGS (def)
|
||||
& (DF_REF_MUST_CLOBBER | DF_REF_MAY_CLOBBER)))
|
||||
bitmap_set_bit (bb_info->gen, DF_REF_ID (def));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -380,14 +385,28 @@ df_rd_bb_local_compute (unsigned int bb_index)
|
|||
{
|
||||
basic_block bb = BASIC_BLOCK (bb_index);
|
||||
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (bb_index);
|
||||
struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
|
||||
rtx insn;
|
||||
|
||||
bitmap_clear (seen_in_block);
|
||||
bitmap_clear (seen_in_insn);
|
||||
|
||||
if (!(df->changeable_flags & DF_RD_NO_TRIM))
|
||||
{
|
||||
unsigned int regno;
|
||||
bitmap_iterator bi;
|
||||
int first_reg = (df->changeable_flags & DF_NO_HARD_REGS) ? FIRST_PSEUDO_REGISTER : 0;
|
||||
EXECUTE_IF_SET_IN_BITMAP (lr_bb_info->out, first_reg, regno, bi)
|
||||
{
|
||||
unsigned int begin = DF_DEFS_BEGIN (regno);
|
||||
unsigned int n_defs = DF_DEFS_COUNT (regno);
|
||||
bitmap_set_range (bb_info->expanded_lr_out, begin, n_defs);
|
||||
}
|
||||
}
|
||||
|
||||
/* Artificials are only hard regs. */
|
||||
if (!(df->changeable_flags & DF_NO_HARD_REGS))
|
||||
df_rd_bb_local_compute_process_def (bb_info,
|
||||
df_rd_bb_local_compute_process_def (bb_info,
|
||||
df_get_artificial_defs (bb_index),
|
||||
0);
|
||||
|
||||
|
@ -482,7 +501,13 @@ df_rd_confluence_n (edge e)
|
|||
bitmap op1 = df_rd_get_bb_info (e->dest->index)->in;
|
||||
bitmap op2 = df_rd_get_bb_info (e->src->index)->out;
|
||||
|
||||
if (e->flags & EDGE_EH)
|
||||
if (e->flags & EDGE_FAKE)
|
||||
return;
|
||||
|
||||
/* If we are trimming the solution, the invalidated_by_call code in
|
||||
the lr problem makes this unnecessary. However, if we do not
|
||||
trim, we must take this into account. */
|
||||
if ((df->changeable_flags & DF_RD_NO_TRIM) && e->flags & EDGE_EH)
|
||||
{
|
||||
struct df_rd_problem_data *problem_data
|
||||
= (struct df_rd_problem_data *) df_rd->problem_data;
|
||||
|
@ -490,7 +515,7 @@ df_rd_confluence_n (edge e)
|
|||
bitmap dense_invalidated = problem_data->dense_invalidated_by_call;
|
||||
bitmap_iterator bi;
|
||||
unsigned int regno;
|
||||
bitmap tmp = BITMAP_ALLOC (&df_bitmap_obstack);
|
||||
bitmap tmp = BITMAP_ALLOC (&problem_data->rd_bitmaps);
|
||||
|
||||
bitmap_copy (tmp, op2);
|
||||
bitmap_and_compl_into (tmp, dense_invalidated);
|
||||
|
@ -522,13 +547,13 @@ df_rd_transfer_function (int bb_index)
|
|||
bitmap gen = bb_info->gen;
|
||||
bitmap kill = bb_info->kill;
|
||||
bitmap sparse_kill = bb_info->sparse_kill;
|
||||
bool changed = false;
|
||||
|
||||
if (bitmap_empty_p (sparse_kill))
|
||||
return bitmap_ior_and_compl (out, gen, in, kill);
|
||||
if ((df->changeable_flags & DF_RD_NO_TRIM) && bitmap_empty_p (sparse_kill))
|
||||
changed = bitmap_ior_and_compl (out, gen, in, kill);
|
||||
else
|
||||
{
|
||||
struct df_rd_problem_data *problem_data;
|
||||
bool changed = false;
|
||||
bitmap tmp;
|
||||
|
||||
/* Note that TMP is _not_ a temporary bitmap if we end up replacing
|
||||
|
@ -545,6 +570,8 @@ df_rd_transfer_function (int bb_index)
|
|||
}
|
||||
bitmap_and_compl_into (tmp, kill);
|
||||
bitmap_ior_into (tmp, gen);
|
||||
if (!(df->changeable_flags & DF_RD_NO_TRIM))
|
||||
bitmap_and_into (tmp, bb_info->expanded_lr_out);
|
||||
changed = !bitmap_equal_p (tmp, out);
|
||||
if (changed)
|
||||
{
|
||||
|
@ -552,9 +579,10 @@ df_rd_transfer_function (int bb_index)
|
|||
bb_info->out = tmp;
|
||||
}
|
||||
else
|
||||
BITMAP_FREE (tmp);
|
||||
return changed;
|
||||
BITMAP_FREE (tmp);
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
||||
|
@ -574,6 +602,8 @@ df_rd_free (void)
|
|||
struct df_rd_bb_info *bb_info = df_rd_get_bb_info (i);
|
||||
if (bb_info)
|
||||
{
|
||||
if (bb_info->expanded_lr_out)
|
||||
BITMAP_FREE (bb_info->expanded_lr_out);
|
||||
BITMAP_FREE (bb_info->kill);
|
||||
BITMAP_FREE (bb_info->sparse_kill);
|
||||
BITMAP_FREE (bb_info->gen);
|
||||
|
@ -1015,7 +1045,7 @@ df_lr_transfer_function (int bb_index)
|
|||
/* Run the fast dce as a side effect of building LR. */
|
||||
|
||||
static void
|
||||
df_lr_local_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
|
||||
df_lr_finalize (bitmap all_blocks ATTRIBUTE_UNUSED)
|
||||
{
|
||||
if (df->changeable_flags & DF_LR_RUN_DCE)
|
||||
{
|
||||
|
@ -1161,7 +1191,7 @@ df_lr_verify_solution_end (void)
|
|||
|
||||
if (df_lr->solutions_dirty)
|
||||
/* Do not check if the solution is still dirty. See the comment
|
||||
in df_lr_local_finalize for details. */
|
||||
in df_lr_finalize for details. */
|
||||
df_lr->solutions_dirty = false;
|
||||
else
|
||||
FOR_ALL_BB (bb)
|
||||
|
@ -1204,7 +1234,7 @@ static struct df_problem problem_LR =
|
|||
df_lr_confluence_0, /* Confluence operator 0. */
|
||||
df_lr_confluence_n, /* Confluence operator n. */
|
||||
df_lr_transfer_function, /* Transfer function. */
|
||||
df_lr_local_finalize, /* Finalize function. */
|
||||
df_lr_finalize, /* Finalize function. */
|
||||
df_lr_free, /* Free all of the problem information. */
|
||||
NULL, /* Remove this problem from the stack of dataflow problems. */
|
||||
NULL, /* Debugging. */
|
||||
|
@ -1560,7 +1590,7 @@ df_live_transfer_function (int bb_index)
|
|||
/* And the LR info with the must-initialized registers, to produce the LIVE info. */
|
||||
|
||||
static void
|
||||
df_live_local_finalize (bitmap all_blocks)
|
||||
df_live_finalize (bitmap all_blocks)
|
||||
{
|
||||
|
||||
if (df_live->solutions_dirty)
|
||||
|
@ -1751,7 +1781,7 @@ static struct df_problem problem_LIVE =
|
|||
NULL, /* Confluence operator 0. */
|
||||
df_live_confluence_n, /* Confluence operator n. */
|
||||
df_live_transfer_function, /* Transfer function. */
|
||||
df_live_local_finalize, /* Finalize function. */
|
||||
df_live_finalize, /* Finalize function. */
|
||||
df_live_free, /* Free all of the problem information. */
|
||||
df_live_free, /* Remove this problem from the stack of dataflow problems. */
|
||||
NULL, /* Debugging. */
|
||||
|
|
59
gcc/df.h
59
gcc/df.h
|
@ -402,22 +402,30 @@ enum df_changeable_flags
|
|||
{
|
||||
/* Scanning flags. */
|
||||
/* Flag to control the running of dce as a side effect of building LR. */
|
||||
DF_LR_RUN_DCE = 1, /* Run DCE. */
|
||||
DF_NO_HARD_REGS = 2, /* Skip hard registers in RD and CHAIN Building. */
|
||||
DF_EQ_NOTES = 4, /* Build chains with uses present in EQUIV/EQUAL notes. */
|
||||
DF_NO_REGS_EVER_LIVE = 8, /* Do not compute the regs_ever_live. */
|
||||
DF_LR_RUN_DCE = 1 << 0, /* Run DCE. */
|
||||
DF_NO_HARD_REGS = 1 << 1, /* Skip hard registers in RD and CHAIN Building. */
|
||||
|
||||
/* Do not trim the solution using the LR result. This can make the
|
||||
solution take much longer and take more memory. This is
|
||||
necessary for the loop optimizations, but has a very small time
|
||||
and space penalty because the loop optimizations process only a
|
||||
single loop at a time. Any pass that looks at the entire
|
||||
function should not set this flag. */
|
||||
DF_RD_NO_TRIM = 1 << 2,
|
||||
DF_EQ_NOTES = 1 << 3, /* Build chains with uses present in EQUIV/EQUAL notes. */
|
||||
DF_NO_REGS_EVER_LIVE = 1 << 4, /* Do not compute the regs_ever_live. */
|
||||
|
||||
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
|
||||
return immediately. This is used by passes that know how to update
|
||||
the scanning them selves. */
|
||||
DF_NO_INSN_RESCAN = 16,
|
||||
DF_NO_INSN_RESCAN = 1 << 5,
|
||||
|
||||
/* Cause df_insn_rescan df_notes_rescan and df_insn_delete, to
|
||||
return after marking the insn for later processing. This allows all
|
||||
rescans to be batched. */
|
||||
DF_DEFER_INSN_RESCAN = 32,
|
||||
DF_DEFER_INSN_RESCAN = 1 << 6,
|
||||
|
||||
DF_VERIFY_SCHEDULED = 64
|
||||
DF_VERIFY_SCHEDULED = 1 << 7
|
||||
};
|
||||
|
||||
/* Two of these structures are inline in df, one for the uses and one
|
||||
|
@ -705,16 +713,43 @@ struct df_scan_bb_info
|
|||
|
||||
|
||||
/* Reaching definitions. All bitmaps are indexed by the id field of
|
||||
the ref except sparse_kill (see above). */
|
||||
the ref except sparse_kill which is indexed by regno. */
|
||||
struct df_rd_bb_info
|
||||
{
|
||||
/* Local sets to describe the basic blocks. See the note in the RU
|
||||
datastructures for kill and sparse_kill. */
|
||||
/* Local sets to describe the basic blocks. */
|
||||
bitmap kill;
|
||||
bitmap sparse_kill;
|
||||
bitmap gen; /* The set of defs generated in this block. */
|
||||
|
||||
/* The results of the dataflow problem. */
|
||||
/* Expanded version of the DF_LT->out bitmap to match the positions
|
||||
of gen, in and out here. Only allocated if DF_RD_NO_TRIM is
|
||||
false. */
|
||||
bitmap expanded_lr_out;
|
||||
|
||||
/* The set of defs generated in this block. This is not set unless
|
||||
the def reaches the end of the block. */
|
||||
bitmap gen;
|
||||
|
||||
/* The results of the dataflow problem.
|
||||
|
||||
If DF_RD_NO_TRIM is not set, these sets are SOMEWHAT trimmed by
|
||||
the output of the DF_LR problem. The out set is precisely
|
||||
trimmed during propagation which means that the result is also
|
||||
trimmed when the propagation terminates. The in set is not
|
||||
explicitly trimmed, because this is expensive (adding about 5% to
|
||||
the cost of a bootstrap). However since the out sets are trimmed
|
||||
and the in sets are built from the out of the pred, the in set is
|
||||
MOSTLY trimmed.
|
||||
|
||||
The counter case happens at a branch where the variable V is in
|
||||
DF_LR->in the true branch but not the false branch. If V is
|
||||
defined before the branch, RD will propagate that into the
|
||||
DF_RD_in sets of both branches. When the block is processed, the
|
||||
DF_RD->out set will have V trimmed out of it but it will still be
|
||||
left in DF_RD->in.
|
||||
|
||||
If this not a problem for the current optimizers since they were
|
||||
designed before any trimming was available. This can be fixed by
|
||||
checking the DF_LR->in set directly. */
|
||||
bitmap in; /* At the top of the block. */
|
||||
bitmap out; /* At the bottom of the block. */
|
||||
};
|
||||
|
|
|
@ -639,6 +639,7 @@ find_defs (struct loop *loop, basic_block *body)
|
|||
df_remove_problem (df_chain);
|
||||
df_process_deferred_rescans ();
|
||||
df_chain_add_problem (DF_UD_CHAIN);
|
||||
df_set_flags (DF_RD_NO_TRIM);
|
||||
df_set_blocks (blocks);
|
||||
df_analyze ();
|
||||
|
||||
|
|
|
@ -22,9 +22,10 @@ along with GCC; see the file COPYING3. If not see
|
|||
doloop optimization and branch prediction. The iv information is computed
|
||||
on demand.
|
||||
|
||||
Induction variable is analyzed by walking the use-def chains. When a biv
|
||||
is found, it is cached in the bivs hash table. When register is proved
|
||||
to be a giv, its description is stored to DF_REF_DATA of the def reference.
|
||||
Induction variables are analyzed by walking the use-def chains. When
|
||||
a basic induction variable (biv) is found, it is cached in the bivs
|
||||
hash table. When register is proved to be a biv, its description
|
||||
is stored to DF_REF_DATA of the def reference.
|
||||
|
||||
The analysis works always with one loop -- you must call
|
||||
iv_analysis_loop_init (loop) for it. All the other functions then work with
|
||||
|
@ -277,6 +278,7 @@ iv_analysis_loop_init (struct loop *loop)
|
|||
df_remove_problem (df_chain);
|
||||
df_process_deferred_rescans ();
|
||||
df_chain_add_problem (DF_UD_CHAIN);
|
||||
df_set_flags (DF_RD_NO_TRIM);
|
||||
df_set_blocks (blocks);
|
||||
df_analyze ();
|
||||
if (dump_file)
|
||||
|
|
Loading…
Add table
Reference in a new issue