Give fast DCE a separate dirty flag
Thomas pointed out that we sometimes failed to eliminate some dead code (specifically clobbers of otherwise unused registers) on nvptx when late-combine is enabled. This happens because: - combine is able to optimise the function in a way that exposes dead code. This leaves the df information in a "dirty" state. - late_combine calls df_analyze without DF_LR_RUN_DCE run set. This updates the df information and clears the "dirty" state. - late_combine doesn't find any extra optimisations, and so leaves the df information up-to-date. - if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set. Because the df information is already up-to-date, fast DCE is not run. The upshot is that running late-combine has the effect of suppressing a DCE opportunity that would have been noticed without late_combine. I think this shows that we should track the state of the DCE separately from the LR problem. Every pass updates the latter, but not all passes update the former. gcc/ * df.h (DF_LR_DCE): New df_problem_id. (df_lr_dce): New macro. * df-core.cc (rest_of_handle_df_finish): Check for a null free_fun. * df-problems.cc (df_lr_finalize): Split out fast DCE handling to... (df_lr_dce_finalize): ...this new function. (problem_LR_DCE): New df_problem. (df_lr_add_problem): Register LR_DCE rather than LR itself. * dce.cc (fast_dce): Clear df_lr_dce->solutions_dirty.
This commit is contained in:
parent
239ad907b1
commit
47ea6bddd1
4 changed files with 74 additions and 30 deletions
|
@ -1182,6 +1182,9 @@ fast_dce (bool word_level)
|
|||
BITMAP_FREE (processed);
|
||||
BITMAP_FREE (redo_out);
|
||||
BITMAP_FREE (all_blocks);
|
||||
|
||||
/* Both forms of DCE should make further DCE unnecessary. */
|
||||
df_lr_dce->solutions_dirty = false;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -806,7 +806,8 @@ rest_of_handle_df_finish (void)
|
|||
for (i = 0; i < df->num_problems_defined; i++)
|
||||
{
|
||||
struct dataflow *dflow = df->problems_in_order[i];
|
||||
dflow->problem->free_fun ();
|
||||
if (dflow->problem->free_fun)
|
||||
dflow->problem->free_fun ();
|
||||
}
|
||||
|
||||
free (df->postorder);
|
||||
|
|
|
@ -1054,37 +1054,10 @@ df_lr_transfer_function (int bb_index)
|
|||
}
|
||||
|
||||
|
||||
/* Run the fast dce as a side effect of building LR. */
|
||||
|
||||
static void
|
||||
df_lr_finalize (bitmap all_blocks)
|
||||
df_lr_finalize (bitmap)
|
||||
{
|
||||
df_lr->solutions_dirty = false;
|
||||
if (df->changeable_flags & DF_LR_RUN_DCE)
|
||||
{
|
||||
run_fast_df_dce ();
|
||||
|
||||
/* If dce deletes some instructions, we need to recompute the lr
|
||||
solution before proceeding further. The problem is that fast
|
||||
dce is a pessimestic dataflow algorithm. In the case where
|
||||
it deletes a statement S inside of a loop, the uses inside of
|
||||
S may not be deleted from the dataflow solution because they
|
||||
were carried around the loop. While it is conservatively
|
||||
correct to leave these extra bits, the standards of df
|
||||
require that we maintain the best possible (least fixed
|
||||
point) solution. The only way to do that is to redo the
|
||||
iteration from the beginning. See PR35805 for an
|
||||
example. */
|
||||
if (df_lr->solutions_dirty)
|
||||
{
|
||||
df_clear_flags (DF_LR_RUN_DCE);
|
||||
df_lr_alloc (all_blocks);
|
||||
df_lr_local_compute (all_blocks);
|
||||
df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks);
|
||||
df_lr_finalize (all_blocks);
|
||||
df_set_flags (DF_LR_RUN_DCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1266,6 +1239,69 @@ static const struct df_problem problem_LR =
|
|||
false /* Reset blocks on dropping out of blocks_to_analyze. */
|
||||
};
|
||||
|
||||
/* Run the fast DCE after building LR. This is a separate problem so that
|
||||
the "dirty" flag is only cleared after a DCE pass is actually run. */
|
||||
|
||||
static void
|
||||
df_lr_dce_finalize (bitmap all_blocks)
|
||||
{
|
||||
if (!(df->changeable_flags & DF_LR_RUN_DCE))
|
||||
return;
|
||||
|
||||
/* Also clears df_lr_dce->solutions_dirty. */
|
||||
run_fast_df_dce ();
|
||||
|
||||
/* If dce deletes some instructions, we need to recompute the lr
|
||||
solution before proceeding further. The problem is that fast
|
||||
dce is a pessimestic dataflow algorithm. In the case where
|
||||
it deletes a statement S inside of a loop, the uses inside of
|
||||
S may not be deleted from the dataflow solution because they
|
||||
were carried around the loop. While it is conservatively
|
||||
correct to leave these extra bits, the standards of df
|
||||
require that we maintain the best possible (least fixed
|
||||
point) solution. The only way to do that is to redo the
|
||||
iteration from the beginning. See PR35805 for an
|
||||
example. */
|
||||
if (df_lr->solutions_dirty)
|
||||
{
|
||||
df_clear_flags (DF_LR_RUN_DCE);
|
||||
df_lr_alloc (all_blocks);
|
||||
df_lr_local_compute (all_blocks);
|
||||
df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks);
|
||||
df_lr_finalize (all_blocks);
|
||||
df_set_flags (DF_LR_RUN_DCE);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct df_problem problem_LR_DCE
|
||||
{
|
||||
DF_LR_DCE, /* Problem id. */
|
||||
DF_BACKWARD, /* Direction (arbitrary). */
|
||||
NULL, /* Allocate the problem specific data. */
|
||||
NULL, /* Reset global information. */
|
||||
NULL, /* Free basic block info. */
|
||||
NULL, /* Local compute function. */
|
||||
NULL, /* Init the solution specific data. */
|
||||
NULL, /* Worklist solver. */
|
||||
NULL, /* Confluence operator 0. */
|
||||
NULL, /* Confluence operator n. */
|
||||
NULL, /* Transfer function. */
|
||||
df_lr_dce_finalize, /* Finalize function. */
|
||||
NULL, /* Free all of the problem information. */
|
||||
NULL, /* Remove this problem from the stack of dataflow problems. */
|
||||
NULL, /* Debugging. */
|
||||
NULL, /* Debugging start block. */
|
||||
NULL, /* Debugging end block. */
|
||||
NULL, /* Debugging start insn. */
|
||||
NULL, /* Debugging end insn. */
|
||||
NULL, /* Incremental solution verify start. */
|
||||
NULL, /* Incremental solution verify end. */
|
||||
&problem_LR, /* Dependent problem. */
|
||||
0, /* Size of entry of block_info array. */
|
||||
TV_DF_LR, /* Timing variable. */
|
||||
false /* Reset blocks on dropping out of blocks_to_analyze. */
|
||||
};
|
||||
|
||||
|
||||
/* Create a new DATAFLOW instance and add it to an existing instance
|
||||
of DF. The returned structure is what is used to get at the
|
||||
|
@ -1274,7 +1310,9 @@ static const struct df_problem problem_LR =
|
|||
void
|
||||
df_lr_add_problem (void)
|
||||
{
|
||||
df_add_problem (&problem_LR);
|
||||
/* Also add the fast DCE problem. It is then conditionally enabled by
|
||||
the DF_LR_RUN_DCE flag. */
|
||||
df_add_problem (&problem_LR_DCE);
|
||||
/* These will be initialized when df_scan_blocks processes each
|
||||
block. */
|
||||
df_lr->out_of_date_transfer_functions = BITMAP_ALLOC (&df_bitmap_obstack);
|
||||
|
|
2
gcc/df.h
2
gcc/df.h
|
@ -47,6 +47,7 @@ enum df_problem_id
|
|||
{
|
||||
DF_SCAN,
|
||||
DF_LR, /* Live Registers backward. */
|
||||
DF_LR_DCE, /* Dead code elimination post-pass for LR. */
|
||||
DF_LIVE, /* Live Registers & Uninitialized Registers */
|
||||
DF_RD, /* Reaching Defs. */
|
||||
DF_CHAIN, /* Def-Use and/or Use-Def Chains. */
|
||||
|
@ -940,6 +941,7 @@ extern class df_d *df;
|
|||
#define df_scan (df->problems_by_index[DF_SCAN])
|
||||
#define df_rd (df->problems_by_index[DF_RD])
|
||||
#define df_lr (df->problems_by_index[DF_LR])
|
||||
#define df_lr_dce (df->problems_by_index[DF_LR_DCE])
|
||||
#define df_live (df->problems_by_index[DF_LIVE])
|
||||
#define df_chain (df->problems_by_index[DF_CHAIN])
|
||||
#define df_word_lr (df->problems_by_index[DF_WORD_LR])
|
||||
|
|
Loading…
Add table
Reference in a new issue