Give fast DCE a separate dirty flag

Thomas pointed out that we sometimes failed to eliminate some dead code
(specifically clobbers of otherwise unused registers) on nvptx when
late-combine is enabled.  This happens because:

- combine is able to optimise the function in a way that exposes dead code.
  This leaves the df information in a "dirty" state.

- late_combine calls df_analyze without DF_LR_RUN_DCE run set.
  This updates the df information and clears the "dirty" state.

- late_combine doesn't find any extra optimisations, and so leaves
  the df information up-to-date.

- if_after_combine (ce2) calls df_analyze with DF_LR_RUN_DCE set.
  Because the df information is already up-to-date, fast DCE is
  not run.

The upshot is that running late-combine has the effect of suppressing
a DCE opportunity that would have been noticed without late_combine.

I think this shows that we should track the state of the DCE separately
from the LR problem.  Every pass updates the latter, but not all passes
update the former.

gcc/
	* df.h (DF_LR_DCE): New df_problem_id.
	(df_lr_dce): New macro.
	* df-core.cc (rest_of_handle_df_finish): Check for a null free_fun.
	* df-problems.cc (df_lr_finalize): Split out fast DCE handling to...
	(df_lr_dce_finalize): ...this new function.
	(problem_LR_DCE): New df_problem.
	(df_lr_add_problem): Register LR_DCE rather than LR itself.
	* dce.cc (fast_dce): Clear df_lr_dce->solutions_dirty.
This commit is contained in:
Richard Sandiford 2024-07-03 09:17:42 +01:00
parent 239ad907b1
commit 47ea6bddd1
4 changed files with 74 additions and 30 deletions

View file

@ -1182,6 +1182,9 @@ fast_dce (bool word_level)
BITMAP_FREE (processed);
BITMAP_FREE (redo_out);
BITMAP_FREE (all_blocks);
/* Both forms of DCE should make further DCE unnecessary. */
df_lr_dce->solutions_dirty = false;
}

View file

@ -806,7 +806,8 @@ rest_of_handle_df_finish (void)
for (i = 0; i < df->num_problems_defined; i++)
{
struct dataflow *dflow = df->problems_in_order[i];
dflow->problem->free_fun ();
if (dflow->problem->free_fun)
dflow->problem->free_fun ();
}
free (df->postorder);

View file

@ -1054,37 +1054,10 @@ df_lr_transfer_function (int bb_index)
}
/* Run the fast dce as a side effect of building LR. */
static void
df_lr_finalize (bitmap all_blocks)
df_lr_finalize (bitmap)
{
df_lr->solutions_dirty = false;
if (df->changeable_flags & DF_LR_RUN_DCE)
{
run_fast_df_dce ();
/* If dce deletes some instructions, we need to recompute the lr
solution before proceeding further. The problem is that fast
dce is a pessimestic dataflow algorithm. In the case where
it deletes a statement S inside of a loop, the uses inside of
S may not be deleted from the dataflow solution because they
were carried around the loop. While it is conservatively
correct to leave these extra bits, the standards of df
require that we maintain the best possible (least fixed
point) solution. The only way to do that is to redo the
iteration from the beginning. See PR35805 for an
example. */
if (df_lr->solutions_dirty)
{
df_clear_flags (DF_LR_RUN_DCE);
df_lr_alloc (all_blocks);
df_lr_local_compute (all_blocks);
df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks);
df_lr_finalize (all_blocks);
df_set_flags (DF_LR_RUN_DCE);
}
}
}
@ -1266,6 +1239,69 @@ static const struct df_problem problem_LR =
false /* Reset blocks on dropping out of blocks_to_analyze. */
};
/* Run the fast DCE after building LR. This is a separate problem so that
the "dirty" flag is only cleared after a DCE pass is actually run. */
static void
df_lr_dce_finalize (bitmap all_blocks)
{
if (!(df->changeable_flags & DF_LR_RUN_DCE))
return;
/* Also clears df_lr_dce->solutions_dirty. */
run_fast_df_dce ();
/* If dce deletes some instructions, we need to recompute the lr
solution before proceeding further. The problem is that fast
dce is a pessimestic dataflow algorithm. In the case where
it deletes a statement S inside of a loop, the uses inside of
S may not be deleted from the dataflow solution because they
were carried around the loop. While it is conservatively
correct to leave these extra bits, the standards of df
require that we maintain the best possible (least fixed
point) solution. The only way to do that is to redo the
iteration from the beginning. See PR35805 for an
example. */
if (df_lr->solutions_dirty)
{
df_clear_flags (DF_LR_RUN_DCE);
df_lr_alloc (all_blocks);
df_lr_local_compute (all_blocks);
df_worklist_dataflow (df_lr, all_blocks, df->postorder, df->n_blocks);
df_lr_finalize (all_blocks);
df_set_flags (DF_LR_RUN_DCE);
}
}
static const struct df_problem problem_LR_DCE
{
DF_LR_DCE, /* Problem id. */
DF_BACKWARD, /* Direction (arbitrary). */
NULL, /* Allocate the problem specific data. */
NULL, /* Reset global information. */
NULL, /* Free basic block info. */
NULL, /* Local compute function. */
NULL, /* Init the solution specific data. */
NULL, /* Worklist solver. */
NULL, /* Confluence operator 0. */
NULL, /* Confluence operator n. */
NULL, /* Transfer function. */
df_lr_dce_finalize, /* Finalize function. */
NULL, /* Free all of the problem information. */
NULL, /* Remove this problem from the stack of dataflow problems. */
NULL, /* Debugging. */
NULL, /* Debugging start block. */
NULL, /* Debugging end block. */
NULL, /* Debugging start insn. */
NULL, /* Debugging end insn. */
NULL, /* Incremental solution verify start. */
NULL, /* Incremental solution verify end. */
&problem_LR, /* Dependent problem. */
0, /* Size of entry of block_info array. */
TV_DF_LR, /* Timing variable. */
false /* Reset blocks on dropping out of blocks_to_analyze. */
};
/* Create a new DATAFLOW instance and add it to an existing instance
of DF. The returned structure is what is used to get at the
@ -1274,7 +1310,9 @@ static const struct df_problem problem_LR =
void
df_lr_add_problem (void)
{
df_add_problem (&problem_LR);
/* Also add the fast DCE problem. It is then conditionally enabled by
the DF_LR_RUN_DCE flag. */
df_add_problem (&problem_LR_DCE);
/* These will be initialized when df_scan_blocks processes each
block. */
df_lr->out_of_date_transfer_functions = BITMAP_ALLOC (&df_bitmap_obstack);

View file

@ -47,6 +47,7 @@ enum df_problem_id
{
DF_SCAN,
DF_LR, /* Live Registers backward. */
DF_LR_DCE, /* Dead code elimination post-pass for LR. */
DF_LIVE, /* Live Registers & Uninitialized Registers */
DF_RD, /* Reaching Defs. */
DF_CHAIN, /* Def-Use and/or Use-Def Chains. */
@ -940,6 +941,7 @@ extern class df_d *df;
#define df_scan (df->problems_by_index[DF_SCAN])
#define df_rd (df->problems_by_index[DF_RD])
#define df_lr (df->problems_by_index[DF_LR])
#define df_lr_dce (df->problems_by_index[DF_LR_DCE])
#define df_live (df->problems_by_index[DF_LIVE])
#define df_chain (df->problems_by_index[DF_CHAIN])
#define df_word_lr (df->problems_by_index[DF_WORD_LR])