re PR target/42495 (redundant memory load)
PR target/42495 PR middle-end/42574 * basic-block.h (get_dominated_to_depth): Declare. * dominance.c (get_dominated_to_depth): New function, use get_all_dominated_blocks as a base. (get_all_dominated_blocks): Use get_dominated_to_depth. * gcse.c (occr_t, VEC (occr_t, heap)): Define. (hoist_exprs): Remove. (alloc_code_hoist_mem, free_code_hoist_mem): Update. (compute_code_hoist_vbeinout): Add debug print outs. (hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth. * params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid quadratic behavior. * params.h (MAX_HOIST_DEPTH): New macro. * doc/invoke.texi (max-hoist-depth): Document. From-SVN: r162597
This commit is contained in:
parent
9b9ee6d392
commit
cad9aa150b
7 changed files with 200 additions and 130 deletions
|
@ -1,3 +1,23 @@
|
|||
2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
|
||||
|
||||
PR target/42495
|
||||
PR middle-end/42574
|
||||
* basic-block.h (get_dominated_to_depth): Declare.
|
||||
* dominance.c (get_dominated_to_depth): New function, use
|
||||
get_all_dominated_blocks as a base.
|
||||
(get_all_dominated_blocks): Use get_dominated_to_depth.
|
||||
|
||||
* gcse.c (occr_t, VEC (occr_t, heap)): Define.
|
||||
(hoist_exprs): Remove.
|
||||
(alloc_code_hoist_mem, free_code_hoist_mem): Update.
|
||||
(compute_code_hoist_vbeinout): Add debug print outs.
|
||||
(hoist_code): Partially rewrite, simplify. Use get_dominated_to_depth.
|
||||
|
||||
* params.def (PARAM_MAX_HOIST_DEPTH): New parameter to avoid
|
||||
quadratic behavior.
|
||||
* params.h (MAX_HOIST_DEPTH): New macro.
|
||||
* doc/invoke.texi (max-hoist-depth): Document.
|
||||
|
||||
2010-07-27 Maxim Kuvyrkov <maxim@codesourcery.com>
|
||||
|
||||
PR rtl-optimization/40956
|
||||
|
|
|
@ -854,6 +854,8 @@ extern VEC (basic_block, heap) *get_dominated_by (enum cdi_direction, basic_bloc
|
|||
extern VEC (basic_block, heap) *get_dominated_by_region (enum cdi_direction,
|
||||
basic_block *,
|
||||
unsigned);
|
||||
extern VEC (basic_block, heap) *get_dominated_to_depth (enum cdi_direction,
|
||||
basic_block, int);
|
||||
extern VEC (basic_block, heap) *get_all_dominated_blocks (enum cdi_direction,
|
||||
basic_block);
|
||||
extern void add_to_dominance_info (enum cdi_direction, basic_block);
|
||||
|
|
|
@ -8260,6 +8260,12 @@ the more aggressive code hoisting will be. Specifying 0 will
|
|||
allow all expressions to travel unrestricted distances.
|
||||
The default value is 3.
|
||||
|
||||
@item max-hoist-depth
|
||||
The depth of search in the dominator tree for expressions to hoist.
|
||||
This is used to avoid quadratic behavior in hoisting algorithm.
|
||||
The value of 0 will avoid limiting the search, but may slow down compilation
|
||||
of huge functions. The default value is 30.
|
||||
|
||||
@item max-unrolled-insns
|
||||
The maximum number of instructions that a loop should have if that loop
|
||||
is unrolled, and if the loop is unrolled, it determines how many times
|
||||
|
|
|
@ -784,16 +784,20 @@ get_dominated_by_region (enum cdi_direction dir, basic_block *region,
|
|||
}
|
||||
|
||||
/* Returns the list of basic blocks including BB dominated by BB, in the
|
||||
direction DIR. The vector will be sorted in preorder. */
|
||||
direction DIR up to DEPTH in the dominator tree. The DEPTH of zero will
|
||||
produce a vector containing all dominated blocks. The vector will be sorted
|
||||
in preorder. */
|
||||
|
||||
VEC (basic_block, heap) *
|
||||
get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
|
||||
get_dominated_to_depth (enum cdi_direction dir, basic_block bb, int depth)
|
||||
{
|
||||
VEC(basic_block, heap) *bbs = NULL;
|
||||
unsigned i;
|
||||
unsigned next_level_start;
|
||||
|
||||
i = 0;
|
||||
VEC_safe_push (basic_block, heap, bbs, bb);
|
||||
next_level_start = 1; /* = VEC_length (basic_block, bbs); */
|
||||
|
||||
do
|
||||
{
|
||||
|
@ -804,12 +808,24 @@ get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
|
|||
son;
|
||||
son = next_dom_son (dir, son))
|
||||
VEC_safe_push (basic_block, heap, bbs, son);
|
||||
|
||||
if (i == next_level_start && --depth)
|
||||
next_level_start = VEC_length (basic_block, bbs);
|
||||
}
|
||||
while (i < VEC_length (basic_block, bbs));
|
||||
while (i < next_level_start);
|
||||
|
||||
return bbs;
|
||||
}
|
||||
|
||||
/* Returns the list of basic blocks including BB dominated by BB, in the
|
||||
direction DIR. The vector will be sorted in preorder. */
|
||||
|
||||
VEC (basic_block, heap) *
|
||||
get_all_dominated_blocks (enum cdi_direction dir, basic_block bb)
|
||||
{
|
||||
return get_dominated_to_depth (dir, bb, 0);
|
||||
}
|
||||
|
||||
/* Redirect all edges pointing to BB to TO. */
|
||||
void
|
||||
redirect_immediate_dominators (enum cdi_direction dir, basic_block bb,
|
||||
|
|
270
gcc/gcse.c
270
gcc/gcse.c
|
@ -329,6 +329,10 @@ struct occr
|
|||
char copied_p;
|
||||
};
|
||||
|
||||
typedef struct occr *occr_t;
|
||||
DEF_VEC_P (occr_t);
|
||||
DEF_VEC_ALLOC_P (occr_t, heap);
|
||||
|
||||
/* Expression and copy propagation hash tables.
|
||||
Each hash table is an array of buckets.
|
||||
??? It is known that if it were an array of entries, structure elements
|
||||
|
@ -4163,9 +4167,6 @@ add_label_notes (rtx x, rtx insn)
|
|||
static sbitmap *hoist_vbein;
|
||||
static sbitmap *hoist_vbeout;
|
||||
|
||||
/* Hoistable expressions. */
|
||||
static sbitmap *hoist_exprs;
|
||||
|
||||
/* ??? We could compute post dominators and run this algorithm in
|
||||
reverse to perform tail merging, doing so would probably be
|
||||
more effective than the tail merging code in jump.c.
|
||||
|
@ -4184,7 +4185,6 @@ alloc_code_hoist_mem (int n_blocks, int n_exprs)
|
|||
|
||||
hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
|
||||
hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
|
||||
hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
|
||||
}
|
||||
|
||||
/* Free vars used for code hoisting analysis. */
|
||||
|
@ -4198,7 +4198,6 @@ free_code_hoist_mem (void)
|
|||
|
||||
sbitmap_vector_free (hoist_vbein);
|
||||
sbitmap_vector_free (hoist_vbeout);
|
||||
sbitmap_vector_free (hoist_exprs);
|
||||
|
||||
free_dominance_info (CDI_DOMINATORS);
|
||||
}
|
||||
|
@ -4249,7 +4248,17 @@ compute_code_hoist_vbeinout (void)
|
|||
}
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
|
||||
{
|
||||
fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
|
||||
|
||||
FOR_EACH_BB (bb)
|
||||
{
|
||||
fprintf (dump_file, "vbein (%d): ", bb->index);
|
||||
dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
|
||||
fprintf (dump_file, "vbeout(%d): ", bb->index);
|
||||
dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Top level routine to do the dataflow analysis needed by code hoisting. */
|
||||
|
@ -4352,6 +4361,8 @@ static int
|
|||
hoist_code (void)
|
||||
{
|
||||
basic_block bb, dominated;
|
||||
VEC (basic_block, heap) *dom_tree_walk;
|
||||
unsigned int dom_tree_walk_index;
|
||||
VEC (basic_block, heap) *domby;
|
||||
unsigned int i,j;
|
||||
struct expr **index_map;
|
||||
|
@ -4360,8 +4371,6 @@ hoist_code (void)
|
|||
int *bb_size;
|
||||
int changed = 0;
|
||||
|
||||
sbitmap_vector_zero (hoist_exprs, last_basic_block);
|
||||
|
||||
/* Compute a mapping from expression number (`bitmap_index') to
|
||||
hash table entry. */
|
||||
|
||||
|
@ -4400,34 +4409,72 @@ hoist_code (void)
|
|||
bb_size[bb->index] = to_head;
|
||||
}
|
||||
|
||||
gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
|
||||
&& (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
|
||||
== ENTRY_BLOCK_PTR->next_bb));
|
||||
|
||||
dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
|
||||
ENTRY_BLOCK_PTR->next_bb);
|
||||
|
||||
/* Walk over each basic block looking for potentially hoistable
|
||||
expressions, nothing gets hoisted from the entry block. */
|
||||
FOR_EACH_BB (bb)
|
||||
for (dom_tree_walk_index = 0;
|
||||
VEC_iterate (basic_block, dom_tree_walk, dom_tree_walk_index, bb);
|
||||
dom_tree_walk_index++)
|
||||
{
|
||||
int found = 0;
|
||||
int insn_inserted_p;
|
||||
domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);
|
||||
|
||||
if (VEC_length (basic_block, domby) == 0)
|
||||
continue;
|
||||
|
||||
domby = get_dominated_by (CDI_DOMINATORS, bb);
|
||||
/* Examine each expression that is very busy at the exit of this
|
||||
block. These are the potentially hoistable expressions. */
|
||||
for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
|
||||
{
|
||||
int hoistable = 0;
|
||||
|
||||
if (TEST_BIT (hoist_vbeout[bb->index], i))
|
||||
{
|
||||
/* Current expression. */
|
||||
struct expr *expr = index_map[i];
|
||||
/* Number of occurences of EXPR that can be hoisted to BB. */
|
||||
int hoistable = 0;
|
||||
/* Basic blocks that have occurences reachable from BB. */
|
||||
bitmap_head _from_bbs, *from_bbs = &_from_bbs;
|
||||
/* Occurences reachable from BB. */
|
||||
VEC (occr_t, heap) *occrs_to_hoist = NULL;
|
||||
/* We want to insert the expression into BB only once, so
|
||||
note when we've inserted it. */
|
||||
int insn_inserted_p;
|
||||
occr_t occr;
|
||||
|
||||
bitmap_initialize (from_bbs, 0);
|
||||
|
||||
/* If an expression is computed in BB and is available at end of
|
||||
BB, hoist all occurences dominated by BB to BB. */
|
||||
if (TEST_BIT (comp[bb->index], i))
|
||||
hoistable++;
|
||||
{
|
||||
occr = find_occr_in_bb (expr->antic_occr, bb);
|
||||
|
||||
if (occr)
|
||||
{
|
||||
/* An occurence might've been already deleted
|
||||
while processing a dominator of BB. */
|
||||
if (occr->deleted_p)
|
||||
gcc_assert (MAX_HOIST_DEPTH > 1);
|
||||
else
|
||||
{
|
||||
gcc_assert (NONDEBUG_INSN_P (occr->insn));
|
||||
hoistable++;
|
||||
}
|
||||
}
|
||||
else
|
||||
hoistable++;
|
||||
}
|
||||
|
||||
/* We've found a potentially hoistable expression, now
|
||||
we look at every block BB dominates to see if it
|
||||
computes the expression. */
|
||||
for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
|
||||
{
|
||||
struct expr *expr = index_map[i];
|
||||
struct occr *occr = NULL;
|
||||
int max_distance;
|
||||
|
||||
/* Ignore self dominance. */
|
||||
|
@ -4439,22 +4486,25 @@ hoist_code (void)
|
|||
if (!TEST_BIT (antloc[dominated->index], i))
|
||||
continue;
|
||||
|
||||
occr = find_occr_in_bb (expr->antic_occr, dominated);
|
||||
gcc_assert (occr);
|
||||
|
||||
/* An occurence might've been already deleted
|
||||
while processing a dominator of BB. */
|
||||
if (occr->deleted_p)
|
||||
{
|
||||
gcc_assert (MAX_HOIST_DEPTH > 1);
|
||||
continue;
|
||||
}
|
||||
gcc_assert (NONDEBUG_INSN_P (occr->insn));
|
||||
|
||||
max_distance = expr->max_distance;
|
||||
if (max_distance > 0)
|
||||
{
|
||||
struct occr *occr;
|
||||
|
||||
occr = find_occr_in_bb (expr->antic_occr, dominated);
|
||||
gcc_assert (occr);
|
||||
|
||||
gcc_assert (NONDEBUG_INSN_P (occr->insn));
|
||||
|
||||
/* Adjust MAX_DISTANCE to account for the fact that
|
||||
OCCR won't have to travel all of DOMINATED, but
|
||||
only part of it. */
|
||||
max_distance += (bb_size[dominated->index]
|
||||
- to_bb_head[INSN_UID (occr->insn)]);
|
||||
}
|
||||
/* Adjust MAX_DISTANCE to account for the fact that
|
||||
OCCR won't have to travel all of DOMINATED, but
|
||||
only part of it. */
|
||||
max_distance += (bb_size[dominated->index]
|
||||
- to_bb_head[INSN_UID (occr->insn)]);
|
||||
|
||||
/* Note if the expression would reach the dominated block
|
||||
unimpared if it was placed at the end of BB.
|
||||
|
@ -4463,11 +4513,16 @@ hoist_code (void)
|
|||
from a dominated block into BB. */
|
||||
if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
|
||||
max_distance, bb_size))
|
||||
hoistable++;
|
||||
{
|
||||
hoistable++;
|
||||
VEC_safe_push (occr_t, heap,
|
||||
occrs_to_hoist, occr);
|
||||
bitmap_set_bit (from_bbs, dominated->index);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we found more than one hoistable occurrence of this
|
||||
expression, then note it in the bitmap of expressions to
|
||||
expression, then note it in the vector of expressions to
|
||||
hoist. It makes no sense to hoist things which are computed
|
||||
in only one BB, and doing so tends to pessimize register
|
||||
allocation. One could increase this value to try harder
|
||||
|
@ -4478,115 +4533,76 @@ hoist_code (void)
|
|||
to nullify any benefit we get from code hoisting. */
|
||||
if (hoistable > 1 && dbg_cnt (hoist_insn))
|
||||
{
|
||||
SET_BIT (hoist_exprs[bb->index], i);
|
||||
found = 1;
|
||||
/* If (hoistable != VEC_length), then there is
|
||||
an occurence of EXPR in BB itself. Don't waste
|
||||
time looking for LCA in this case. */
|
||||
if ((unsigned) hoistable
|
||||
== VEC_length (occr_t, occrs_to_hoist))
|
||||
{
|
||||
basic_block lca;
|
||||
|
||||
lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
|
||||
from_bbs);
|
||||
if (lca != bb)
|
||||
/* Punt, it's better to hoist these occurences to
|
||||
LCA. */
|
||||
VEC_free (occr_t, heap, occrs_to_hoist);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If we found nothing to hoist, then quit now. */
|
||||
if (! found)
|
||||
{
|
||||
VEC_free (basic_block, heap, domby);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
/* Punt, no point hoisting a single occurence. */
|
||||
VEC_free (occr_t, heap, occrs_to_hoist);
|
||||
|
||||
/* Loop over all the hoistable expressions. */
|
||||
for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
|
||||
{
|
||||
/* We want to insert the expression into BB only once, so
|
||||
note when we've inserted it. */
|
||||
insn_inserted_p = 0;
|
||||
insn_inserted_p = 0;
|
||||
|
||||
/* These tests should be the same as the tests above. */
|
||||
if (TEST_BIT (hoist_exprs[bb->index], i))
|
||||
{
|
||||
/* We've found a potentially hoistable expression, now
|
||||
we look at every block BB dominates to see if it
|
||||
computes the expression. */
|
||||
for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
|
||||
/* Walk through occurences of I'th expressions we want
|
||||
to hoist to BB and make the transformations. */
|
||||
for (j = 0;
|
||||
VEC_iterate (occr_t, occrs_to_hoist, j, occr);
|
||||
j++)
|
||||
{
|
||||
struct expr *expr = index_map[i];
|
||||
int max_distance;
|
||||
rtx insn;
|
||||
rtx set;
|
||||
|
||||
/* Ignore self dominance. */
|
||||
if (bb == dominated)
|
||||
continue;
|
||||
gcc_assert (!occr->deleted_p);
|
||||
|
||||
/* We've found a dominated block, now see if it computes
|
||||
the busy expression and whether or not moving that
|
||||
expression to the "beginning" of that block is safe. */
|
||||
if (!TEST_BIT (antloc[dominated->index], i))
|
||||
continue;
|
||||
insn = occr->insn;
|
||||
set = single_set (insn);
|
||||
gcc_assert (set);
|
||||
|
||||
max_distance = expr->max_distance;
|
||||
if (max_distance > 0)
|
||||
/* Create a pseudo-reg to store the result of reaching
|
||||
expressions into. Get the mode for the new pseudo
|
||||
from the mode of the original destination pseudo.
|
||||
|
||||
It is important to use new pseudos whenever we
|
||||
emit a set. This will allow reload to use
|
||||
rematerialization for such registers. */
|
||||
if (!insn_inserted_p)
|
||||
expr->reaching_reg
|
||||
= gen_reg_rtx_and_attrs (SET_DEST (set));
|
||||
|
||||
gcse_emit_move_after (expr->reaching_reg, SET_DEST (set),
|
||||
insn);
|
||||
delete_insn (insn);
|
||||
occr->deleted_p = 1;
|
||||
changed = 1;
|
||||
gcse_subst_count++;
|
||||
|
||||
if (!insn_inserted_p)
|
||||
{
|
||||
occr = find_occr_in_bb (expr->antic_occr, dominated);
|
||||
gcc_assert (occr);
|
||||
|
||||
gcc_assert (NONDEBUG_INSN_P (occr->insn));
|
||||
|
||||
/* Adjust MAX_DISTANCE to account for the fact that
|
||||
OCCR won't have to travel all of DOMINATED, but
|
||||
only part of it. */
|
||||
max_distance += (bb_size[dominated->index]
|
||||
- to_bb_head[INSN_UID (occr->insn)]);
|
||||
}
|
||||
|
||||
/* The expression is computed in the dominated block and
|
||||
it would be safe to compute it at the start of the
|
||||
dominated block. Now we have to determine if the
|
||||
expression would reach the dominated block if it was
|
||||
placed at the end of BB.
|
||||
Note: the fact that hoist_exprs has i-th bit set means
|
||||
that /some/, not necesserilly all, occurences from
|
||||
the dominated blocks can be hoisted to BB. Here we check
|
||||
if a specific occurence can be hoisted to BB. */
|
||||
if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
|
||||
max_distance, bb_size))
|
||||
{
|
||||
rtx insn;
|
||||
rtx set;
|
||||
|
||||
if (!occr)
|
||||
{
|
||||
occr = find_occr_in_bb (expr->antic_occr, dominated);
|
||||
gcc_assert (occr);
|
||||
}
|
||||
|
||||
insn = occr->insn;
|
||||
set = single_set (insn);
|
||||
gcc_assert (set);
|
||||
|
||||
/* Create a pseudo-reg to store the result of reaching
|
||||
expressions into. Get the mode for the new pseudo
|
||||
from the mode of the original destination pseudo.
|
||||
|
||||
It is important to use new pseudos whenever we
|
||||
emit a set. This will allow reload to use
|
||||
rematerialization for such registers. */
|
||||
if (!insn_inserted_p)
|
||||
expr->reaching_reg
|
||||
= gen_reg_rtx_and_attrs (SET_DEST (set));
|
||||
|
||||
gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
|
||||
delete_insn (insn);
|
||||
occr->deleted_p = 1;
|
||||
changed = 1;
|
||||
gcse_subst_count++;
|
||||
|
||||
if (!insn_inserted_p)
|
||||
{
|
||||
insert_insn_end_basic_block (index_map[i], bb);
|
||||
insn_inserted_p = 1;
|
||||
}
|
||||
insert_insn_end_basic_block (expr, bb);
|
||||
insn_inserted_p = 1;
|
||||
}
|
||||
}
|
||||
|
||||
VEC_free (occr_t, heap, occrs_to_hoist);
|
||||
bitmap_clear (from_bbs);
|
||||
}
|
||||
}
|
||||
VEC_free (basic_block, heap, domby);
|
||||
}
|
||||
|
||||
VEC_free (basic_block, heap, dom_tree_walk);
|
||||
free (bb_size);
|
||||
free (to_bb_head);
|
||||
free (index_map);
|
||||
|
|
|
@ -240,6 +240,14 @@ DEFPARAM(PARAM_GCSE_UNRESTRICTED_COST,
|
|||
"Cost at which GCSE optimizations will not constraint the distance an expression can travel",
|
||||
3, 0, 0)
|
||||
|
||||
/* How deep from a given basic block the dominator tree should be searched
|
||||
for expressions to hoist to the block. The value of 0 will avoid limiting
|
||||
the search. */
|
||||
DEFPARAM(PARAM_MAX_HOIST_DEPTH,
|
||||
"max-hoist-depth",
|
||||
"Maximum depth of search in the dominator tree for expressions to hoist",
|
||||
30, 0, 0)
|
||||
|
||||
/* This parameter limits the number of insns in a loop that will be unrolled,
|
||||
and by how much the loop is unrolled.
|
||||
|
||||
|
|
|
@ -129,6 +129,8 @@ typedef enum compiler_param
|
|||
PARAM_VALUE (PARAM_GCSE_COST_DISTANCE_RATIO)
|
||||
#define GCSE_UNRESTRICTED_COST \
|
||||
PARAM_VALUE (PARAM_GCSE_UNRESTRICTED_COST)
|
||||
#define MAX_HOIST_DEPTH \
|
||||
PARAM_VALUE (PARAM_MAX_HOIST_DEPTH)
|
||||
#define MAX_UNROLLED_INSNS \
|
||||
PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)
|
||||
#define MAX_SMS_LOOP_NUMBER \
|
||||
|
|
Loading…
Add table
Reference in a new issue