fwprop.c: New file.
2006-11-03 Paolo Bonzini <bonzini@gnu.org> Steven Bosscher <stevenb.gcc@gmail.com> * fwprop.c: New file. * Makefile.in: Add fwprop.o. * tree-pass.h (pass_rtl_fwprop, pass_rtl_fwprop_addr): New. * passes.c (init_optimization_passes): Schedule forward propagation. * rtlanal.c (loc_mentioned_in_p): Support NULL value of the second parameter. * timevar.def (TV_FWPROP): New. * common.opt (-fforward-propagate): New. * opts.c (decode_options): Enable forward propagation at -O2. * gcse.c (one_cprop_pass): Do not run local cprop unless touching jumps. * cse.c (fold_rtx_subreg, fold_rtx_mem, fold_rtx_mem_1, find_best_addr, canon_for_address, table_size): Remove. (new_basic_block, insert, remove_from_table): Remove references to table_size. (fold_rtx): Process SUBREGs and MEMs with equiv_constant, make simplification loop more straightforward by not calling fold_rtx recursively. (equiv_constant): Move here a small part of fold_rtx_subreg, do not call fold_rtx. Call avoid_constant_pool_reference to process MEMs. * recog.c (canonicalize_change_group): New. * recog.h (canonicalize_change_group): New. * doc/invoke.texi (Optimization Options): Document fwprop. * doc/passes.texi (RTL passes): Document fwprop. Co-Authored-By: Steven Bosscher <stevenb.gcc@gmail.com> From-SVN: r118475
This commit is contained in:
parent
c7cc12b01d
commit
a52b023a5f
15 changed files with 1213 additions and 893 deletions
|
@ -1,3 +1,32 @@
|
|||
2006-11-03 Paolo Bonzini <bonzini@gnu.org>
|
||||
Steven Bosscher <stevenb.gcc@gmail.com>
|
||||
|
||||
* fwprop.c: New file.
|
||||
* Makefile.in: Add fwprop.o.
|
||||
* tree-pass.h (pass_rtl_fwprop, pass_rtl_fwprop_addr): New.
|
||||
* passes.c (init_optimization_passes): Schedule forward propagation.
|
||||
* rtlanal.c (loc_mentioned_in_p): Support NULL value of the second
|
||||
parameter.
|
||||
* timevar.def (TV_FWPROP): New.
|
||||
* common.opt (-fforward-propagate): New.
|
||||
* opts.c (decode_options): Enable forward propagation at -O2.
|
||||
* gcse.c (one_cprop_pass): Do not run local cprop unless touching jumps.
|
||||
* cse.c (fold_rtx_subreg, fold_rtx_mem, fold_rtx_mem_1, find_best_addr,
|
||||
canon_for_address, table_size): Remove.
|
||||
(new_basic_block, insert, remove_from_table): Remove references to
|
||||
table_size.
|
||||
(fold_rtx): Process SUBREGs and MEMs with equiv_constant, make
|
||||
simplification loop more straightforward by not calling fold_rtx
|
||||
recursively.
|
||||
(equiv_constant): Move here a small part of fold_rtx_subreg,
|
||||
do not call fold_rtx. Call avoid_constant_pool_reference
|
||||
to process MEMs.
|
||||
* recog.c (canonicalize_change_group): New.
|
||||
* recog.h (canonicalize_change_group): New.
|
||||
|
||||
* doc/invoke.texi (Optimization Options): Document fwprop.
|
||||
* doc/passes.texi (RTL passes): Document fwprop.
|
||||
|
||||
2006-11-03 Geoffrey Keating <geoffk@apple.com>
|
||||
|
||||
* c-decl.c (WANT_C99_INLINE_SEMANTICS): New, set to 1.
|
||||
|
@ -23,7 +52,6 @@
|
|||
|
||||
2006-11-03 Paul Brook <paul@codesourcery.com>
|
||||
|
||||
gcc/
|
||||
* config/arm/arm.c (arm_file_start): New function.
|
||||
(TARGET_ASM_FILE_START): Define.
|
||||
(arm_default_cpu): New variable.
|
||||
|
|
|
@ -997,7 +997,7 @@ OBJS-common = \
|
|||
debug.o df-core.o df-problems.o df-scan.o dfp.o diagnostic.o dojump.o \
|
||||
dominance.o loop-doloop.o \
|
||||
dwarf2asm.o dwarf2out.o emit-rtl.o except.o explow.o loop-iv.o \
|
||||
expmed.o expr.o final.o flow.o fold-const.o function.o gcse.o \
|
||||
expmed.o expr.o final.o flow.o fold-const.o function.o fwprop.o gcse.o \
|
||||
genrtl.o ggc-common.o global.o graph.o gtype-desc.o \
|
||||
haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \
|
||||
insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \
|
||||
|
@ -2336,6 +2336,9 @@ cse.o : cse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
|
|||
hard-reg-set.h $(FLAGS_H) insn-config.h $(RECOG_H) $(EXPR_H) toplev.h \
|
||||
output.h $(FUNCTION_H) $(BASIC_BLOCK_H) $(GGC_H) $(TM_P_H) $(TIMEVAR_H) \
|
||||
except.h $(TARGET_H) $(PARAMS_H) rtlhooks-def.h tree-pass.h $(REAL_H)
|
||||
fwprop.o : fwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
toplev.h insn-config.h $(RECOG_H) $(FLAGS_H) $(OBSTACK_H) $(BASIC_BLOCK_H) \
|
||||
output.h $(DF_H) alloc-pool.h $(TIMEVAR_H) tree-pass.h
|
||||
web.o : web.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
|
||||
hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \
|
||||
$(DF_H) $(OBSTACK_H) $(TIMEVAR_H) tree-pass.h
|
||||
|
|
|
@ -444,6 +444,10 @@ fforce-mem
|
|||
Common Report Var(flag_force_mem)
|
||||
Copy memory operands into registers before use
|
||||
|
||||
fforward-propagate
|
||||
Common Report Var(flag_forward_propagate)
|
||||
Perform a forward propagation pass on RTL
|
||||
|
||||
; Nonzero means don't put addresses of constant functions in registers.
|
||||
; Used for compiling the Unix kernel, where strange substitutions are
|
||||
; done on the assembly output.
|
||||
|
|
|
@ -310,7 +310,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fcse-skip-blocks -fcx-limited-range -fdata-sections @gol
|
||||
-fdelayed-branch -fdelete-null-pointer-checks -fearly-inlining @gol
|
||||
-fexpensive-optimizations -ffast-math -ffloat-store @gol
|
||||
-fforce-addr -ffunction-sections @gol
|
||||
-fforce-addr -fforward-propagate -ffunction-sections @gol
|
||||
-fgcse -fgcse-lm -fgcse-sm -fgcse-las -fgcse-after-reload @gol
|
||||
-fcrossjumping -fif-conversion -fif-conversion2 @gol
|
||||
-finline-functions -finline-functions-called-once @gol
|
||||
|
@ -4621,6 +4621,16 @@ register-load. This option is now a nop and will be removed in 4.2.
|
|||
Force memory address constants to be copied into registers before
|
||||
doing arithmetic on them.
|
||||
|
||||
@item -fforward-propagate
|
||||
@opindex fforward-propagate
|
||||
Perform a forward propagation pass on RTL. The pass tries to combine two
|
||||
instructions and checks if the result can be simplified. If loop unrolling
|
||||
is active, two passes are performed and the second is scheduled after
|
||||
loop unrolling.
|
||||
|
||||
This option is enabled by default at optimization levels @option{-O2},
|
||||
@option{-O3}, @option{-Os}.
|
||||
|
||||
@item -fomit-frame-pointer
|
||||
@opindex fomit-frame-pointer
|
||||
Don't keep the frame pointer in a register for functions that
|
||||
|
|
|
@ -685,6 +685,15 @@ optimization pass''. The bulk of the code for this pass is in
|
|||
@file{cfgcleanup.c}, and there are support routines in @file{cfgrtl.c}
|
||||
and @file{jump.c}.
|
||||
|
||||
@item Forward propagation of single-def values
|
||||
|
||||
This pass attempts to remove redundant computation by substituting
|
||||
variables that come from a single definition, and
|
||||
seeing if the result can be simplified. It performs copy propagation
|
||||
and addressing mode selection. The pass is run twice, with values
|
||||
being propagated into loops only on the second run. It is located in
|
||||
@file{fwprop.c}.
|
||||
|
||||
@item Common subexpression elimination
|
||||
|
||||
This pass removes redundant computation within basic blocks, and
|
||||
|
|
1034
gcc/fwprop.c
Normal file
1034
gcc/fwprop.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -3396,7 +3396,8 @@ one_cprop_pass (int pass, bool cprop_jumps, bool bypass_jumps)
|
|||
global_const_prop_count = local_const_prop_count = 0;
|
||||
global_copy_prop_count = local_copy_prop_count = 0;
|
||||
|
||||
local_cprop_pass (cprop_jumps);
|
||||
if (cprop_jumps)
|
||||
local_cprop_pass (cprop_jumps);
|
||||
|
||||
/* Determine implicit sets. */
|
||||
implicit_sets = XCNEWVEC (rtx, last_basic_block);
|
||||
|
|
|
@ -474,6 +474,7 @@ decode_options (unsigned int argc, const char **argv)
|
|||
flag_thread_jumps = 1;
|
||||
flag_crossjumping = 1;
|
||||
flag_optimize_sibling_calls = 1;
|
||||
flag_forward_propagate = 1;
|
||||
flag_cse_follow_jumps = 1;
|
||||
flag_gcse = 1;
|
||||
flag_expensive_optimizations = 1;
|
||||
|
|
|
@ -635,6 +635,7 @@ init_optimization_passes (void)
|
|||
NEXT_PASS (pass_instantiate_virtual_regs);
|
||||
NEXT_PASS (pass_jump2);
|
||||
NEXT_PASS (pass_cse);
|
||||
NEXT_PASS (pass_rtl_fwprop);
|
||||
NEXT_PASS (pass_gcse);
|
||||
NEXT_PASS (pass_jump_bypass);
|
||||
NEXT_PASS (pass_rtl_ifcvt);
|
||||
|
@ -645,6 +646,7 @@ init_optimization_passes (void)
|
|||
NEXT_PASS (pass_loop2);
|
||||
NEXT_PASS (pass_web);
|
||||
NEXT_PASS (pass_cse2);
|
||||
NEXT_PASS (pass_rtl_fwprop_addr);
|
||||
NEXT_PASS (pass_life);
|
||||
NEXT_PASS (pass_combine);
|
||||
NEXT_PASS (pass_if_after_combine);
|
||||
|
|
22
gcc/recog.c
22
gcc/recog.c
|
@ -238,6 +238,28 @@ validate_change (rtx object, rtx *loc, rtx new, int in_group)
|
|||
return apply_change_group ();
|
||||
}
|
||||
|
||||
/* Keep X canonicalized if some changes have made it non-canonical; only
|
||||
modifies the operands of X, not (for example) its code. Simplifications
|
||||
are not the job of this routine.
|
||||
|
||||
INSN is passed as the OBJECT argument of validate_change and X is the
expression whose operands are inspected. The only rewrite performed is
exchanging operands 0 and 1 of X, and only when X satisfies COMMUTATIVE_P
and swap_commutative_operands_p asks for the exchange.

Return true if anything was changed (i.e. the two operands were
exchanged), false otherwise. */
|
||||
bool
|
||||
canonicalize_change_group (rtx insn, rtx x)
|
||||
{
|
||||
if (COMMUTATIVE_P (x)
|
||||
&& swap_commutative_operands_p (XEXP (x, 0), XEXP (x, 1)))
|
||||
{
|
||||
/* Oops, the caller has made X no longer canonical.
|
||||
Let's redo the changes in the correct order.

Both replacements are requested with in_group = 1 and their return
values are not checked here.
NOTE(review): presumably in_group = 1 adds the change to the pending
change group so the caller validates everything together -- confirm
against validate_change. */
|
||||
rtx tem = XEXP (x, 0); /* Old operand 0, saved before its slot is handed to validate_change. */
|
||||
validate_change (insn, &XEXP (x, 0), XEXP (x, 1), 1); /* Operand 0 <- operand 1. */
|
||||
validate_change (insn, &XEXP (x, 1), tem, 1); /* Operand 1 <- saved old operand 0. */
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false; /* Not commutative, or operands already in the preferred order. */
|
||||
}
|
||||
|
||||
|
||||
/* This subroutine of apply_change_group verifies whether the changes to INSN
|
||||
were valid; i.e. whether INSN can still be recognized. */
|
||||
|
|
|
@ -74,6 +74,7 @@ extern void init_recog_no_volatile (void);
|
|||
extern int check_asm_operands (rtx);
|
||||
extern int asm_operand_ok (rtx, const char *);
|
||||
extern int validate_change (rtx, rtx *, rtx, int);
|
||||
extern bool canonicalize_change_group (rtx insn, rtx x);
|
||||
extern int insn_invalid_p (rtx);
|
||||
extern int verify_changes (int);
|
||||
extern void confirm_change_group (void);
|
||||
|
|
|
@ -2837,10 +2837,15 @@ auto_inc_p (rtx x)
|
|||
int
|
||||
loc_mentioned_in_p (rtx *loc, rtx in)
|
||||
{
|
||||
enum rtx_code code = GET_CODE (in);
|
||||
const char *fmt = GET_RTX_FORMAT (code);
|
||||
enum rtx_code code;
|
||||
const char *fmt;
|
||||
int i, j;
|
||||
|
||||
if (!in)
|
||||
return 0;
|
||||
|
||||
code = GET_CODE (in);
|
||||
fmt = GET_RTX_FORMAT (code);
|
||||
for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
||||
{
|
||||
if (loc == &in->u.fld[i].rt_rtx)
|
||||
|
|
|
@ -128,6 +128,7 @@ DEFTIMEVAR (TV_TEMPLATE_INSTANTIATION, "template instantiation")
|
|||
DEFTIMEVAR (TV_EXPAND , "expand")
|
||||
DEFTIMEVAR (TV_VARCONST , "varconst")
|
||||
DEFTIMEVAR (TV_JUMP , "jump")
|
||||
DEFTIMEVAR (TV_FWPROP , "forward prop")
|
||||
DEFTIMEVAR (TV_CSE , "CSE")
|
||||
DEFTIMEVAR (TV_LOOP , "loop analysis")
|
||||
DEFTIMEVAR (TV_GCSE , "global CSE")
|
||||
|
|
|
@ -330,6 +330,8 @@ extern struct tree_opt_pass pass_rtl_eh;
|
|||
extern struct tree_opt_pass pass_initial_value_sets;
|
||||
extern struct tree_opt_pass pass_unshare_all_rtl;
|
||||
extern struct tree_opt_pass pass_instantiate_virtual_regs;
|
||||
extern struct tree_opt_pass pass_rtl_fwprop;
|
||||
extern struct tree_opt_pass pass_rtl_fwprop_addr;
|
||||
extern struct tree_opt_pass pass_jump2;
|
||||
extern struct tree_opt_pass pass_cse;
|
||||
extern struct tree_opt_pass pass_gcse;
|
||||
|
|
Loading…
Add table
Reference in a new issue