diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1795692a707..10e9bb51940 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2000-09-06 Bernd Schmidt + + * Makefile.in (cse.o): Depend on $(BASIC_BLOCK_H). + * cse.c: Include "basic-block.h". + (struct table_elt): New field REGCOST. + (CHEAP_REG): Delete macro. + (COST): Return 0 for REGs. + (approx_reg_cost_1, approx_reg_cost, preferrable): New functions. + (notreg_cost): Return 0 for appropriate SUBREGs. + (COSTS_N_INSNS): Return N * 2. + (rtx_cost): Return 0 for REGs, and use cost of nested rtx for cheap + SUBREGs. + (CHEAPER): Use new function preferrable. + (insert): Initialize REGCOST member. + (find_best_addr): Use approx_reg_cost for estimation of register + usage. + (cse_insn): Likewise. + * loop.c (iv_add_mult_cost): New function. + (add_cost, shift_cost, mult_cost): Delete variables. + (init_loop): Don't initialize add_cost; reduce copy_cost by half. + (strength_reduce): Use iv_add_mult_cost instead of fixed add_cost. + Make code that detects autoinc opportunities slightly less optimistic. + (simplify_giv_expr): If expression contains other reg that is also a + giv, only increment benefit if this is the only use of that reg. + (consec_sets_giv): Take that change into account. + (combine_givs): Slightly more verbose output. + + * i386.h (RTX_COSTS): For MULT, return true cost of multiplication, + not the cost of an equivalent shift. + * sh-protos.h (addsubcosts): Declare. + * sh.c (addsubcosts): New function. + * sh.h (CONST_COSTS): If CONST_OK_FOR_I, then return 0. + (RTX_COSTS): Tweak. Use addsubcosts. + (ADDRESS_COST): Return higher cost for reg+reg addressing. + 2000-09-06 Geoff Keating * config/rs6000/rs6000.c (validate_condition_mode): New function. 
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9dfa222afc6..2543ec42e24 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1322,7 +1322,8 @@ simplify-rtx.o : simplify-rtx.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) \ hard-reg-set.h flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h \ output.h function.h cselib.h ggc.h $(OBSTACK_H) cse.o : cse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \ - real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h output.h function.h $(GGC_H) + real.h insn-config.h $(RECOG_H) $(EXPR_H) toplev.h output.h function.h \ + $(BASIC_BLOCK_H) $(GGC_H) gcse.o : gcse.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h \ flags.h real.h insn-config.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) \ function.h output.h toplev.h diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 015da07aa75..5f6f0257af1 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1978,22 +1978,14 @@ while (0) unsigned HOST_WIDE_INT value = INTVAL (XEXP (X, 1)); \ int nbits = 0; \ \ - if (value == 2) \ - TOPLEVEL_COSTS_N_INSNS (ix86_cost->add); \ - if (value == 4 || value == 8) \ - TOPLEVEL_COSTS_N_INSNS (ix86_cost->lea); \ - \ while (value != 0) \ { \ nbits++; \ value >>= 1; \ } \ \ - if (nbits == 1) \ - TOPLEVEL_COSTS_N_INSNS (ix86_cost->shift_const); \ - else \ - TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \ - + nbits * ix86_cost->mult_bit); \ + TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \ + + nbits * ix86_cost->mult_bit); \ } \ else /* This is arbitrary */ \ TOPLEVEL_COSTS_N_INSNS (ix86_cost->mult_init \ diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 375ade8b7d9..fc441a97feb 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -54,6 +54,7 @@ extern int prepare_move_operands PARAMS ((rtx[], enum machine_mode mode)); extern void from_compare PARAMS ((rtx *, int)); extern int shift_insns_rtx PARAMS ((rtx)); extern int shiftcosts PARAMS ((rtx)); +extern int 
addsubcosts PARAMS ((rtx)); extern int andcosts PARAMS ((rtx)); extern int multcosts PARAMS ((rtx)); extern void gen_ashift PARAMS ((int, int, rtx)); diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 930ff738b05..20babcbb6a5 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -1012,6 +1012,25 @@ andcosts (x) return 3; } +/* Return the cost of an addition or a subtraction X. Only operand 1 of X is examined; the result is what RTX_COSTS passes to COSTS_N_INSNS for PLUS. */ + +int +addsubcosts (x) + rtx x; +{ + /* Adding a register is a single cycle insn. */ + if (GET_CODE (XEXP (x, 1)) != CONST_INT) + return 1; + + /* Likewise for small constants. */ + if (CONST_OK_FOR_I (INTVAL (XEXP (x, 1)))) + return 1; + + /* Any other constant requires a 2 cycle pc-relative load plus an + addition. */ + return 3; +} + /* Return the cost of a multiply. */ int multcosts (x) diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 82b56b55f04..feb58530deb 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1651,15 +1651,12 @@ extern int current_function_anonymous_args; #define Pmode SImode #define FUNCTION_MODE Pmode -/* The relative costs of various types of constants. Note that cse.c defines - REG = 1, SUBREG = 2, any node = (2 + sum of subnodes). */ +/* The relative costs of various types of constants. */ #define CONST_COSTS(RTX, CODE, OUTER_CODE) \ case CONST_INT: \ - if (INTVAL (RTX) == 0) \ + if (CONST_OK_FOR_I (INTVAL (RTX))) \ return 0; \ - else if (CONST_OK_FOR_I (INTVAL (RTX))) \ - return 1; \ else if (((OUTER_CODE) == AND || (OUTER_CODE) == IOR || (OUTER_CODE) == XOR) \ && CONST_OK_FOR_L (INTVAL (RTX))) \ return 1; \ @@ -1674,10 +1671,7 @@ extern int current_function_anonymous_args; #define RTX_COSTS(X, CODE, OUTER_CODE) \ case PLUS: \ - return (COSTS_N_INSNS (1) \ - + rtx_cost (XEXP ((X), 0), PLUS) \ - + (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\ - ? 
0 : rtx_cost (XEXP ((X), 1), PLUS)));\ + return COSTS_N_INSNS (addsubcosts (X)); \ case AND: \ return COSTS_N_INSNS (andcosts (X)); \ case MULT: \ @@ -1685,13 +1679,7 @@ extern int current_function_anonymous_args; case ASHIFT: \ case ASHIFTRT: \ case LSHIFTRT: \ - /* Add one extra unit for the matching constraint. \ - Otherwise loop strength reduction would think that\ - a shift with different sourc and destination is \ - as cheap as adding a constant to a register. */ \ - return (COSTS_N_INSNS (shiftcosts (X)) \ - + rtx_cost (XEXP ((X), 0), (CODE)) \ - + 1); \ + return COSTS_N_INSNS (shiftcosts (X)); \ case DIV: \ case UDIV: \ case MOD: \ @@ -1768,11 +1756,11 @@ while (0) && nonpic_symbol_mentioned_p (X)) /* Compute the cost of an address. For the SH, all valid addresses are - the same cost. */ -/* ??? Perhaps we should make reg+reg addresses have higher cost because - they add to register pressure on r0. */ + the same cost. Use a slightly higher cost for reg + reg addressing, + since it increases pressure on r0. */ -#define ADDRESS_COST(RTX) 1 +#define ADDRESS_COST(X) (GET_CODE (X) == PLUS && ! CONSTANT_P (XEXP (X, 1)) \ + ? 1 : 0) /* Compute extra cost of moving data between one register class and another. */ diff --git a/gcc/cse.c b/gcc/cse.c index 93cfb97ecef..f9348fbabeb 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -28,6 +28,7 @@ Boston, MA 02111-1307, USA. */ #include "tm_p.h" #include "regs.h" #include "hard-reg-set.h" +#include "basic-block.h" #include "flags.h" #include "real.h" #include "insn-config.h" @@ -434,6 +435,8 @@ static int hash_arg_in_memory; chain is not useful. The `cost' field stores the cost of this element's expression. + The `regcost' field stores the value returned by approx_reg_cost for + this element's expression. The `is_const' flag is set if the element is a constant (including a fixed address). 
@@ -456,6 +459,7 @@ struct table_elt struct table_elt *first_same_value; struct table_elt *related_value; int cost; + int regcost; enum machine_mode mode; char in_memory; char is_const; @@ -477,7 +481,8 @@ struct table_elt ? (((unsigned) REG << 7) + (unsigned) REG_QTY (REGNO (X))) \ : canon_hash (X, M)) & HASH_MASK) -/* Determine whether register number N is considered a fixed register for CSE. +/* Determine whether register number N is considered a fixed register for the + purpose of approximating register costs. It is desirable to replace other regs with fixed regs, to reduce need for non-fixed hard regs. A reg wins if it is either the frame pointer or designated as fixed. */ @@ -497,19 +502,7 @@ struct table_elt || ((N) < FIRST_PSEUDO_REGISTER \ && FIXED_REGNO_P (N) && REGNO_REG_CLASS (N) != NO_REGS)) -/* A register is cheap if it is a user variable assigned to the register - or if its register number always corresponds to a cheap register. */ - -#define CHEAP_REG(N) \ - ((REG_USERVAR_P (N) && REGNO (N) < FIRST_PSEUDO_REGISTER) \ - || CHEAP_REGNO (REGNO (N))) - -#define COST(X) \ - (GET_CODE (X) == REG \ - ? (CHEAP_REG (X) ? 0 \ - : REGNO (X) >= FIRST_PSEUDO_REGISTER ? 1 \ - : 2) \ - : notreg_cost(X)) +#define COST(X) (GET_CODE (X) == REG ? 0 : notreg_cost (X)) /* Get the info associated with register N. */ @@ -644,6 +637,9 @@ struct cse_basic_block_data || GET_CODE (X) == ADDRESSOF) static int notreg_cost PARAMS ((rtx)); +static int approx_reg_cost_1 PARAMS ((rtx *, void *)); +static int approx_reg_cost PARAMS ((rtx)); +static int preferrable PARAMS ((int, int, int, int)); static void new_basic_block PARAMS ((void)); static void make_new_qty PARAMS ((unsigned int, enum machine_mode)); static void make_regs_eqv PARAMS ((unsigned int, unsigned int)); @@ -717,6 +713,62 @@ dump_class (classp) } } +/* Subroutine of approx_reg_cost; called through for_each_rtx. 
*/ +static int +approx_reg_cost_1 (xp, data) + rtx *xp; + void *data; +{ + rtx x = *xp; + regset set = (regset) data; /* DATA is the regset that collects the REGs seen. */ + + if (x && GET_CODE (x) == REG) + SET_REGNO_REG_SET (set, REGNO (x)); + return 0; +} + +/* Return an estimate of the cost of the registers used in an rtx. + This is mostly the number of different REG expressions in the rtx; + however for some exceptions like fixed registers we use a cost of + 0. */ + +static int +approx_reg_cost (x) + rtx x; +{ + regset_head set; + int i; + int cost = 0; + + INIT_REG_SET (&set); + for_each_rtx (&x, approx_reg_cost_1, (void *)&set); + + EXECUTE_IF_SET_IN_REG_SET + (&set, 0, i, + { /* Count only the registers that are not CHEAP_REGNO. */ + if (! CHEAP_REGNO (i)) + cost++; + }); + + CLEAR_REG_SET (&set); + return cost; +} + +/* Return a negative value if an rtx A, whose costs are given by COST_A + and REGCOST_A, is more desirable than an rtx B. + Return a positive value if A is less desirable, or 0 if the two are + equally good. COST is compared first; REGCOST only breaks ties. */ +static int +preferrable (cost_a, regcost_a, cost_b, regcost_b) + int cost_a, regcost_a, cost_b, regcost_b; +{ + if (cost_a != cost_b) + return cost_a - cost_b; + if (regcost_a != regcost_b) + return regcost_a - regcost_b; + return 0; +} + /* Internal function, to compute cost when X is not a register; called from COST macro to keep it simple. */ @@ -733,9 +785,7 @@ notreg_cost (x) && subreg_lowpart_p (x) && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (x)), GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))))) - ? (CHEAP_REG (SUBREG_REG (x)) ? 0 - : (REGNO (SUBREG_REG (x)) >= FIRST_PSEUDO_REGISTER ? 1 - : 2)) + ? 0 : rtx_cost (x, SET) * 2); } @@ -743,7 +793,7 @@ notreg_cost (x) to make the cost of the corresponding register-to-register instruction N times that of a fast register-to-register instruction. */ -#define COSTS_N_INSNS(N) ((N) * 4 - 2) +#define COSTS_N_INSNS(N) ((N) * 2) /* Return an estimate of the cost of computing rtx X. One use is in cse, to decide which expression to keep in the hash table. 
@@ -795,7 +845,7 @@ rtx_cost (x, outer_code) switch (code) { case REG: - return ! CHEAP_REG (x); + return 0; case SUBREG: /* If we can't tie these modes, make this expensive. The larger @@ -803,7 +853,8 @@ rtx_cost (x, outer_code) if (! MODES_TIEABLE_P (GET_MODE (x), GET_MODE (SUBREG_REG (x)))) return COSTS_N_INSNS (2 + GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD); - return 2; + break; + #ifdef RTX_COSTS RTX_COSTS (x, code, outer_code); #endif @@ -860,6 +911,7 @@ address_cost (x, mode) return rtx_cost (x, MEM); #endif } + static struct cse_reg_info * get_cse_reg_info (regno) @@ -1474,7 +1526,8 @@ lookup_as_function (x, code) If necessary, update table showing constant values of quantities. */ -#define CHEAPER(X,Y) ((X)->cost < (Y)->cost) +#define CHEAPER(X, Y) \ + (preferrable ((X)->cost, (X)->regcost, (Y)->cost, (Y)->regcost) < 0) static struct table_elt * insert (x, classp, hash, mode) @@ -1521,6 +1574,7 @@ insert (x, classp, hash, mode) elt->exp = x; elt->canon_exp = NULL_RTX; elt->cost = COST (x); + elt->regcost = approx_reg_cost (x); elt->next_same_value = 0; elt->prev_same_value = 0; elt->next_same_hash = table[hash]; @@ -2775,7 +2829,6 @@ find_best_addr (insn, loc, mode) int save_hash_arg_in_memory = hash_arg_in_memory; int addr_volatile; int regno; - int folded_cost, addr_cost; unsigned hash; /* Do not try to replace constant addresses or addresses of local and @@ -2808,14 +2861,15 @@ find_best_addr (insn, loc, mode) if (GET_CODE (addr) != REG) { rtx folded = fold_rtx (copy_rtx (addr), NULL_RTX); + int addr_folded_cost = address_cost (folded, mode); + int addr_cost = address_cost (addr, mode); - folded_cost = address_cost (folded, mode); - addr_cost = address_cost (addr, mode); - - if ((folded_cost < addr_cost - || (folded_cost == addr_cost - && rtx_cost (folded, MEM) > rtx_cost (addr, MEM))) - && rtx_cost (folded, MEM) < rtx_cost (addr, MEM) + if ((addr_folded_cost < addr_cost + || (addr_folded_cost == addr_cost + /* ??? 
The rtx_cost comparison is left over from an older + version of this code. It is probably no longer helpful. */ + && (rtx_cost (folded, MEM) > rtx_cost (addr, MEM) + || approx_reg_cost (folded) < approx_reg_cost (addr)))) && validate_change (insn, loc, folded, 0)) addr = folded; } @@ -4822,6 +4876,8 @@ cse_insn (insn, libcall_insn) struct table_elt *src_const_elt = 0; int src_cost = 10000, src_eqv_cost = 10000, src_folded_cost = 10000; int src_related_cost = 10000, src_elt_cost = 10000; + int src_regcost = 10000, src_eqv_regcost = 10000, src_folded_regcost = 10000; + int src_related_regcost = 10000, src_elt_regcost = 10000; /* Register costs start at the same sentinel as the costs above: a candidate that is absent or equal to DEST never gets its regcost assigned, yet preferrable reads it whenever two costs tie. */ /* Set non-zero if we need to call force_const_mem on with the contents of src_folded before using it. */ int src_folded_force_flag = 0; @@ -5230,7 +5286,10 @@ cse_insn (insn, libcall_insn) if (rtx_equal_p (src, dest)) src_cost = -1; else - src_cost = COST (src); + { + src_cost = COST (src); + src_regcost = approx_reg_cost (src); + } } if (src_eqv_here) @@ -5238,7 +5297,10 @@ cse_insn (insn, libcall_insn) if (rtx_equal_p (src_eqv_here, dest)) src_eqv_cost = -1; else - src_eqv_cost = COST (src_eqv_here); + { + src_eqv_cost = COST (src_eqv_here); + src_eqv_regcost = approx_reg_cost (src_eqv_here); + } } if (src_folded) @@ -5246,7 +5308,10 @@ cse_insn (insn, libcall_insn) if (rtx_equal_p (src_folded, dest)) src_folded_cost = -1; else - src_folded_cost = COST (src_folded); + { + src_folded_cost = COST (src_folded); + src_folded_regcost = approx_reg_cost (src_folded); + } } if (src_related) @@ -5254,7 +5319,10 @@ cse_insn (insn, libcall_insn) if (rtx_equal_p (src_related, dest)) src_related_cost = -1; else - src_related_cost = COST (src_related); + { + src_related_cost = COST (src_related); + src_related_regcost = approx_reg_cost (src_related); + } } /* If this was an indirect jump insn, a known label will really be @@ -5292,30 +5360,43 @@ cse_insn (insn, libcall_insn) continue; } - if (elt) - src_elt_cost = elt->cost; + if (elt) + { + src_elt_cost = elt->cost; + src_elt_regcost = 
elt->regcost; + } - /* Find cheapest and skip it for the next time. For items + /* Find cheapest and skip it for the next time. For items of equal cost, use this order: src_folded, src, src_eqv, src_related and hash table entry. */ - if (src_folded_cost <= src_cost - && src_folded_cost <= src_eqv_cost - && src_folded_cost <= src_related_cost - && src_folded_cost <= src_elt_cost) + if (preferrable (src_folded_cost, src_folded_regcost, + src_cost, src_regcost) <= 0 + && preferrable (src_folded_cost, src_folded_regcost, + src_eqv_cost, src_eqv_regcost) <= 0 + && preferrable (src_folded_cost, src_folded_regcost, + src_related_cost, src_related_regcost) <= 0 + && preferrable (src_folded_cost, src_folded_regcost, + src_elt_cost, src_elt_regcost) <= 0) { trial = src_folded, src_folded_cost = 10000; if (src_folded_force_flag) trial = force_const_mem (mode, trial); } - else if (src_cost <= src_eqv_cost - && src_cost <= src_related_cost - && src_cost <= src_elt_cost) + else if (preferrable (src_cost, src_regcost, + src_eqv_cost, src_eqv_regcost) <= 0 + && preferrable (src_cost, src_regcost, + src_related_cost, src_related_regcost) <= 0 + && preferrable (src_cost, src_regcost, + src_elt_cost, src_elt_regcost) <= 0) trial = src, src_cost = 10000; - else if (src_eqv_cost <= src_related_cost - && src_eqv_cost <= src_elt_cost) + else if (preferrable (src_eqv_cost, src_eqv_regcost, + src_related_cost, src_related_regcost) <= 0 + && preferrable (src_eqv_cost, src_eqv_regcost, + src_elt_cost, src_elt_regcost) <= 0) trial = copy_rtx (src_eqv_here), src_eqv_cost = 10000; - else if (src_related_cost <= src_elt_cost) - trial = copy_rtx (src_related), src_related_cost = 10000; + else if (preferrable (src_related_cost, src_related_regcost, + src_elt_cost, src_elt_regcost) <= 0) + trial = copy_rtx (src_related), src_related_cost = 10000; else { trial = copy_rtx (elt->exp); diff --git a/gcc/loop.c b/gcc/loop.c index c5e5c4c2ccb..ce3494bd415 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -312,6 
+312,7 @@ static void try_swap_copy_prop PARAMS ((const struct loop *, rtx, static int replace_label PARAMS ((rtx *, void *)); static rtx check_insn_for_givs PARAMS((struct loop *, rtx, int, int)); static rtx check_insn_for_bivs PARAMS((struct loop *, rtx, int, int)); +static int iv_add_mult_cost PARAMS ((rtx, rtx, rtx, rtx)); static void loop_dump_aux PARAMS ((const struct loop *, FILE *, int)); void debug_loop PARAMS ((const struct loop *)); @@ -341,13 +342,6 @@ static int compute_luids PARAMS ((rtx, rtx, int)); static int biv_elimination_giv_has_0_offset PARAMS ((struct induction *, struct induction *, rtx)); -/* Relative gain of eliminating various kinds of operations. */ -static int add_cost; -#if 0 -static int shift_cost; -static int mult_cost; -#endif - /* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to copy the value of the strength reduced giv to its original register. */ static int copy_cost; @@ -361,15 +355,9 @@ init_loop () char *free_point = (char *) oballoc (1); rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); - add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET); - reg_address_cost = address_cost (reg, SImode); - /* We multiply by 2 to reconcile the difference in scale between - these two ways of computing costs. Otherwise the cost of a copy - will be far less than the cost of an add. */ - - copy_cost = 2 * 2; + copy_cost = 2; /* Free the objects we just allocated. 
*/ obfree (free_point); @@ -3825,6 +3813,7 @@ strength_reduce (loop, insn_count, flags) rtx loop_start = loop->start; rtx loop_end = loop->end; rtx loop_scan_start = loop->scan_start; + rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); VARRAY_INT_INIT (reg_iv_type, max_reg_before_loop, "reg_iv_type"); VARRAY_GENERIC_PTR_INIT (reg_iv_info, max_reg_before_loop, "reg_iv_info"); @@ -4436,11 +4425,15 @@ strength_reduce (loop, insn_count, flags) for (v = bl->giv; v; v = v->next_iv) { struct induction *tv; + int add_cost; if (v->ignore || v->same) continue; benefit = v->benefit; + PUT_MODE (test_reg, v->mode); + add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val, + test_reg, test_reg); /* Reduce benefit if not replaceable, since we will insert a move-insn to replace the insn that calculates this giv. @@ -4457,7 +4450,14 @@ strength_reduce (loop, insn_count, flags) benefit -= copy_cost; /* Decrease the benefit to count the add-insns that we will - insert to increment the reduced reg for the giv. */ + insert to increment the reduced reg for the giv. + ??? This can overestimate the run-time cost of the additional + insns, e.g. if there are multiple basic blocks that increment + the biv, but only one of these blocks is executed during each + iteration. There is no good way to detect cases like this with + the current structure of the loop optimizer. + This code is more accurate for determining code size than + run-time benefits. */ benefit -= add_cost * bl->biv_count; /* Decide whether to strength-reduce this giv or to leave the code @@ -4469,6 +4469,10 @@ strength_reduce (loop, insn_count, flags) new add insns; if so, increase BENEFIT (undo the subtraction of add_cost that was done above). */ if (v->giv_type == DEST_ADDR + /* Increasing the benefit is risky, since this is only a guess. + Avoid increasing register pressure in cases where there would + be no other benefit from reducing this giv. 
*/ + && benefit > 0 && GET_CODE (v->mult_val) == CONST_INT) { if (HAVE_POST_INCREMENT @@ -6439,7 +6443,20 @@ simplify_giv_expr (loop, x, benefit) /* Form expression from giv and add benefit. Ensure this giv can derive another and subtract any needed adjustment if so. */ - *benefit += v->benefit; + + /* Increasing the benefit here is risky. The only case in which it + is arguably correct is if this is the only use of V. In other + cases, this will artificially inflate the benefit of the current + giv, and lead to suboptimal code. Thus, it is disabled for those other cases, since + potentially not reducing an only marginally beneficial giv is + less harmful than reducing many givs that are not really + beneficial. */ + { + rtx single_use = VARRAY_RTX (reg_single_usage, REGNO (x)); + if (single_use && single_use != const0_rtx) /* NOTE(review): const0_rtx presumably marks a register used more than once -- confirm against the code that fills reg_single_usage. */ + *benefit += v->benefit; + } + if (v->cant_derive) return 0; @@ -6683,7 +6700,7 @@ consec_sets_giv (loop, first_benefit, p, src_reg, dest_reg, count--; v->mult_val = *mult_val; v->add_val = *add_val; - v->benefit = benefit; + v->benefit += benefit; } else if (code != NOTE) { @@ -7098,8 +7115,9 @@ restart: if (loop_dump_stream) fprintf (loop_dump_stream, - "giv at %d combined with giv at %d\n", - INSN_UID (g2->insn), INSN_UID (g1->insn)); + "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n", + INSN_UID (g2->insn), INSN_UID (g1->insn), + g1->benefit, g1_add_benefit, g1->lifetime); } } @@ -7613,6 +7631,34 @@ emit_iv_add_mult (b, m, a, reg, insert_before) && GET_CODE (SET_DEST (seq)) == REG) record_base_value (REGNO (SET_DEST (seq)), SET_SRC (seq), 0); } + +/* Similar to emit_iv_add_mult, but compute cost rather than emitting + insns. 
*/ +static int +iv_add_mult_cost (b, m, a, reg) + rtx b; /* initial value of basic induction variable */ + rtx m; /* multiplicative constant */ + rtx a; /* additive constant */ + rtx reg; /* destination register */ +{ + int cost = 0; + rtx last, result; + + start_sequence (); + result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 0); + if (reg != result) /* Account for the final copy into REG when the result landed elsewhere. */ + emit_move_insn (reg, result); + last = get_last_insn (); /* Walk backwards from the last generated insn, summing rtx_cost of each single_set source; insns for which single_set returns 0 add nothing. */ + while (last) + { + rtx t = single_set (last); + if (t) + cost += rtx_cost (SET_SRC (t), SET); + last = PREV_INSN (last); + } + end_sequence (); + return cost; +} /* Test whether A * B can be computed without an actual multiply insn. Value is 1 if so. */