From cac8ce95a1e4c7b10f5847d35b3650e6fba8d38c Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Mon, 5 Oct 1998 22:03:25 +0000 Subject: [PATCH] unroll.c (loop_iteration_var, [...]): No longer static. * unroll.c (loop_iteration_var, loop_initial_value, loop_increment, loop_final_value, loop_comparison_code): No longer static. (unroll_loop): Delete loop_start_value update. * loop.h (loop_iteration_var, loop_initial_value, loop_increment, loop_final_value, loop_comparison_code): Extern. (loop_start_value): Delete extern. * loop.c (loop_can_insert_bct, loop_increment, loop_start_value, loop_comparison_value, loop_comparison_code): Delete. (loop_optimize): Remove initialization for deleted variables. (strength_reduce): Delete analyze_loop_iterations call. Only call insert_bct if flag_branch_on_count_reg set. (analyze_loop_iterations): Delete. (insert_bct): Remove iteration count calculation. Move checks for viable BCT optimization to here. Obtain iteration count from loop_iterations and correct for unrolling. Check for enough iterations to be beneficial. Comment out runtime iteration count case. (insert_bct): Print iteration count in dump file. Remove loop_var_mode and use word_mode directly. * rs6000.h (processor_type): Add PROCESSOR_PPC604e. * rs6000.c (rs6000_override_options): Use it. (optimization_options): Enable use of flag_branch_on_count_reg. * rs6000.md (define_function_unit): Describe 604e. 
From-SVN: r22852 --- gcc/ChangeLog | 27 ++ gcc/config/rs6000/rs6000.c | 4 +- gcc/config/rs6000/rs6000.h | 3 +- gcc/config/rs6000/rs6000.md | 35 +- gcc/loop.c | 757 +++++++++++------------------------- gcc/loop.h | 15 +- gcc/unroll.c | 17 +- 7 files changed, 293 insertions(+), 565 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index df60f90982b..b3a90f2f608 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,30 @@ +Mon Oct 5 22:43:36 1998 David Edelsohn + + * unroll.c (loop_iteration_var, loop_initial_value, loop_increment + loop_final_value, loop_comparison_code): No longer static. + (unroll_loop): Delete loop_start_value update. + * loop.h (loop_iteration_var, loop_initial_value, loop_increment, + loop_final_value, loop_comparison_code): Extern. + (loop_start_value): Delete extern. + * loop.c (loop_can_insert_bct, loop_increment, loop_start_value, + loop_comparison_value, loop_comparison_code): Delete. + (loop_optimize): Remove initialization for deleted variables. + (strength_reduce): Delete analyze_loop_iterations call. Only call + insert_bct if flag_branch_count_on_reg set. + (analyze_loop_iterations): Delete. + (insert_bct): Remove iteration count calculation. Move checks for + viable BCT optimization to here. Obtain iteration count from + loop_iterations and correct for unrolling. Check for enough + iteration to be beneficial. Comment out runtime iteration count + case. + (insert_bct): Print iteration count in dump file. Remove + loop_var_mode and use word_mode directly. + + * rs6000.h (processor_type): Add PROCESSOR_PPC604e. + * rs6000.c (rs6000_override_options): Use it. + (optimization_options): Enable use of flag_branch_on_count_reg. + * rs6000.md (define_function_unit): Describe 604e. + 1998-10-05 Herman A.J. 
ten Brugge * loop.c (move_movables): Corrected threshold calculation for diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index f4f47ed2ea8..b1b36fe423e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -228,7 +228,7 @@ rs6000_override_options (default_cpu) {"604", PROCESSOR_PPC604, MASK_POWERPC | MASK_PPC_GFXOPT | MASK_NEW_MNEMONICS, POWER_MASKS | MASK_PPC_GPOPT | MASK_POWERPC64}, - {"604e", PROCESSOR_PPC604, + {"604e", PROCESSOR_PPC604e, MASK_POWERPC | MASK_PPC_GFXOPT | MASK_NEW_MNEMONICS, POWER_MASKS | MASK_PPC_GPOPT | MASK_POWERPC64}, {"620", PROCESSOR_PPC620, @@ -353,13 +353,11 @@ optimization_options (level, size) int level; int size ATTRIBUTE_UNUSED; { -#if 0 #ifdef HAIFA /* When optimizing, enable use of BCT instruction. */ if (level >= 1) flag_branch_on_count_reg = 1; #endif -#endif } /* Do anything needed at the start of the asm file. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1ceef9571c6..bbdcebb73b7 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -389,7 +389,7 @@ extern int target_flags; #define TARGET_DEFAULT (MASK_POWER | MASK_MULTIPLE | MASK_STRING) -/* Processor type. */ +/* Processor type. Order must match cpu attribute in MD file. */ enum processor_type {PROCESSOR_RIOS1, PROCESSOR_RIOS2, @@ -398,6 +398,7 @@ enum processor_type PROCESSOR_PPC601, PROCESSOR_PPC603, PROCESSOR_PPC604, + PROCESSOR_PPC604e, PROCESSOR_PPC620}; extern enum processor_type rs6000_cpu; diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 23064076530..7b7f989fddf 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -40,7 +40,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. 
-(define_attr "cpu" "rios1,rios2,mpccore,ppc403,ppc601,ppc603,ppc604,ppc620" +(define_attr "cpu" "rios1,rios2,mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620" (const (symbol_ref "rs6000_cpu_attr"))) ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY @@ -50,12 +50,12 @@ ; (POWER and 601 use Integer Unit) (define_function_unit "lsu" 1 0 (and (eq_attr "type" "load") - (eq_attr "cpu" "mpccore,ppc603,ppc604,ppc620")) + (eq_attr "cpu" "mpccore,ppc603,ppc604,ppc604e,ppc620")) 2 1) (define_function_unit "lsu" 1 0 (and (eq_attr "type" "store,fpstore") - (eq_attr "cpu" "mpccore,ppc603,ppc604,ppc620")) + (eq_attr "cpu" "mpccore,ppc603,ppc604,ppc604e,ppc620")) 1 1) (define_function_unit "lsu" 1 0 @@ -65,7 +65,7 @@ (define_function_unit "lsu" 1 0 (and (eq_attr "type" "fpload") - (eq_attr "cpu" "ppc604,ppc620")) + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) 3 1) (define_function_unit "iu" 1 0 @@ -181,12 +181,12 @@ (eq_attr "cpu" "mpccore")) 6 6) -; PPC604 has two units that perform integer operations +; PPC604{,e} has two units that perform integer operations ; and one unit for divide/multiply operations (and move ; from/to spr). 
(define_function_unit "iu2" 2 0 (and (eq_attr "type" "integer") - (eq_attr "cpu" "ppc604,ppc620")) + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) 1 1) (define_function_unit "imuldiv" 1 0 @@ -194,9 +194,14 @@ (eq_attr "cpu" "ppc604,ppc620")) 4 2) +(define_function_unit "imuldiv" 1 0 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppc604e")) + 2 1) + (define_function_unit "imuldiv" 1 0 (and (eq_attr "type" "idiv") - (eq_attr "cpu" "ppc604,ppc620")) + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) 20 19) ; compare is done on integer unit, but feeds insns which @@ -213,7 +218,7 @@ (define_function_unit "iu" 1 0 (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc620")) + (eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620")) 3 1) (define_function_unit "iu2" 2 0 @@ -223,7 +228,7 @@ (define_function_unit "iu2" 2 0 (and (eq_attr "type" "compare,delayed_compare") - (eq_attr "cpu" "ppc604,ppc620")) + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) 1 1) ; fp compare uses fp unit @@ -250,7 +255,7 @@ ; fp compare uses fp unit (define_function_unit "fpu" 1 0 (and (eq_attr "type" "fpcompare") - (eq_attr "cpu" "ppc601,ppc603,ppc604,ppc620")) + (eq_attr "cpu" "ppc601,ppc603,ppc604,ppc604e,ppc620")) 5 1) (define_function_unit "fpu" 1 0 @@ -265,7 +270,7 @@ (define_function_unit "bpu" 1 0 (and (eq_attr "type" "mtjmpr") - (eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc620")) + (eq_attr "cpu" "mpccore,ppc403,ppc601,ppc603,ppc604,ppc604e,ppc620")) 4 1) ; all jumps/branches are executing on the bpu, in 1 cycle, for all machines. 
@@ -295,7 +300,7 @@ (define_function_unit "fpu" 1 0 (and (eq_attr "type" "fp") - (eq_attr "cpu" "ppc603,ppc604,ppc620")) + (eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620")) 3 1) (define_function_unit "fpu" 1 0 @@ -316,7 +321,7 @@ (define_function_unit "fpu" 1 0 (and (eq_attr "type" "dmul") - (eq_attr "cpu" "ppc604,ppc620")) + (eq_attr "cpu" "ppc604,ppc604e,ppc620")) 3 1) (define_function_unit "fpu" 1 0 @@ -336,7 +341,7 @@ (define_function_unit "fpu" 1 0 (and (eq_attr "type" "sdiv") - (eq_attr "cpu" "ppc603,ppc604,ppc620")) + (eq_attr "cpu" "ppc603,ppc604,ppc604e,ppc620")) 18 18) (define_function_unit "fpu" 1 0 @@ -346,7 +351,7 @@ (define_function_unit "fpu" 1 0 (and (eq_attr "type" "ddiv") - (eq_attr "cpu" "ppc601,ppc604,ppc620")) + (eq_attr "cpu" "ppc601,ppc604,ppc604e,ppc620")) 31 31) (define_function_unit "fpu" 1 0 diff --git a/gcc/loop.c b/gcc/loop.c index bf59f7233e2..24f1b6d7479 100644 --- a/gcc/loop.c +++ b/gcc/loop.c @@ -82,26 +82,11 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends; int *loop_outer_loop; -#ifdef HAIFA -/* The main output of analyze_loop_iterations is placed here */ - -int *loop_can_insert_bct; - -/* For each loop, determines whether some of its inner loops has used - count register */ +#ifdef HAVE_decrement_and_branch_on_count +/* Records whether resource in use by inner loop. */ int *loop_used_count_register; - -/* loop parameters for arithmetic loops. These loops have a loop variable - which is initialized to loop_start_value, incremented in each iteration - by "loop_increment". At the end of the iteration the loop variable is - compared to the loop_comparison_value (using loop_comparison_code). */ - -rtx *loop_increment; -rtx *loop_comparison_value; -rtx *loop_start_value; -enum rtx_code *loop_comparison_code; -#endif /* HAIFA */ +#endif /* HAVE_decrement_and_branch_on_count */ /* For each loop, keep track of its unrolling factor. 
Potential values: @@ -372,20 +357,13 @@ typedef struct rtx_pair { && INSN_LUID (INSN) >= INSN_LUID (START) \ && INSN_LUID (INSN) <= INSN_LUID (END)) -#ifdef HAIFA -/* This is extern from unroll.c */ -extern void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx)); - -/* Two main functions for implementing bct: - first - to be called before loop unrolling, and the second - after */ #ifdef HAVE_decrement_and_branch_on_count -static void analyze_loop_iterations PROTO((rtx, rtx)); +/* Test whether BCT applicable and safe. */ static void insert_bct PROTO((rtx, rtx)); -/* Auxiliary function that inserts the bct pattern into the loop */ +/* Auxiliary function that inserts the BCT pattern into the loop. */ static void instrument_loop_bct PROTO((rtx, rtx, rtx)); #endif /* HAVE_decrement_and_branch_on_count */ -#endif /* HAIFA */ /* Indirect_jump_in_function is computed once per function. */ int indirect_jump_in_function = 0; @@ -500,25 +478,11 @@ loop_optimize (f, dumpfile, unroll_p, bct_p) loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int)); bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int)); -#ifdef HAIFA +#ifdef HAVE_decrement_and_branch_on_count /* Allocate for BCT optimization */ - loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int)); - bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int)); - loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int)); bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int)); - - loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx)); - loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx)); - loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx)); - bzero ((char *) loop_increment, max_loop_num * sizeof (rtx)); - bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx)); - bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx)); - - loop_comparison_code - = (enum rtx_code *) alloca (max_loop_num * sizeof (enum 
rtx_code)); - bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code)); -#endif /* HAIFA */ +#endif /* HAVE_decrement_and_branch_on_count */ /* Find and process each loop. First, find them, and record them in order of their beginnings. */ @@ -2989,10 +2953,10 @@ mark_loop_jump (x, loop_num) if (loop_num != -1) { -#ifdef HAIFA +#ifdef HAVE_decrement_and_branch_on_count LABEL_OUTSIDE_LOOP_P (x) = 1; LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num]; -#endif /* HAIFA */ +#endif /* HAVE_decrement_and_branch_on_count */ loop_number_exit_labels[loop_num] = x; @@ -4109,16 +4073,6 @@ strength_reduce (scan_start, end, loop_top, insn_count, so that "decrement and branch until zero" insn can be used. */ check_dbra_loop (loop_end, insn_count, loop_start); -#ifdef HAIFA - /* record loop-variables relevant for BCT optimization before unrolling - the loop. Unrolling may update part of this information, and the - correct data will be used for generating the BCT. */ -#ifdef HAVE_decrement_and_branch_on_count - if (HAVE_decrement_and_branch_on_count && bct_p) - analyze_loop_iterations (loop_start, loop_end); -#endif -#endif /* HAIFA */ - /* Create reg_map to hold substitutions for replaceable giv regs. */ reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx)); bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx)); @@ -4618,13 +4572,12 @@ strength_reduce (scan_start, end, loop_top, insn_count, if (unroll_p) unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1); -#ifdef HAIFA - /* instrument the loop with bct insn */ #ifdef HAVE_decrement_and_branch_on_count - if (HAVE_decrement_and_branch_on_count && bct_p) + /* Instrument the loop with BCT insn. 
*/ + if (HAVE_decrement_and_branch_on_count && bct_p + && flag_branch_on_count_reg) insert_bct (loop_start, loop_end); -#endif -#endif /* HAIFA */ +#endif /* HAVE_decrement_and_branch_on_count */ if (loop_dump_stream) fprintf (loop_dump_stream, "\n"); @@ -7985,530 +7938,280 @@ get_condition_for_loop (x) XEXP (comparison, 1), XEXP (comparison, 0)); } -#ifdef HAIFA -/* Analyze a loop in order to instrument it with the use of count register. - loop_start and loop_end are the first and last insns of the loop. - This function works in cooperation with insert_bct (). - loop_can_insert_bct[loop_num] is set according to whether the optimization - is applicable to the loop. When it is applicable, the following variables - are also set: - loop_start_value[loop_num] - loop_comparison_value[loop_num] - loop_increment[loop_num] - loop_comparison_code[loop_num] */ - #ifdef HAVE_decrement_and_branch_on_count -static void -analyze_loop_iterations (loop_start, loop_end) - rtx loop_start, loop_end; -{ - rtx comparison, comparison_value; - rtx iteration_var, initial_value, increment; - enum rtx_code comparison_code; - - rtx last_loop_insn; - rtx insn; - int i; - - /* loop_variable mode */ - enum machine_mode original_mode; - - /* find the number of the loop */ - int loop_num = uid_loop_num [INSN_UID (loop_start)]; - - /* we change our mind only when we are sure that loop will be instrumented */ - loop_can_insert_bct[loop_num] = 0; - - /* is the optimization suppressed. */ - if ( !flag_branch_on_count_reg ) - return; - - /* make sure that count-reg is not in use */ - if (loop_used_count_register[loop_num]){ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n", - loop_num); - return; - } - - /* make sure that the function has no indirect jumps. 
*/ - if (indirect_jump_in_function){ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n", - loop_num); - return; - } - - /* make sure that the last loop insn is a conditional jump */ - last_loop_insn = PREV_INSN (loop_end); - if (GET_CODE (last_loop_insn) != JUMP_INSN || !condjump_p (last_loop_insn)) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n", - loop_num); - return; - } - - /* First find the iteration variable. If the last insn is a conditional - branch, and the insn preceding it tests a register value, make that - register the iteration variable. */ - - /* We used to use prev_nonnote_insn here, but that fails because it might - accidentally get the branch for a contained loop if the branch for this - loop was deleted. We can only trust branches immediately before the - loop_end. */ - - comparison = get_condition_for_loop (last_loop_insn); - /* ??? Get_condition may switch position of induction variable and - invariant register when it canonicalizes the comparison. 
*/ - - if (comparison == 0) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n", - loop_num); - return; - } - - comparison_code = GET_CODE (comparison); - iteration_var = XEXP (comparison, 0); - comparison_value = XEXP (comparison, 1); - - original_mode = GET_MODE (iteration_var); - if (GET_MODE_CLASS (original_mode) != MODE_INT - || GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n", - loop_num); - return; - } - - /* get info about loop bounds and increment */ - iteration_info (iteration_var, &initial_value, &increment, - loop_start, loop_end); - - /* make sure that all required loop data were found */ - if (!(initial_value && increment && comparison_value - && invariant_p (comparison_value) && invariant_p (increment) - && ! indirect_jump_in_function)) - { - if (loop_dump_stream) { - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num); - if (!(initial_value && increment && comparison_value)) { - fprintf (loop_dump_stream, "\tbounds not available: "); - if ( ! initial_value ) - fprintf (loop_dump_stream, "initial "); - if ( ! increment ) - fprintf (loop_dump_stream, "increment "); - if ( ! comparison_value ) - fprintf (loop_dump_stream, "comparison "); - fprintf (loop_dump_stream, "\n"); - } - if (!invariant_p (comparison_value) || !invariant_p (increment)) - fprintf (loop_dump_stream, "\tloop bounds not invariant\n"); - } - return; - } - - /* make sure that the increment is constant */ - if (GET_CODE (increment) != CONST_INT) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n", - loop_num); - return; - } - - /* make sure that the loop contains neither function call, nor jump on table. 
- (the count register might be altered by the called function, and might - be used for a branch on table). */ - for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) { - if (GET_CODE (insn) == CALL_INSN){ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n", - loop_num); - return; - } - - if (GET_CODE (insn) == JUMP_INSN - && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC - || GET_CODE (PATTERN (insn)) == ADDR_VEC)){ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n", - loop_num); - return; - } - } - - /* At this point, we are sure that the loop can be instrumented with BCT. - Some of the loops, however, will not be instrumented - the final decision - is taken by insert_bct () */ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n", - loop_num); - - /* mark all enclosing loops that they cannot use count register */ - /* ???: In fact, since insert_bct may decide not to instrument this loop, - marking here may prevent instrumenting an enclosing loop that could - actually be instrumented. But since this is rare, it is safer to mark - here in case the order of calling (analyze/insert)_bct would be changed. */ - for (i=loop_num; i != -1; i = loop_outer_loop[i]) - loop_used_count_register[i] = 1; - - /* Set data structures which will be used by the instrumentation phase */ - loop_start_value[loop_num] = initial_value; - loop_comparison_value[loop_num] = comparison_value; - loop_increment[loop_num] = increment; - loop_comparison_code[loop_num] = comparison_code; - loop_can_insert_bct[loop_num] = 1; -} - - -/* instrument loop for insertion of bct instruction. We distinguish between - loops with compile-time bounds, to those with run-time bounds. 
The loop - behaviour is analized according to the following characteristics/variables: - ; Input variables: - ; comparison-value: the value to which the iteration counter is compared. - ; initial-value: iteration-counter initial value. - ; increment: iteration-counter increment. - ; Computed variables: - ; increment-direction: the sign of the increment. - ; compare-direction: '1' for GT, GTE, '-1' for LT, LTE, '0' for NE. - ; range-direction: sign (comparison-value - initial-value) - We give up on the following cases: - ; loop variable overflow. - ; run-time loop bounds with comparison code NE. +/* Instrument loop for insertion of bct instruction. We distinguish between + loops with compile-time bounds and those with run-time bounds. + Information from loop_iterations() is used to compute compile-time bounds. + Run-time bounds should use loop preconditioning, but currently ignored. */ static void insert_bct (loop_start, loop_end) rtx loop_start, loop_end; { - rtx initial_value, comparison_value, increment; - enum rtx_code comparison_code; + int i; + unsigned HOST_WIDE_INT n_iterations; + rtx insn; int increment_direction, compare_direction; - int unsigned_p = 0; - /* if the loop condition is <= or >=, the number of iteration - is 1 more than the range of the bounds of the loop */ + /* If the loop condition is <= or >=, the number of iteration + is 1 more than the range of the bounds of the loop. */ int add_iteration = 0; - /* the only machine mode we work with - is the integer of the size that the - machine has */ enum machine_mode loop_var_mode = word_mode; int loop_num = uid_loop_num [INSN_UID (loop_start)]; - /* get loop-variables. No need to check that these are valid - already - checked in analyze_loop_iterations (). 
*/ - comparison_code = loop_comparison_code[loop_num]; - initial_value = loop_start_value[loop_num]; - comparison_value = loop_comparison_value[loop_num]; - increment = loop_increment[loop_num]; - - /* check analyze_loop_iterations decision for this loop. */ - if (! loop_can_insert_bct[loop_num]){ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n", - loop_num); - return; - } - /* It's impossible to instrument a competely unrolled loop. */ if (loop_unroll_factor [loop_num] == -1) return; - /* make sure that the last loop insn is a conditional jump . - This check is repeated from analyze_loop_iterations (), - because unrolling might have changed that. */ - if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN - || !condjump_p (PREV_INSN (loop_end))) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: not instrumenting BCT because of invalid branch\n"); - return; - } - - /* fix increment in case loop was unrolled. */ - if (loop_unroll_factor [loop_num] > 1) - increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor [loop_num] ); - - /* determine properties and directions of the loop */ - increment_direction = (INTVAL (increment) > 0) ? 
1:-1; - switch ( comparison_code ) { - case LEU: - unsigned_p = 1; - /* fallthrough */ - case LE: - compare_direction = 1; - add_iteration = 1; - break; - case GEU: - unsigned_p = 1; - /* fallthrough */ - case GE: - compare_direction = -1; - add_iteration = 1; - break; - case EQ: - /* in this case we cannot know the number of iterations */ - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: %d: loop cannot be instrumented: == in condition\n", - loop_num); - return; - case LTU: - unsigned_p = 1; - /* fallthrough */ - case LT: - compare_direction = 1; - break; - case GTU: - unsigned_p = 1; - /* fallthrough */ - case GT: - compare_direction = -1; - break; - case NE: - compare_direction = 0; - break; - default: - abort (); - } - - - /* make sure that the loop does not end by an overflow */ - if (compare_direction != increment_direction) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: %d: loop cannot be instrumented: terminated by overflow\n", - loop_num); - return; - } - - /* try to instrument the loop. */ - - /* Handle the simpler case, where the bounds are known at compile time. */ - if (GET_CODE (initial_value) == CONST_INT - && GET_CODE (comparison_value) == CONST_INT) + /* Make sure that the count register is not in use. */ + if (loop_used_count_register [loop_num]) { - int n_iterations; - int increment_value_abs = INTVAL (increment) * increment_direction; - - /* check the relation between compare-val and initial-val */ - int difference = INTVAL (comparison_value) - INTVAL (initial_value); - int range_direction = (difference > 0) ? 
1 : -1; - - /* make sure the loop executes enough iterations to gain from BCT */ - if (difference > -3 && difference < 3) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: loop %d not BCT instrumented: too small iteration count.\n", - loop_num); - return; - } - - /* make sure that the loop executes at least once */ - if ((range_direction == 1 && compare_direction == -1) - || (range_direction == -1 && compare_direction == 1)) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: loop %d: does not iterate even once. Not instrumenting.\n", - loop_num); - return; - } - - /* make sure that the loop does not end by an overflow (in compile time - bounds we must have an additional check for overflow, because here - we also support the compare code of 'NE'. */ - if (comparison_code == NE - && increment_direction != range_direction) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n", - loop_num); - return; - } - - /* Determine the number of iterations by: - ; - ; compare-val - initial-val + (increment -1) + additional-iteration - ; num_iterations = ----------------------------------------------------------------- - ; increment - */ - difference = (range_direction > 0) ? 
difference : -difference; -#if 0 - fprintf (stderr, "difference is: %d\n", difference); /* @*/ - fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/ - fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/ - fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/ - fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/ -#endif - - if (increment_value_abs == 0) { - fprintf (stderr, "insert_bct: error: increment == 0 !!!\n"); - abort (); - } - n_iterations = (difference + increment_value_abs - 1 + add_iteration) - / increment_value_abs; - -#if 0 - fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/ -#endif - instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations)); - - /* Done with this loop. */ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT instrumentation failed: count register already in use\n", + loop_num); return; } - /* Handle the more complex case, that the bounds are NOT known at compile time. */ - /* In this case we generate run_time calculation of the number of iterations */ + /* Make sure that the function has no indirect jumps. */ + if (indirect_jump_in_function) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT instrumentation failed: indirect jump in function\n", + loop_num); + return; + } + + /* Make sure that the last loop insn is a conditional jump. */ + if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN + || ! condjump_p (PREV_INSN (loop_end)) + || simplejump_p (PREV_INSN (loop_end))) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT instrumentation failed: invalid jump at loop end\n", + loop_num); + return; + } + + /* Make sure that the loop does not contain a function call + (the count register might be altered by the called function). 
*/ + if (loop_has_call) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT instrumentation failed: function call in loop\n", + loop_num); + return; + } + + /* Make sure that the loop does not jump via a table. + (the count register might be used to perform the branch on table). */ + for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) + { + if (GET_CODE (insn) == JUMP_INSN + && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC + || GET_CODE (PATTERN (insn)) == ADDR_VEC)) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT instrumentation failed: computed branch in the loop\n", + loop_num); + return; + } + } + + /* Account for loop unrolling in instrumented iteration count. */ + if (loop_unroll_factor [loop_num] > 1) + n_iterations = loop_n_iterations / loop_unroll_factor [loop_num]; + else + n_iterations = loop_n_iterations; + + if (n_iterations != 0 && n_iterations < 3) + { + /* Allow an enclosing outer loop to benefit if possible. */ + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: Too few iterations to benefit from BCT optimization\n", + loop_num); + return; + } + + /* Try to instrument the loop. */ + + /* Handle the simpler case, where the bounds are known at compile time. */ + if (n_iterations > 0) + { + /* Mark all enclosing loops that they cannot use count register. */ + for (i=loop_num; i != -1; i = loop_outer_loop[i]) + loop_used_count_register[i] = 1; + instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations)); + return; + } + + /* Handle the more complex case, that the bounds are NOT known + at compile time. In this case we generate run_time calculation + of the number of iterations. 
*/ + + if (GET_MODE_CLASS (GET_MODE (loop_iteration_var)) != MODE_INT + || GET_MODE_SIZE (GET_MODE (loop_iteration_var)) != UNITS_PER_WORD) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: BCT Instrumentation failed: loop variable not integer\n", + loop_num); + return; + } /* With runtime bounds, if the compare is of the form '!=' we give up */ - if (comparison_code == NE) { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: fail for loop %d: runtime bounds with != comparison\n", - loop_num); - return; - } - - else { - /* We rely on the existence of run-time guard to ensure that the - loop executes at least once. */ - rtx sequence; - rtx iterations_num_reg; - - int increment_value_abs = INTVAL (increment) * increment_direction; - - /* make sure that the increment is a power of two, otherwise (an - expensive) divide is needed. */ - if (exact_log2 (increment_value_abs) == -1) - { - if (loop_dump_stream) - fprintf (loop_dump_stream, - "insert_bct: not instrumenting BCT because the increment is not power of 2\n"); - return; - } - - /* compute the number of iterations */ - start_sequence (); + if (loop_comparison_code == NE) { - rtx temp_reg; - - /* Again, the number of iterations is calculated by: - ; - ; compare-val - initial-val + (increment -1) + additional-iteration - ; num_iterations = ----------------------------------------------------------------- - ; increment - */ - /* ??? Do we have to call copy_rtx here before passing rtx to - expand_binop? 
*/ - if (compare_direction > 0) { - /* <, <= :the loop variable is increasing */ - temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value, - initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - else { - temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value, - comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - - if (increment_value_abs - 1 + add_iteration != 0) - temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg, - GEN_INT (increment_value_abs - 1 + add_iteration), - NULL_RTX, 0, OPTAB_LIB_WIDEN); - - if (increment_value_abs != 1) - { - /* ??? This will generate an expensive divide instruction for - most targets. The original authors apparently expected this - to be a shift, since they test for power-of-2 divisors above, - but just naively generating a divide instruction will not give - a shift. It happens to work for the PowerPC target because - the rs6000.md file has a divide pattern that emits shifts. - It will probably not work for any other target. */ - iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab, - temp_reg, - GEN_INT (increment_value_abs), - NULL_RTX, 0, OPTAB_LIB_WIDEN); - } - else - iterations_num_reg = temp_reg; + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct %d: runtime bounds with != comparison\n", + loop_num); + return; } - sequence = gen_sequence (); - end_sequence (); - emit_insn_before (sequence, loop_start); - instrument_loop_bct (loop_start, loop_end, iterations_num_reg); - } +/* Use common loop preconditioning code instead. */ +#if 0 + else + { + /* We rely on the existence of run-time guard to ensure that the + loop executes at least once. */ + rtx sequence; + rtx iterations_num_reg; + + unsigned HOST_WIDE_INT increment_value_abs + = INTVAL (increment) * increment_direction; + + /* make sure that the increment is a power of two, otherwise (an + expensive) divide is needed. 
*/ + if (exact_log2 (increment_value_abs) == -1) + { + if (loop_dump_stream) + fprintf (loop_dump_stream, + "insert_bct: not instrumenting BCT because the increment is not power of 2\n"); + return; + } + + /* compute the number of iterations */ + start_sequence (); + { + rtx temp_reg; + + /* Again, the number of iterations is calculated by: + ; + ; compare-val - initial-val + (increment -1) + additional-iteration + ; num_iterations = ----------------------------------------------------------------- + ; increment + */ + /* ??? Do we have to call copy_rtx here before passing rtx to + expand_binop? */ + if (compare_direction > 0) + { + /* <, <= :the loop variable is increasing */ + temp_reg = expand_binop (loop_var_mode, sub_optab, + comparison_value, initial_value, + NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + else + { + temp_reg = expand_binop (loop_var_mode, sub_optab, + initial_value, comparison_value, + NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + + if (increment_value_abs - 1 + add_iteration != 0) + temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg, + GEN_INT (increment_value_abs - 1 + + add_iteration), + NULL_RTX, 0, OPTAB_LIB_WIDEN); + + if (increment_value_abs != 1) + { + /* ??? This will generate an expensive divide instruction for + most targets. The original authors apparently expected this + to be a shift, since they test for power-of-2 divisors above, + but just naively generating a divide instruction will not give + a shift. It happens to work for the PowerPC target because + the rs6000.md file has a divide pattern that emits shifts. + It will probably not work for any other target. 
*/ + iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab, + temp_reg, + GEN_INT (increment_value_abs), + NULL_RTX, 0, OPTAB_LIB_WIDEN); + } + else + iterations_num_reg = temp_reg; + } + sequence = gen_sequence (); + end_sequence (); + emit_insn_before (sequence, loop_start); + instrument_loop_bct (loop_start, loop_end, iterations_num_reg); + } + + return; +#endif /* Complex case */ } -/* instrument loop by inserting a bct in it. This is done in the following way: - 1. A new register is created and assigned the hard register number of the count - register. - 2. In the head of the loop the new variable is initialized by the value passed in the - loop_num_iterations parameter. +/* Instrument loop by inserting a bct in it as follows: + 1. A new counter register is created. + 2. In the head of the loop the new variable is initialized to the value + passed in the loop_num_iterations parameter. 3. At the end of the loop, comparison of the register with 0 is generated. - The created comparison follows the pattern defined for the - decrement_and_branch_on_count insn, so this insn will be generated in assembly - generation phase. - 4. The compare&branch on the old variable is deleted. So, if the loop-variable was - not used elsewhere, it will be eliminated by data-flow analisys. */ + The created comparison follows the pattern defined for the + decrement_and_branch_on_count insn, so this insn will be generated. + 4. The branch on the old variable is deleted. The compare must remain + because it might be used elsewhere. If the loop-variable or condition + register are not used elsewhere, they will be eliminated by flow. 
*/ static void instrument_loop_bct (loop_start, loop_end, loop_num_iterations) rtx loop_start, loop_end; rtx loop_num_iterations; { - rtx temp_reg1, temp_reg2; + rtx counter_reg; rtx start_label; - rtx sequence; - enum machine_mode loop_var_mode = word_mode; if (HAVE_decrement_and_branch_on_count) { if (loop_dump_stream) - fprintf (loop_dump_stream, "Loop: Inserting BCT\n"); + { + fputs ("instrument_bct: Inserting BCT (", loop_dump_stream); + if (GET_CODE (loop_num_iterations) == CONST_INT) + fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, + INTVAL (loop_num_iterations)); + else + fputs ("runtime", loop_dump_stream); + fputs (" iterations)", loop_dump_stream); + } /* Discard original jump to continue loop. Original compare result may still be live, so it cannot be discarded explicitly. */ delete_insn (PREV_INSN (loop_end)); - /* insert the label which will delimit the start of the loop */ + /* Insert the label which will delimit the start of the loop. */ start_label = gen_label_rtx (); emit_label_after (start_label, loop_start); - /* insert initialization of the count register into the loop header */ + /* Insert initialization of the count register into the loop header. 
*/ start_sequence (); - temp_reg1 = gen_reg_rtx (loop_var_mode); - emit_insn (gen_move_insn (temp_reg1, loop_num_iterations)); - - /* this will be count register */ - temp_reg2 = gen_rtx_REG (loop_var_mode, COUNT_REGISTER_REGNUM); - /* we have to move the value to the count register from an GPR - because rtx pointed to by loop_num_iterations could contain - expression which cannot be moved into count register */ - emit_insn (gen_move_insn (temp_reg2, temp_reg1)); - + counter_reg = gen_reg_rtx (word_mode); + emit_insn (gen_move_insn (counter_reg, loop_num_iterations)); sequence = gen_sequence (); end_sequence (); emit_insn_before (sequence, loop_start); - /* insert new comparison on the count register instead of the + /* Insert new comparison on the count register instead of the old one, generating the needed BCT pattern (that will be later recognized by assembly generation phase). */ - emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, + emit_jump_insn_before (gen_decrement_and_branch_on_count (counter_reg, start_label), loop_end); LABEL_NUSES (start_label)++; @@ -8517,8 +8220,6 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations) } #endif /* HAVE_decrement_and_branch_on_count */ -#endif /* HAIFA */ - /* Scan the function and determine whether it has indirect (computed) jumps. This is taken mostly from flow.c; similar code exists elsewhere diff --git a/gcc/loop.h b/gcc/loop.h index 6851aa78f0d..d0ae25f4d45 100644 --- a/gcc/loop.h +++ b/gcc/loop.h @@ -1,5 +1,5 @@ /* Loop optimization definitions for GNU C-Compiler - Copyright (C) 1991, 1995 Free Software Foundation, Inc. + Copyright (C) 1991, 1995, 1998 Free Software Foundation, Inc. This file is part of GNU CC. @@ -184,9 +184,12 @@ void emit_unrolled_add PROTO((rtx, rtx, rtx)); int back_branch_in_range_p PROTO((rtx, rtx, rtx)); extern int *loop_unroll_factor; -#ifdef HAIFA -/* variables for interaction between unroll.c and loop.c, for - the insertion of branch-on-count instruction. 
*/ -extern rtx *loop_start_value; -#endif /* HAIFA */ + +#ifdef HAVE_decrement_and_branch_on_count +extern rtx loop_iteration_var; +extern rtx loop_initial_value; +extern rtx loop_increment; +extern rtx loop_final_value; +extern enum rtx_code loop_comparison_code; +#endif /* HAVE_decrement_and_branch_on_count */ diff --git a/gcc/unroll.c b/gcc/unroll.c index 72f1f046442..3222e207890 100644 --- a/gcc/unroll.c +++ b/gcc/unroll.c @@ -189,11 +189,11 @@ static int *splittable_regs_updates; /* Values describing the current loop's iteration variable. These are set up by loop_iterations, and used by precondition_loop_p. */ -static rtx loop_iteration_var; -static rtx loop_initial_value; -static rtx loop_increment; -static rtx loop_final_value; -static enum rtx_code loop_comparison_code; +rtx loop_iteration_var; +rtx loop_initial_value; +rtx loop_increment; +rtx loop_final_value; +enum rtx_code loop_comparison_code; /* Forward declarations. */ @@ -1127,13 +1127,6 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before, /* Set unroll type to MODULO now. */ unroll_type = UNROLL_MODULO; loop_preconditioned = 1; - -#ifdef HAIFA - /* Fix the initial value for the loop as needed. */ - if (loop_n_iterations <= 0) - loop_start_value [uid_loop_num [INSN_UID (loop_start)]] - = initial_value; -#endif } }