From 2bd1e239eed6f64319af87e9f437eab1ca83d63e Mon Sep 17 00:00:00 2001
From: Steven Bosscher
Date: Wed, 27 Jul 2005 16:28:34 +0000
Subject: [PATCH] re PR rtl-optimization/17808 (Scheduler overly conservative in sched-deps)

PR rtl-optimization/17808
* sched-deps.c (sched_get_condition): Enable #if 0'ed code.
(sched_insns_conditions_mutex_p): Split out from...
(add_dependence): ...here.  But don't call it from here.
(add_dependence_list): Check sched_insns_conditions_mutex_p
before calling add_dependence.
(add_dependence_list_and_free): Likewise.
(fixup_sched_groups): Likewise.
(sched_analyze_1): Likewise.
(sched_analyze_2): Likewise (and replace a "0" with REG_DEP_TRUE).
(sched_analyze): Likewise.
(sched_analyze_insn): Likewise.
* sched-ebb.c (add_deps_for_risky_insns): Likewise.
* sched-rgn.c (add_branch_dependences): Likewise.  Also, add
dependencies on all COND_EXEC insns to jumps ending basic blocks
when doing intrablock scheduling.
* sched-int.h (sched_insns_conditions_mutex_p): Add prototype.

From-SVN: r102433
---
 gcc/ChangeLog    |  20 ++++++++
 gcc/sched-deps.c | 118 ++++++++++++++++++++++++++---------------------
 gcc/sched-ebb.c  |   3 +-
 gcc/sched-int.h  |   1 +
 gcc/sched-rgn.c  |  59 +++++++++++++++++++++++-
 5 files changed, 146 insertions(+), 55 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 968809f6d49..363c9600fb3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2005-07-27  Steven Bosscher
+
+	PR rtl-optimization/17808
+	* sched-deps.c (sched_get_condition): Enable #if 0'ed code.
+	(sched_insns_conditions_mutex_p): Split out from...
+	(add_dependence): ...here.  But don't call it from here.
+	(add_dependence_list): Check sched_insns_conditions_mutex_p
+	before calling add_dependence.
+	(add_dependence_list_and_free): Likewise.
+	(fixup_sched_groups): Likewise.
+	(sched_analyze_1): Likewise.
+	(sched_analyze_2): Likewise (and replace a "0" with REG_DEP_TRUE).
+	(sched_analyze): Likewise.
+	(sched_analyze_insn): Likewise.
+	* sched-ebb.c (add_deps_for_risky_insns): Likewise.
+	* sched-rgn.c (add_branch_dependences): Likewise.  Also, add
+	dependencies on all COND_EXEC insns to jumps ending basic blocks
+	when doing intrablock scheduling.
+	* sched-int.h (sched_insns_conditions_mutex_p): Add prototype.
+
 2005-07-27  Jeff Law
 
 	* tree-vrp.c (vrp_meet): Intersect the equivalency sets when
diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c
index 6c7b7ec9a33..7baad1b669e 100644
--- a/gcc/sched-deps.c
+++ b/gcc/sched-deps.c
@@ -149,11 +149,7 @@ sched_get_condition (rtx insn)
     return 0;
 
   src = SET_SRC (pc_set (insn));
-#if 0
-  /* The previous code here was completely invalid and could never extract
-     the condition from a jump.  This code does the correct thing, but that
-     triggers latent bugs later in the scheduler on ports with conditional
-     execution.  So this is disabled for now.  */
+
   if (XEXP (src, 2) == pc_rtx)
     return XEXP (src, 0);
   else if (XEXP (src, 1) == pc_rtx)
@@ -166,11 +162,11 @@
       return gen_rtx_fmt_ee (revcode, GET_MODE (cond), XEXP (cond, 0),
                              XEXP (cond, 1));
     }
-#endif
 
   return 0;
 }
 
+
 /* Return nonzero if conditions COND1 and COND2 can never be both true.  */
 
 static int
@@ -184,6 +180,32 @@ conditions_mutex_p (rtx cond1, rtx cond2)
     return 1;
   return 0;
 }
+
+/* Return true if insn1 and insn2 can never depend on one another because
+   the conditions under which they are executed are mutually exclusive.  */
+bool
+sched_insns_conditions_mutex_p (rtx insn1, rtx insn2)
+{
+  rtx cond1, cond2;
+
+  /* flow.c doesn't handle conditional lifetimes entirely correctly;
+     calls mess up the conditional lifetimes.  */
+  if (!CALL_P (insn1) && !CALL_P (insn2))
+    {
+      cond1 = sched_get_condition (insn1);
+      cond2 = sched_get_condition (insn2);
+      if (cond1 && cond2
+          && conditions_mutex_p (cond1, cond2)
+          /* Make sure first instruction doesn't affect condition of second
+             instruction if switched.  */
+          && !modified_in_p (cond1, insn2)
+          /* Make sure second instruction doesn't affect condition of first
+             instruction if switched.  */
+          && !modified_in_p (cond2, insn1))
+        return true;
+    }
+  return false;
+}
 
 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to
    the LOG_LINKS of INSN, if not already there.  DEP_TYPE indicates the
@@ -195,7 +217,6 @@ add_dependence (rtx insn, rtx elem, enum reg_note dep_type)
 {
   rtx link;
   int present_p;
-  rtx cond1, cond2;
 
   /* Don't depend an insn on itself.  */
   if (insn == elem)
@@ -207,26 +228,6 @@ add_dependence (rtx insn, rtx elem, enum reg_note dep_type)
   if (NOTE_P (elem))
     return 0;
 
-  /* flow.c doesn't handle conditional lifetimes entirely correctly;
-     calls mess up the conditional lifetimes.  */
-  /* ??? add_dependence is the wrong place to be eliding dependencies,
-     as that forgets that the condition expressions themselves may
-     be dependent.  */
-  if (!CALL_P (insn) && !CALL_P (elem))
-    {
-      cond1 = sched_get_condition (insn);
-      cond2 = sched_get_condition (elem);
-      if (cond1 && cond2
-          && conditions_mutex_p (cond1, cond2)
-          /* Make sure first instruction doesn't affect condition of second
-             instruction if switched.  */
-          && !modified_in_p (cond1, elem)
-          /* Make sure second instruction doesn't affect condition of first
-             instruction if switched.  */
-          && !modified_in_p (cond2, insn))
-        return 0;
-    }
-
   present_p = 1;
 #ifdef INSN_SCHEDULING
   /* ??? No good way to tell from here whether we're doing interblock
@@ -348,7 +349,10 @@ static void
 add_dependence_list (rtx insn, rtx list, enum reg_note dep_type)
 {
   for (; list; list = XEXP (list, 1))
-    add_dependence (insn, XEXP (list, 0), dep_type);
+    {
+      if (! sched_insns_conditions_mutex_p (insn, XEXP (list, 0)))
+        add_dependence (insn, XEXP (list, 0), dep_type);
+    }
 }
 
 /* Similar, but free *LISTP at the same time.  */
@@ -360,7 +364,8 @@ add_dependence_list_and_free (rtx insn, rtx *listp, enum reg_note dep_type)
   for (list = *listp, *listp = NULL; list ; list = next)
     {
       next = XEXP (list, 1);
-      add_dependence (insn, XEXP (list, 0), dep_type);
+      if (! sched_insns_conditions_mutex_p (insn, XEXP (list, 0)))
+        add_dependence (insn, XEXP (list, 0), dep_type);
       free_INSN_LIST_node (list);
     }
 }
@@ -393,7 +398,7 @@ delete_all_dependences (rtx insn)
 static void
 fixup_sched_groups (rtx insn)
 {
-  rtx link;
+  rtx link, prev_nonnote;
 
   for (link = LOG_LINKS (insn); link ; link = XEXP (link, 1))
     {
@@ -405,14 +410,17 @@ fixup_sched_groups (rtx insn)
           if (XEXP (link, 0) == i)
             goto next_link;
         } while (SCHED_GROUP_P (i));
-      add_dependence (i, XEXP (link, 0), REG_NOTE_KIND (link));
+      if (! sched_insns_conditions_mutex_p (i, XEXP (link, 0)))
+        add_dependence (i, XEXP (link, 0), REG_NOTE_KIND (link));
     next_link:;
     }
 
   delete_all_dependences (insn);
 
-  if (BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (prev_nonnote_insn (insn)))
-    add_dependence (insn, prev_nonnote_insn (insn), REG_DEP_ANTI);
+  prev_nonnote = prev_nonnote_insn (insn);
+  if (BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (prev_nonnote)
+      && ! sched_insns_conditions_mutex_p (insn, prev_nonnote))
+    add_dependence (insn, prev_nonnote, REG_DEP_ANTI);
 }
 
 /* Process an insn's memory dependencies.  There are four kinds of
@@ -620,7 +628,8 @@ sched_analyze_1 (struct deps *deps, rtx x, rtx insn)
           pending_mem = deps->pending_read_mems;
           while (pending)
             {
-              if (anti_dependence (XEXP (pending_mem, 0), t))
+              if (anti_dependence (XEXP (pending_mem, 0), t)
+                  && ! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
                 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
 
               pending = XEXP (pending, 1);
@@ -631,7 +640,8 @@ sched_analyze_1 (struct deps *deps, rtx x, rtx insn)
           pending_mem = deps->pending_write_mems;
           while (pending)
             {
-              if (output_dependence (XEXP (pending_mem, 0), t))
+              if (output_dependence (XEXP (pending_mem, 0), t)
+                  && ! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
                 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
 
               pending = XEXP (pending, 1);
@@ -759,7 +769,8 @@ sched_analyze_2 (struct deps *deps, rtx x, rtx insn)
             pending_mem = deps->pending_read_mems;
             while (pending)
               {
-                if (read_dependence (XEXP (pending_mem, 0), t))
+                if (read_dependence (XEXP (pending_mem, 0), t)
+                    && ! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
                   add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
 
                 pending = XEXP (pending, 1);
@@ -771,16 +782,17 @@ sched_analyze_2 (struct deps *deps, rtx x, rtx insn)
             while (pending)
               {
                 if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
-                                     t, rtx_varies_p))
-                  add_dependence (insn, XEXP (pending, 0), 0);
+                                     t, rtx_varies_p)
+                    && ! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
+                  add_dependence (insn, XEXP (pending, 0), REG_DEP_TRUE);
 
                 pending = XEXP (pending, 1);
                 pending_mem = XEXP (pending_mem, 1);
               }
 
             for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
-              if (!JUMP_P (XEXP (u, 0))
-                  || deps_may_trap_p (x))
+              if ((! JUMP_P (XEXP (u, 0)) || deps_may_trap_p (x))
+                  && ! sched_insns_conditions_mutex_p (insn, XEXP (u, 0)))
                 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
 
             /* Always add these dependencies to pending_reads, since
@@ -966,7 +978,8 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
           pending_mem = deps->pending_write_mems;
           while (pending)
             {
-              add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
+              if (! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
+                add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
               pending = XEXP (pending, 1);
               pending_mem = XEXP (pending_mem, 1);
             }
@@ -975,7 +988,8 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
           pending_mem = deps->pending_read_mems;
           while (pending)
             {
-              if (MEM_VOLATILE_P (XEXP (pending_mem, 0)))
+              if (MEM_VOLATILE_P (XEXP (pending_mem, 0))
+                  && ! sched_insns_conditions_mutex_p (insn, XEXP (pending, 0)))
                 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
               pending = XEXP (pending, 1);
               pending_mem = XEXP (pending_mem, 1);
@@ -1019,7 +1033,7 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
     {
       /* In the case of barrier the most added dependencies are not
          real, so we use anti-dependence here.  */
-      if (GET_CODE (PATTERN (insn)) == COND_EXEC)
+      if (sched_get_condition (insn))
         {
           EXECUTE_IF_SET_IN_REG_SET (&deps->reg_last_in_use, 0, i, rsi)
             {
@@ -1027,10 +1041,10 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
               struct deps_reg *reg_last = &deps->reg_last[i];
 
               add_dependence_list (insn, reg_last->uses, REG_DEP_ANTI);
               add_dependence_list (insn, reg_last->sets,
-                                   reg_pending_barrier == TRUE_BARRIER ? 0 : REG_DEP_ANTI);
+                                   reg_pending_barrier == TRUE_BARRIER ? REG_DEP_TRUE : REG_DEP_ANTI);
               add_dependence_list (insn, reg_last->clobbers,
-                                   reg_pending_barrier == TRUE_BARRIER ? 0 : REG_DEP_ANTI);
+                                   reg_pending_barrier == TRUE_BARRIER ? REG_DEP_TRUE : REG_DEP_ANTI);
             }
         }
       else
@@ -1042,10 +1056,10 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
 
               add_dependence_list_and_free (insn, &reg_last->uses,
                                             REG_DEP_ANTI);
               add_dependence_list_and_free (insn, &reg_last->sets,
-                                            reg_pending_barrier == TRUE_BARRIER ? 0 : REG_DEP_ANTI);
+                                            reg_pending_barrier == TRUE_BARRIER ? REG_DEP_TRUE : REG_DEP_ANTI);
               add_dependence_list_and_free (insn, &reg_last->clobbers,
-                                            reg_pending_barrier == TRUE_BARRIER ? 0 : REG_DEP_ANTI);
+                                            reg_pending_barrier == TRUE_BARRIER ? REG_DEP_TRUE : REG_DEP_ANTI);
               reg_last->uses_length = 0;
               reg_last->clobbers_length = 0;
             }
@@ -1066,13 +1080,13 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
     {
       /* If the current insn is conditional, we can't free any
          of the lists.  */
-      if (GET_CODE (PATTERN (insn)) == COND_EXEC)
+      if (sched_get_condition (insn))
         {
           EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
             {
               struct deps_reg *reg_last = &deps->reg_last[i];
-              add_dependence_list (insn, reg_last->sets, 0);
-              add_dependence_list (insn, reg_last->clobbers, 0);
+              add_dependence_list (insn, reg_last->sets, REG_DEP_TRUE);
+              add_dependence_list (insn, reg_last->clobbers, REG_DEP_TRUE);
               reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
               reg_last->uses_length++;
             }
@@ -1099,8 +1113,8 @@ sched_analyze_insn (struct deps *deps, rtx x, rtx insn, rtx loop_notes)
           EXECUTE_IF_SET_IN_REG_SET (reg_pending_uses, 0, i, rsi)
             {
               struct deps_reg *reg_last = &deps->reg_last[i];
-              add_dependence_list (insn, reg_last->sets, 0);
-              add_dependence_list (insn, reg_last->clobbers, 0);
+              add_dependence_list (insn, reg_last->sets, REG_DEP_TRUE);
+              add_dependence_list (insn, reg_last->clobbers, REG_DEP_TRUE);
               reg_last->uses_length++;
               reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
             }
diff --git a/gcc/sched-ebb.c b/gcc/sched-ebb.c
index c2d7988a1c0..58f5d338ecd 100644
--- a/gcc/sched-ebb.c
+++ b/gcc/sched-ebb.c
@@ -454,7 +454,8 @@ add_deps_for_risky_insns (rtx head, rtx tail)
             /* We can not change the mode of the backward
                dependency because REG_DEP_ANTI has the lowest
                rank.  */
-            if (add_dependence (insn, prev, REG_DEP_ANTI))
+            if (! sched_insns_conditions_mutex_p (insn, prev)
+                && add_dependence (insn, prev, REG_DEP_ANTI))
               add_forward_dependence (prev, insn, REG_DEP_ANTI);
 
             break;
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 47c58552c0c..46ec6e77327 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -331,6 +331,7 @@ enum INSN_TRAP_CLASS
 extern void print_insn (char *, rtx, int);
 
 /* Functions in sched-deps.c.  */
+extern bool sched_insns_conditions_mutex_p (rtx, rtx);
 extern int add_dependence (rtx, rtx, enum reg_note);
 extern void sched_analyze (struct deps *, rtx, rtx);
 extern void init_deps (struct deps *);
diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c
index 1083c5c0cad..a4c19648200 100644
--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -1883,6 +1883,8 @@ add_branch_dependences (rtx head, rtx tail)
    cc0 setters remain at the end because they can't be moved away from
    their cc0 user.
 
+   COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
+
    Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
    are not moved before reload because we can wind up with register
    allocation failures.  */
@@ -1906,7 +1908,8 @@ add_branch_dependences (rtx head, rtx tail)
     {
       if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
         {
-          add_dependence (last, insn, REG_DEP_ANTI);
+          if (! sched_insns_conditions_mutex_p (last, insn))
+            add_dependence (last, insn, REG_DEP_ANTI);
           INSN_REF_COUNT (insn)++;
         }
 
@@ -1932,9 +1935,61 @@ add_branch_dependences (rtx head, rtx tail)
         if (INSN_REF_COUNT (insn) != 0)
           continue;
 
-        add_dependence (last, insn, REG_DEP_ANTI);
+        if (! sched_insns_conditions_mutex_p (last, insn))
+          add_dependence (last, insn, REG_DEP_ANTI);
         INSN_REF_COUNT (insn) = 1;
       }
+
+#ifdef HAVE_conditional_execution
+  /* Finally, if the block ends in a jump, and we are doing intra-block
+     scheduling, make sure that the branch depends on any COND_EXEC insns
+     inside the block to avoid moving the COND_EXECs past the branch insn.
+
+     We only have to do this after reload, because (1) before reload there
+     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
+     scheduler after reload.
+
+     FIXME: We could in some cases move COND_EXEC insns past the branch if
+     this scheduler would be a little smarter.  Consider this code:
+
+              T = [addr]
+       C  ?   addr += 4
+       !C ?   X += 12
+       C  ?   T += 1
+       C  ?   jump foo
+
+     On a target with a one cycle stall on a memory access the optimal
+     sequence would be:
+
+              T = [addr]
+       C  ?   addr += 4
+       C  ?   T += 1
+       C  ?   jump foo
+       !C ?   X += 12
+
+     We don't want to put the 'X += 12' before the branch because it just
+     wastes a cycle of execution time when the branch is taken.
+
+     Note that in the example "!C" will always be true.  That is another
+     possible improvement for handling COND_EXECs in this scheduler: it
+     could remove always-true predicates.  */
+
+  if (!reload_completed || ! JUMP_P (tail))
+    return;
+
+  insn = PREV_INSN (tail);
+  while (insn != head)
+    {
+      /* Note that we want to add this dependency even when
+         sched_insns_conditions_mutex_p returns true.  The whole point
+         is that we _want_ this dependency, even if these insns really
+         are independent.  */
+      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
+        add_dependence (tail, insn, REG_DEP_ANTI);
+
+      insn = PREV_INSN (insn);
+    }
+#endif
 }
 
 /* Data structures for the computation of data dependences in a regions.  We
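
To make the core idea of the patch concrete: sched_insns_conditions_mutex_p lets
the scheduler drop a dependence between two predicated insns whose conditions can
never both be true, unless one insn writes a register that the other insn's
condition tests.  Below is a minimal standalone sketch of that test.  It is a toy
model, not GCC code: a condition is reduced to a (register, sense) pair,
conds_mutex_p stands in for conditions_mutex_p (which compares real RTL
comparisons), and modifies_cond_p stands in for modified_in_p.  All names in the
sketch are hypothetical.

    /* Toy model of the mutual-exclusion test introduced by this patch.
       Not GCC code; all names are invented for illustration.  */
    #include <stdbool.h>
    #include <stdio.h>

    struct cond { int reg; bool sense; };  /* predicate: (reg != 0) == sense */
    struct insn { const struct cond *cond; int set_reg; };  /* predicated write */

    /* Analog of conditions_mutex_p: the same tested register with opposite
       sense can never be true for both insns at once.  */
    static bool
    conds_mutex_p (const struct cond *c1, const struct cond *c2)
    {
      return c1->reg == c2->reg && c1->sense != c2->sense;
    }

    /* Analog of modified_in_p: does insn I write the register C tests?  */
    static bool
    modifies_cond_p (const struct cond *c, const struct insn *i)
    {
      return i->set_reg == c->reg;
    }

    /* Analog of sched_insns_conditions_mutex_p: both insns must be predicated,
       the predicates must be mutually exclusive, and neither insn may change
       the register the other insn's predicate tests.  */
    static bool
    insns_mutex_p (const struct insn *i1, const struct insn *i2)
    {
      return i1->cond && i2->cond
             && conds_mutex_p (i1->cond, i2->cond)
             && !modifies_cond_p (i1->cond, i2)
             && !modifies_cond_p (i2->cond, i1);
    }

    int
    main (void)
    {
      struct cond c = { 1, true }, not_c = { 1, false };
      struct insn a = { &c, 2 };      /*  C ? r2 = ...  */
      struct insn b = { &not_c, 2 };  /* !C ? r2 = ...  */
      struct insn t = { &not_c, 1 };  /* !C ? r1 = ... (writes the register
                                         that both predicates test) */

      /* a and b write the same register, but at most one of them executes,
         so no dependence is needed and the scheduler may reorder them.  */
      printf ("a,b mutex: %d\n", insns_mutex_p (&a, &b));  /* prints 1 */

      /* t clobbers r1, which a's predicate tests; swapping them could change
         which insn executes, so the dependence must be kept.  */
      printf ("a,t mutex: %d\n", insns_mutex_p (&a, &t));  /* prints 0 */
      return 0;
    }

Note how the sched-rgn.c hunk above deliberately does not apply this predicate to
COND_EXEC insns ahead of a branch: there the anti-dependence is wanted even
between provably independent insns, precisely so the COND_EXECs cannot be
scheduled past the jump.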