i386.c (ix86_dep_by_shift_count_body): Add check on reload_completed since it can be invoked before register...

* config/i386/i386.c (ix86_dep_by_shift_count_body) : Add
         check on reload_completed since it can be invoked before
         register allocation phase in pre-reload schedule.
         (ia32_multipass_dfa_lookahead) : Do not use dfa_lookahead for pre-reload
         schedule to save compile time.
         (ix86_sched_reorder) : Do not perform ready list reordering for pre-reload
         schedule to save compile time.
         (insn_is_function_arg) : New function. Returns true if lhs of insn is
         HW function argument register.
         (add_parameter_dependencies) : New function. Add output dependencies
         for a chain of adjacent function arguments only if there is a move
         to a likely spilled HW register. Return first argument if at least one
         dependence was added or NULL otherwise.
         (avoid_func_arg_motion) : New function. Add output or anti dependency
         from insn to first_arg to restrict code motion.
         (add_dependee_for_func_arg) : New function. Avoid cross block motion of
         function argument through adding dependency from the first non-jump
         insn in bb.
         (ix86_dependencies_evaluation_hook) : New function. Hook for pre-reload schedule:
         avoid motion of function arguments passed in likely spilled
         HW registers.
         (ix86_adjust_priority) : New function. Hook for pre-reload schedule: set priority
         of moves from likely spilled HW registers to maximum to schedule them
         as soon as possible.
         (ix86_sched_init_global): Do not perform multipass scheduling for pre-reload
         schedule to save compile time.

From-SVN: r192065
This commit is contained in:
Yuri Rumyantsev 2012-10-04 08:13:18 +00:00 committed by Kirill Yukhin
parent 3d47a53a93
commit dee7f32e24
2 changed files with 267 additions and 25 deletions

View file

@ -1,3 +1,32 @@
2012-10-04 Yuri Rumyantsev <ysrumyan@gmail.com>
* config/i386/i386.c (ix86_dep_by_shift_count_body) : Add
check on reload_completed since it can be invoked before
register allocation phase in pre-reload schedule.
(ia32_multipass_dfa_lookahead) : Do not use dfa_lookahead for pre-reload
schedule to save compile time.
(ix86_sched_reorder) : Do not perform ready list reordering for pre-reload
schedule to save compile time.
(insn_is_function_arg) : New function. Returns true if lhs of insn is
HW function argument register.
(add_parameter_dependencies) : New function. Add output dependencies
for a chain of adjacent function arguments only if there is a move
to a likely spilled HW register. Return first argument if at least one
dependence was added or NULL otherwise.
(avoid_func_arg_motion) : New function. Add output or anti dependency
from insn to first_arg to restrict code motion.
(add_dependee_for_func_arg) : New function. Avoid cross block motion of
function argument through adding dependency from the first non-jump
insn in bb.
(ix86_dependencies_evaluation_hook) : New function. Hook for pre-reload schedule:
avoid motion of function arguments passed in likely spilled
HW registers.
(ix86_adjust_priority) : New function. Hook for pre-reload schedule: set priority
of moves from likely spilled HW registers to maximum to schedule them
as soon as possible.
(ix86_sched_init_global): Do not perform multipass scheduling for pre-reload
schedule to save compile time.
2012-10-04 Uros Bizjak <ubizjak@gmail.com>
* configure.ac (noexception_flags): Add -fasynchronous-unwind-tables.

View file

@ -17520,9 +17520,16 @@ ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
rtx shift_count = XEXP (shift_rtx, 1);
/* Return true if shift count is dest of SET_BODY. */
if (REG_P (shift_count)
&& true_regnum (set_dest) == true_regnum (shift_count))
return true;
if (REG_P (shift_count))
{
/* Add check since it can be invoked before register
allocation in pre-reload schedule. */
if (reload_completed
&& true_regnum (set_dest) == true_regnum (shift_count))
return true;
else if (REGNO(set_dest) == REGNO(shift_count))
return true;
}
}
return false;
@ -24278,7 +24285,10 @@ ia32_multipass_dfa_lookahead (void)
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
issue_rate. I wonder why tuning for many CPUs does not do this. */
return ix86_issue_rate ();
if (reload_completed)
return ix86_issue_rate ();
/* Don't use lookahead for pre-reload schedule to save compile time. */
return 0;
default:
return 0;
@ -24311,6 +24321,9 @@ ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
/* Do reordering for Atom only. */
if (ix86_tune != PROCESSOR_ATOM)
return issue_rate;
/* Do not perform ready list reordering for pre-reload schedule pass. */
if (!reload_completed)
return issue_rate;
/* Nothing to do if ready list contains only 1 instruction. */
if (n_ready <= 1)
return issue_rate;
@ -24393,7 +24406,198 @@ ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
return issue_rate;
}
/* Forward declaration; the pre-reload scheduling helpers below call it.  */
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Return true if INSN is a non-debug, non-call instruction whose pattern
   (or, for a PARALLEL, the first element of the pattern) is a SET whose
   destination is a hard register accepted by ix86_function_arg_regno_p.

   *IS_SPILLED is set to true when that register is not a fixed register
   and its class satisfies ix86_class_likely_spilled_p.  The flag is never
   cleared here, so the caller must initialize it and may accumulate it
   over a whole chain of argument moves.  */

static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx pat;
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;

  /* A call that stores its result in a register which is also an argument
     register would otherwise look like an argument move; call instructions
     are not movable, so never treat one as an argument set-up.  */
  if (CALL_P (insn))
    return false;

  pat = PATTERN (insn);
  if (GET_CODE (pat) == PARALLEL)
    pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) != SET)
    return false;

  dst = SET_DEST (pat);
  if (!REG_P (dst)
      || !HARD_REGISTER_P (dst)
      || !ix86_function_arg_regno_p (REGNO (dst)))
    return false;

  /* Is it a likely spilled HW register?  */
  if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
      && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
    *is_spilled = true;

  return true;
}
/* Walk backwards from CALL over the run of argument-passing instructions
   that immediately precedes it (debug instructions are skipped) and chain
   them together with output dependencies.  HEAD bounds the walk and is
   itself never examined.

   A dependence is only added once a move to a likely spilled HW register
   has been seen in the chain.  Return the argument instruction farthest
   from CALL if the chain contains such a move, or NULL otherwise (also
   when the nearest non-debug instruction before CALL is not an argument
   move at all).  */

static rtx
add_parameter_dependencies (rtx call, rtx head)
{
  rtx insn;
  rtx last = call;
  rtx first_arg = NULL;
  bool is_spilled = false;

  /* Find the nearest non-debug instruction before the call; it has to be
     an argument passing instruction, otherwise there is no chain.  */
  do
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
    }
  while (!NONDEBUG_INSN_P (last));

  if (!insn_is_function_arg (last, &is_spilled))
    return NULL;

  first_arg = last;

  /* LAST is only the scan cursor from here on: it may rest on a debug
     instruction, whereas FIRST_ARG always names the most recently found
     argument instruction.  */
  for (insn = PREV_INSN (last);
       INSN_P (insn) && insn != head;
       insn = PREV_INSN (last))
    {
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }

      if (!insn_is_function_arg (insn, &is_spilled))
        break;

      /* Add output dependence between two function arguments if the chain
         of output arguments contains likely spilled HW registers.  The
         dependence is attached to FIRST_ARG rather than LAST so that it
         never lands on a skipped debug instruction, which would make the
         dependence graph differ depending on debug info.  */
      if (is_spilled)
        add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      first_arg = last = insn;
    }

  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Restrict code motion around FIRST_ARG by making it depend on INSN.
   Nothing is done unless INSN is a single set.  The dependence is an
   output dependence when INSN sets a register and an anti dependence
   for any other destination.  */

static void
avoid_func_arg_motion (rtx first_arg, rtx insn)
{
  rtx set = single_set (insn);

  if (!set)
    return;

  if (REG_P (SET_DEST (set)))
    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
  else
    add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of function argument ARG by making it depend
   on an instruction of BB.  The block is scanned backwards from BB_END
   to BB_HEAD; the first non-debug, non-jump instruction met that is a
   single set is handed to avoid_func_arg_motion and the scan stops.
   Nothing is added if BB holds no such instruction.  */

static void
add_dependee_for_func_arg (rtx arg, basic_block bb)
{
  rtx insn;

  for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
    {
      if (NONDEBUG_INSN_P (insn)
          && NONJUMP_INSN_P (insn)
          && single_set (insn))
        {
          avoid_func_arg_motion (arg, insn);
          return;
        }

      /* Do not walk out of the block.  */
      if (insn == BB_HEAD (bb))
        return;
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.

   The instructions from TAIL back to (but not including) HEAD are
   visited.  For every call, add_parameter_dependencies builds the chain
   of its argument moves; when a chain is returned, its first argument
   additionally gets a dependee in the block of each non-debug producer
   that lives in another block, and the scan resumes before the chain.
   Every non-call instruction visited while a chain from a later call is
   still recorded is passed to avoid_func_arg_motion.  Does nothing once
   reload has completed.  */

static void
ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn;
  rtx first_arg = NULL;

  if (reload_completed)
    return;

  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      sd_iterator_def sd_it;
      dep_t dep;

      if (!(INSN_P (insn) && CALL_P (insn)))
        {
          if (first_arg)
            avoid_func_arg_motion (first_arg, insn);
          continue;
        }

      first_arg = add_parameter_dependencies (insn, head);
      if (!first_arg)
        continue;

      /* Check if first argument has dependee out of its home block.  */
      FOR_EACH_DEP (first_arg, SD_LIST_BACK, sd_it, dep)
        {
          rtx producer = DEP_PRO (dep);

          if (!NONDEBUG_INSN_P (producer))
            continue;

          /* Must add dependee for first argument in producer's block.  */
          if (BLOCK_FOR_INSN (producer) != BLOCK_FOR_INSN (first_arg))
            add_dependee_for_func_arg (first_arg, BLOCK_FOR_INSN (producer));
        }

      /* Continue the scan before the argument chain.  */
      insn = first_arg;
    }
}
/* Hook for pre-reload schedule - give moves from likely spilled HW
   registers the maximum priority, so that they are scheduled as soon as
   possible.  These are moves from function argument registers at the top
   of the function entry and moves from function return value registers
   after call.

   Return current_sched_info->sched_max_insns_priority when INSN is a
   single set whose source is a non-fixed hard register of a likely
   spilled class; return PRIORITY unchanged otherwise, including always
   once reload has completed.  */

static int
ix86_adjust_priority (rtx insn, int priority)
{
  rtx set;
  rtx src;

  if (reload_completed || !NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (!set)
    return priority;

  src = SET_SRC (set);
  if (!REG_P (src) || !HARD_REGISTER_P (src))
    return priority;
  if (TEST_HARD_REG_BIT (fixed_reg_set, REGNO (src)))
    return priority;
  if (!ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (src))))
    return priority;

  return current_sched_info->sched_max_insns_priority;
}
/* Model decoder of Core 2/i7.
Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
@ -24606,27 +24810,32 @@ ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
case PROCESSOR_CORE2_64:
case PROCESSOR_COREI7_32:
case PROCESSOR_COREI7_64:
targetm.sched.dfa_post_advance_cycle
= core2i7_dfa_post_advance_cycle;
targetm.sched.first_cycle_multipass_init
= core2i7_first_cycle_multipass_init;
targetm.sched.first_cycle_multipass_begin
= core2i7_first_cycle_multipass_begin;
targetm.sched.first_cycle_multipass_issue
= core2i7_first_cycle_multipass_issue;
targetm.sched.first_cycle_multipass_backtrack
= core2i7_first_cycle_multipass_backtrack;
targetm.sched.first_cycle_multipass_end
= core2i7_first_cycle_multipass_end;
targetm.sched.first_cycle_multipass_fini
= core2i7_first_cycle_multipass_fini;
/* Set decoder parameters. */
core2i7_secondary_decoder_max_insn_size = 8;
core2i7_ifetch_block_size = 16;
core2i7_ifetch_block_max_insns = 6;
break;
/* Do not perform multipass scheduling for pre-reload schedule
to save compile time. */
if (reload_completed)
{
targetm.sched.dfa_post_advance_cycle
= core2i7_dfa_post_advance_cycle;
targetm.sched.first_cycle_multipass_init
= core2i7_first_cycle_multipass_init;
targetm.sched.first_cycle_multipass_begin
= core2i7_first_cycle_multipass_begin;
targetm.sched.first_cycle_multipass_issue
= core2i7_first_cycle_multipass_issue;
targetm.sched.first_cycle_multipass_backtrack
= core2i7_first_cycle_multipass_backtrack;
targetm.sched.first_cycle_multipass_end
= core2i7_first_cycle_multipass_end;
targetm.sched.first_cycle_multipass_fini
= core2i7_first_cycle_multipass_fini;
/* Set decoder parameters. */
core2i7_secondary_decoder_max_insn_size = 8;
core2i7_ifetch_block_size = 16;
core2i7_ifetch_block_max_insns = 6;
break;
}
/* ... Fall through ... */
default:
targetm.sched.dfa_post_advance_cycle = NULL;
targetm.sched.first_cycle_multipass_init = NULL;
@ -39687,6 +39896,10 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
/* Scheduler hooks used for the pre-reload schedule: ix86_adjust_priority
   adjusts instruction priorities and ix86_dependencies_evaluation_hook
   adds extra dependencies; both return early once reload has completed.  */
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
/* The size of the dispatch window is the total number of bytes of
object code allowed in a window. */