2002-04-29 Vladimir Makarov <vmakarov@redhat.com>
Merging code from dfa-branch: From-SVN: r52915
This commit is contained in:
parent
a2ff290c02
commit
fae15c9379
27 changed files with 12626 additions and 1574 deletions
362
gcc/ChangeLog
362
gcc/ChangeLog
|
@ -1,3 +1,365 @@
|
|||
2002-04-29 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
Merging code from dfa-branch:
|
||||
|
||||
2002-04-24 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (output_reserv_sets): Fix typo.
|
||||
|
||||
2002-04-23 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (output_reserv_sets): Remove
|
||||
next_cycle_output_flag.
|
||||
|
||||
Thu Apr 18 08:57:06 2002 Jeffrey A Law (law@redhat.com)
|
||||
|
||||
* sched-rgn.c (init_ready_list): Make the DFA code handle
|
||||
USE/CLOBBER insns in the same way as the traditional
|
||||
scheduler.
|
||||
(new_ready): Similarly..
|
||||
|
||||
2002-04-17 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* haifa-sched.c (schedule_block): Change the DFA state only after
|
||||
issuing insn.
|
||||
|
||||
Wed Apr 17 15:38:36 2002 Jeffrey A Law (law@redhat.com)
|
||||
|
||||
* pa.c (hppa_use_dfa_pipeline_interface): New function.
|
||||
(TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): Define.
|
||||
(override_options): Add PA7300 scheduling support.
|
||||
(pa_adjust_cost): Update various comments. Properly
|
||||
handle anti and output dependencies when using the
|
||||
DFA scheduler.
|
||||
(pa_issue_rate): Add PA7300 scheduling support.
|
||||
(pa_can_combine_p): Call extract_insn before calling
|
||||
constrain_operands (taken from mainline tree).
|
||||
* pa.h (enum processor_type): Add PROCESSOR_PA7300.
|
||||
* pa.md (cpu attr): Add 7300. Rewrite pipeline
|
||||
descriptions using DFA descriptions. Add PA7300
|
||||
scheduling support.
|
||||
|
||||
2002-03-30 David S. Miller <davem@redhat.com>
|
||||
|
||||
Add UltraSPARC-III DFA scheduling support.
|
||||
* config/sparc/sparc.md (define_attr type): Add fpcrmove.
|
||||
Update FP conditional move on register insn patterns to use it, as
|
||||
appropriate.
|
||||
(define_attr cpu): Add ultrasparc3.
|
||||
(define_attr us3load_type): New, update integer load patterns to
|
||||
set it, as appropriate.
|
||||
(define_automaton): Add ultrasparc3_0 and ultrasparc3_1.
|
||||
(rest): Add UltraSPARC3 scheduling description.
|
||||
* config/sparc/sparc.h (TARGET_CPU_ultrasparc3): New.
|
||||
(PROCESSOR_ULTRASPARC3): New.
|
||||
({ASM,CPP}_CPU64_DEFAULT_SPEC): Handle ultrasparc3.
|
||||
({ASM,CPP}_CPU_SPEC): Likewise.
|
||||
(REGISTER_MOVE_COST): Likewise.
|
||||
(RTX_COSTS): Likewise.
|
||||
* config/sparc/sparc.c (sparc_override_options,
|
||||
sparc_initialize_trampoline, sparc64_initialize_trampoline,
|
||||
sparc_use_dfa_pipeline_interface, sparc_use_sched_lookahead,
|
||||
sparc_issue_rate): Likewise.
|
||||
* config/sparc/sol2.h: Likewise.
|
||||
* config/sparc/sol2-sld-64.h: Likewise.
|
||||
* config/sparc/linux64.h: Likewise.
|
||||
|
||||
2002-03-22 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* doc/md.texi: Add comments about usage the latency time for the
|
||||
different dependencies and about case when two or more conditions
|
||||
in different define_insn_reservations returns TRUE for an insn.
|
||||
|
||||
* doc/md.texi: Add reference for automaton based pipeline
|
||||
description.
|
||||
|
||||
2002-03-04 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* doc/passes.texi: Add missed information about genattrtab.
|
||||
|
||||
2002-03-01 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (output_automata_list_transition_code): Check
|
||||
automata_list on NULL.
|
||||
|
||||
2002-02-28 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (output_insn_code_cases,
|
||||
output_automata_list_min_issue_delay_code,
|
||||
output_automata_list_transition_code,
|
||||
output_automata_list_state_alts_code): Comment the functions.
|
||||
|
||||
2002-02-22 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (automata_list_el_t): New typedef.
|
||||
(get_free_automata_list_el,free_automata_list_el,
|
||||
free_automata_list, automata_list_hash, automata_list_eq_p,
|
||||
initiate_automata_lists, automata_list_start, automata_list_add,
|
||||
automata_list_finish, finish_automata_lists,
|
||||
output_insn_code_cases, output_automata_list_min_issue_delay_code,
|
||||
output_automata_list_transition_code,
|
||||
output_automata_list_state_alts_code, add_automaton_state,
|
||||
form_important_insn_automata_lists): New functions and prototypes.
|
||||
(insn_reserv_decl): Add members important_automata_list and
|
||||
processed_p.
|
||||
(ainsn): Add members important_p.
|
||||
(automata_list_el): New structure.
|
||||
(first_free_automata_list_el, current_automata_list,
|
||||
automata_list_table): New global variables.
|
||||
(create_ainsns): Initiate member important_p.
|
||||
(output_internal_min_issue_delay_func): Generate the switch and
|
||||
call output_insn_code_cases.
|
||||
(output_internal_trans_func, output_internal_state_alts_func):
|
||||
Ditto.
|
||||
(generate): Call initiate_automata_lists.
|
||||
(automaton_states): New global variable.
|
||||
(expand_automata): Call form_important_insn_automata_lists.
|
||||
(write_automata): Call finish_automata_lists.
|
||||
|
||||
2002-02-21 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (add_excls, add_presence_absence): Check that
|
||||
cpu units in the sets belong the same automaton.
|
||||
|
||||
* rtl.def (EXCLUSION_SET, PRESENCE_SET, ABSENCE_SET): Add comment
|
||||
about that cpu units in the sets belong the same automaton.
|
||||
|
||||
* doc/md.texi: Ditto.
|
||||
|
||||
2001-12-20 Naveen Sharma,Nitin Gupta <naveens@noida.hcltech.com,niting@noida.hcltech.com>
|
||||
|
||||
* config/sh/sh.c (sh_use_dfa_interface): New function.
|
||||
|
||||
(sh_issue_rate): New Function.
|
||||
TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE: define.
|
||||
TARGET_SCHED_ISSUE_RATE: define.
|
||||
|
||||
* config/sh/sh.md: Add DFA based pipeline description for SH4.
|
||||
|
||||
(define_attr insn_class): New attribute used for DFA
|
||||
scheduling.
|
||||
(define_insn cmpgtsi_t): Set attribute insn_class mt_group.
|
||||
(cmpgesi_t,cmpgtusi_t,cmpgeusi_t,cmpeqsi_t,
|
||||
cmpeqdi_t): Likewise.
|
||||
|
||||
(add,addc1,addsi3,subc,subc1,*subsi3_internal,
|
||||
negc,negsi2,ashldi3_k,lshrdi3_k,ashrdi3_k): Set insn_class
|
||||
ex_group.
|
||||
(iorsi3,rotlsi3_1,rotlsi3_31,rotlsi3_16): Likewise.
|
||||
|
||||
2001-10-03 Vladimir Makarov <vmakarov@toke.toronto.redhat.com>
|
||||
|
||||
* haifa-sched.c (queue_to_ready): Remove unnecessary condition for
|
||||
break.
|
||||
|
||||
2001-10-03 Vladimir Makarov <vmakarov@toke.toronto.redhat.com>
|
||||
|
||||
* genautomata.c (DFA_INSN_CODES_LENGTH_VARIABLE_NAME): New macro.
|
||||
(output_dfa_insn_code_func): Expand dfa_insn_codes if it is
|
||||
necessary.
|
||||
(output_dfa_start_func): Initiate new variable insn_codes_length,
|
||||
(write_automata): Output definition of the new variable.
|
||||
|
||||
2001-10-02 David S. Miller <davem@redhat.com>
|
||||
|
||||
* haifa-sched.c (advance_one_cycle): New function.
|
||||
(schedule_block): Use it.
|
||||
(queue_to_ready): Use it, and also make sure to advance the DFA
|
||||
state on all stall cycles, not just those where insn_queue links
|
||||
are found.
|
||||
|
||||
2001-10-02 Richard Sandiford <rsandifo@redhat.com>
|
||||
|
||||
* haifa-sched.c (max_issue): Remove last_p argument. Only return
|
||||
non-zero if the highest-priority instruction could be scheduled.
|
||||
(choose_ready): Remove last argument from max_issue call.
|
||||
|
||||
2001-09-28 David S. Miller <davem@redhat.com>
|
||||
|
||||
* config/sparc/sparc.c (sparc_use_sched_lookahead): Use 4 for
|
||||
ultrasparc and 3 for other multi-issue sparcs.
|
||||
|
||||
2001-09-27 David S. Miller <davem@redhat.com>
|
||||
|
||||
* config/sparc/sparc.md (cycle_display): New pattern.
|
||||
* config/sparc/sparc.c (sparc_cycle_display): New.
|
||||
(TARGET_SCHED_CYCLE_DISPLAY): Set it.
|
||||
|
||||
2001-09-25 David S. Miller <davem@redhat.com>
|
||||
|
||||
Convert all of Sparc scheduling to DFA
|
||||
* config/sparc/sparc.md: Kill all define_function_unit
|
||||
directives and replace with DFA equivalent.
|
||||
* config/sparc/sparc.c (ultrasparc_adjust_cost,
|
||||
mark_ultrasparc_pipeline_state, ultra_cmove_results_ready_p,
|
||||
ultra_fpmode_conflict_exists, ultra_find_type,
|
||||
ultra_build_types_avail, ultra_flush_pipeline,
|
||||
ultra_rescan_pipeline_state, ultrasparc_sched_reorder,
|
||||
ultrasparc_variable_issue, ultrasparc_sched_init,
|
||||
sparc_variable_issue, sparc_sched_reorder, ultra_code_from_mask,
|
||||
ultra_schedule_insn, ultra_code_names, ultra_pipe_hist,
|
||||
ultra_cur_hist, ultra_cycles_elapsed): Kill.
|
||||
(sparc_use_dfa_pipeline_interface, sparc_use_sched_lookahead,
|
||||
ultrasparc_store_bypass_p): New.
|
||||
* config/sparc/sparc-protos.h (ultrasparc_store_bypass_p):
|
||||
Declare.
|
||||
|
||||
2001-09-24 David S. Miller <davem@redhat.com>
|
||||
|
||||
* haifa-sched.c (ready_remove): Fix thinko, we want to copy around
|
||||
ready->vec[foo] not ready[foo].
|
||||
|
||||
2001-09-07 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* doc/md.texi: Correct examples for define_insn_reservations
|
||||
`mult' and `div'.
|
||||
|
||||
2001-09-07 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* genautomata.c (create_automata): Print message about creation of
|
||||
each automaton.
|
||||
(generate): Remove printing meease about creation of
|
||||
automata.
|
||||
|
||||
2001-09-05 David S. Miller <davem@redhat.com>
|
||||
|
||||
* config/sparc/linux.h: Set CPLUSPLUS_CPP_SPEC.
|
||||
* config/sparc/linux64.h: Likewise.
|
||||
|
||||
2001-08-31 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
* haifa-sched.c (insn_cost, schedule_insn, queue_to_ready,
|
||||
schedule_block, sched_init, sched_finish): Add missed calls of
|
||||
use_dfa_pipeline_interface.
|
||||
|
||||
* sched-rgn.c (init_ready_list, new_ready, debug_dependencies):
|
||||
Ditto.
|
||||
|
||||
* sched-vis.c (get_visual_tbl_length): Ditto.
|
||||
|
||||
2001-08-27 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* genattr.c (main): Emit state_t even when not doing scheduling.
|
||||
|
||||
2001-08-27 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* genautomata.c (expand_automata): Always create a description.
|
||||
|
||||
2001-08-27 Vladimir Makarov <vmakarov@touchme.toronto.redhat.com>
|
||||
|
||||
* rtl.def (DEFINE_CPU_UNIT, DEFINE_QUERY_CPU_UNIT, EXCLUSION_SET,
|
||||
PRESENCE_SET, ABSENCE_SET, DEFINE_BYPASS, DEFINE_AUTOMATON,
|
||||
AUTOMATA_OPTION, DEFINE_RESERVATION, DEFINE_INSN_RESERVATION): New
|
||||
RTL constructions.
|
||||
|
||||
* genattr.c (main): New variable num_insn_reservations. Increase
|
||||
it if there is DEFINE_INSN_RESERVATION. Output automaton based
|
||||
pipeline hazard recognizer interface.
|
||||
|
||||
* genattrtab.h: New file.
|
||||
|
||||
* genattrtab.c: Include genattrtab.h.
|
||||
(attr_printf, check_attr_test, make_internal_attr,
|
||||
make_numeric_value): Move protypes into genattrtab.h. Define them
|
||||
as external.
|
||||
(num_dfa_decls): New global variable.
|
||||
(main): Process DEFINE_CPU_UNIT, DEFINE_QUERY_CPU_UNIT,
|
||||
DEFINE_BYPASS, EXCLUSION_SET, PRESENCE_SET, ABSENCE_SET,
|
||||
DEFINE_AUTOMATON, AUTOMATA_OPTION, DEFINE_RESERVATION,
|
||||
DEFINE_INSN_RESERVATION. Call expand_automata and write_automata.
|
||||
|
||||
* genautomata.c: New file.
|
||||
|
||||
* rtl.h (LINK_COST_ZERO, LINK_COST_FREE): Remove them.
|
||||
|
||||
* sched-int.h: (curr_state): Add the external definition for
|
||||
automaton pipeline interface.
|
||||
(haifa_insn_data): Add comments for members blockage and units.
|
||||
|
||||
* target-def.h (TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE,
|
||||
TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN,
|
||||
TARGET_SCHED_DFA_PRE_CYCLE_INSN,
|
||||
TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN,
|
||||
TARGET_SCHED_DFA_POST_CYCLE_INSN,
|
||||
TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD,
|
||||
TARGET_SCHED_INIT_DFA_BUBBLES, TARGET_SCHED_DFA_BUBBLE): New
|
||||
macros.
|
||||
(TARGET_SCHED): Use the new macros.
|
||||
|
||||
* target.h (use_dfa_pipeline_interface, init_dfa_pre_cycle_insn,
|
||||
dfa_pre_cycle_insn, init_dfa_post_cycle_insn, dfa_post_cycle_insn,
|
||||
first_cycle_multipass_dfa_lookahead, init_dfa_bubbles,
|
||||
dfa_bubble): New members in gcc_target.sched.
|
||||
|
||||
* haifa-sched.c (insert_schedule_bubbles_p): New variable.
|
||||
(MAX_INSN_QUEUE_INDEX): New macro for automaton interface.
|
||||
(insn_queue): Redefine it as pointer to array.
|
||||
(NEXT_Q, NEXT_Q_AFTER): Use MAX_INSN_QUEUE_INDEX instead of
|
||||
INSN_QUEUE_SIZE.
|
||||
(max_insn_queue_index_macro_value): New variable.
|
||||
(curr_state, dfa_state_size, ready_try): New varaibles for
|
||||
automaton interface.
|
||||
(ready_element, ready_remove, max_issue): New function prototypes
|
||||
for automaton interface.
|
||||
(choose_ready): New function prototype.
|
||||
(insn_unit, blockage_range): Add comments.
|
||||
(unit_last_insn, unit_tick, unit_n_insns): Define them for case
|
||||
FUNCTION_UNITS_SIZE == 0.
|
||||
(insn_issue_delay, actual_hazard_this_instance, schedule_unit,
|
||||
actual_hazard, potential_hazard): Add comments.
|
||||
(insn_cost): Use cost -1 as undefined value. Remove
|
||||
LINK_COST_ZERO and LINK_COST_FREE. Add new code for automaton
|
||||
pipeline interface.
|
||||
(ready_element, ready_remove): New functions for automaton
|
||||
interface.
|
||||
(schedule_insn): Add new code for automaton pipeline interface.
|
||||
(queue_to_ready): Add new code for automaton pipeline interface.
|
||||
Use MAX_INSN_QUEUE_INDEX instead of INSN_QUEUE_SIZE.
|
||||
(debug_ready_list): Print newline when the queue is empty.
|
||||
(max_issue): New function for automaton pipeline interface.
|
||||
(choose_ready): New function.
|
||||
(schedule_block): Add new code for automaton pipeline interface.
|
||||
Print ready list before scheduling each insn.
|
||||
(sched_init): Add new code for automaton pipeline interface.
|
||||
Initiate insn cost by -1.
|
||||
(sched_finish): Free the current automaton state and finalize
|
||||
automaton pipeline interface.
|
||||
|
||||
* sched-rgn.c: Include target.h.
|
||||
(init_ready_list, new_ready, debug_dependencies): Add new code for
|
||||
automaton pipeline interface.
|
||||
|
||||
* sched-vis.c: Include target.h.
|
||||
(get_visual_tbl_length): Add code for automaton interface.
|
||||
(target_units, print_block_visualization): Add comments.
|
||||
|
||||
* Makefile.in (GETRUNTIME, HASHTAB, HOST_GETRUNTIME, HOST_HASHTAB,
|
||||
USE_HOST_GETRUNTIME, USE_HOST_HASHTAB, HOST_VARRAY): New variables.
|
||||
(sched-rgn.o, sched-vis.o): Add new dependency file target.h.
|
||||
(getruntime.o, genautomata.o): New entries.
|
||||
(genattrtab.o): Add new dependency file genattrtab.h.
|
||||
(genattrtab): Add new dependencies. Link it with `libm.a'.
|
||||
(getruntime.o, hashtab.o): New entries for canadian cross.
|
||||
|
||||
* doc/md.texi: Description of automaton based model.
|
||||
|
||||
* doc/tm.texi (TARGET_SCHED_ISSUE_RATE, TARGET_SCHED_ADJUST_COST):
|
||||
Add comments.
|
||||
(TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE,
|
||||
TARGET_SCHED_DFA_PRE_CYCLE_INSN,
|
||||
TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN,
|
||||
TARGET_SCHED_DFA_POST_CYCLE_INSN,
|
||||
TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN,
|
||||
TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD,
|
||||
TARGET_SCHED_INIT_DFA_BUBBLES, TARGET_SCHED_DFA_BUBBLE): The new
|
||||
hook descriptions.
|
||||
(TRADITIONAL_PIPELINE_INTERFACE, DFA_PIPELINE_INTERFACE,
|
||||
MAX_DFA_ISSUE_RATE): New macro descriptions.
|
||||
|
||||
* doc/contrib.texi: Add dfa based scheduler contribution.
|
||||
|
||||
* doc/gcc.texi: Add more information about genattrtab.
|
||||
|
||||
Mon Apr 29 17:19:10 2002 Richard Kenner <kenner@vlsi1.ultra.nyu.edu>
|
||||
|
||||
* reload1.c (eliminate_regs, case SUBREG): Fix typo in
|
||||
|
|
|
@ -344,6 +344,14 @@ LIBICONV = @LIBICONV@
|
|||
# List of internationalization subdirectories.
|
||||
INTL_SUBDIRS = intl
|
||||
|
||||
# Change this to a null string if obstacks are installed in the
|
||||
# system library.
|
||||
OBSTACK=obstack.o
|
||||
|
||||
# The following object files is used by genautomata.
|
||||
GETRUNTIME = getruntime.o
|
||||
HASHTAB = hashtab.o
|
||||
|
||||
# The GC method to be used on this system.
|
||||
GGC=@GGC@.o
|
||||
|
||||
|
@ -475,6 +483,12 @@ HOST_CFLAGS= @HOST_CFLAGS@ -DGENERATOR_FILE
|
|||
# Native linker and preprocessor flags. For x-fragment overrides.
|
||||
HOST_LDFLAGS=$(LDFLAGS)
|
||||
HOST_CPPFLAGS=$(ALL_CPPFLAGS)
|
||||
HOST_OBSTACK=$(OBSTACK)
|
||||
HOST_VFPRINTF=$(VFPRINTF)
|
||||
HOST_DOPRINT=$(DOPRINT)
|
||||
HOST_GETRUNTIME=$(GETRUNTIME)
|
||||
HOST_HASHTAB=$(HASHTAB)
|
||||
HOST_STRSTR=$(STRSTR)
|
||||
|
||||
# Actual name to use when installing a native compiler.
|
||||
GCC_INSTALL_NAME = `echo gcc|sed '$(program_transform_name)'`
|
||||
|
@ -598,8 +612,17 @@ ALL_CPPFLAGS = $(CPPFLAGS) $(X_CPPFLAGS) $(T_CPPFLAGS)
|
|||
LIBIBERTY = ../libiberty/libiberty.a
|
||||
BUILD_LIBIBERTY = @FORBUILD@/libiberty/libiberty.a
|
||||
|
||||
# Dependencies on the intl and portability libraries.
|
||||
LIBDEPS= $(INTLDEPS) $(LIBIBERTY)
|
||||
USE_HOST_OBSTACK= ` case "${HOST_OBSTACK}" in ?*) echo ${HOST_PREFIX}${HOST_OBSTACK} ;; esac `
|
||||
USE_HOST_VFPRINTF= ` case "${HOST_VFPRINTF}" in ?*) echo ${HOST_PREFIX}${HOST_VFPRINTF} ;; esac `
|
||||
USE_HOST_DOPRINT= ` case "${HOST_DOPRINT}" in ?*) echo ${HOST_PREFIX}${HOST_DOPRINT} ;; esac `
|
||||
USE_HOST_GETRUNTIME= ` case "${HOST_GETRUNTIME}" in ?*) echo ${HOST_PREFIX}${HOST_GETRUNTIME} ;; esac `
|
||||
USE_HOST_HASHTAB= ` case "${HOST_HASHTAB}" in ?*) echo ${HOST_PREFIX}${HOST_HASHTAB} ;; esac `
|
||||
USE_HOST_STRSTR= ` case "${HOST_STRSTR}" in ?*) echo ${HOST_PREFIX}${HOST_STRSTR} ;; esac `
|
||||
|
||||
# Dependency on the intl, portability libraries, obstack or whatever
|
||||
# library facilities are not installed in the system libraries.
|
||||
# We don't use USE_* because backquote expansion doesn't work in deps.
|
||||
LIBDEPS= $(INTLLIBS) $(LIBIBERTY) $(OBSTACK) $(VFPRINTF) $(DOPRINT) $(STRSTR)
|
||||
|
||||
# Likewise, for use in the tools that must run on this machine
|
||||
# even if we are cross-building GCC.
|
||||
|
@ -618,6 +641,7 @@ HOST_RTL = $(HOST_PREFIX)rtl.o read-rtl.o $(HOST_PREFIX)bitmap.o \
|
|||
|
||||
HOST_PRINT = $(HOST_PREFIX)print-rtl.o
|
||||
HOST_ERRORS = $(HOST_PREFIX)errors.o
|
||||
HOST_VARRAY = $(HOST_PREFIX)varray.o
|
||||
|
||||
# Specify the directories to be searched for header files.
|
||||
# Both . and srcdir are used, in that order,
|
||||
|
@ -1319,6 +1343,17 @@ line-map.o: line-map.c line-map.h intl.h $(CONFIG_H) $(SYSTEM_H)
|
|||
ggc-none.o: ggc-none.c $(GCONFIG_H) $(SYSTEM_H) $(GGC_H)
|
||||
$(CC) -c $(ALL_CFLAGS) -DGENERATOR_FILE $(ALL_CPPFLAGS) $(INCLUDES) $< $(OUTPUT_OPTION)
|
||||
|
||||
obstack.o: $(srcdir)/../libiberty/obstack.c $(GCONFIG_H)
|
||||
rm -f obstack.c
|
||||
$(LN_S) $(srcdir)/../libiberty/obstack.c obstack.c
|
||||
$(CC) -c $(ALL_CFLAGS) -DGENERATOR_FILE $(ALL_CPPFLAGS) $(INCLUDES) \
|
||||
obstack.c $(OUTPUT_OPTION)
|
||||
|
||||
getruntime.o: $(srcdir)/../libiberty/getruntime.c $(CONFIG_H)
|
||||
rm -f getruntime.c
|
||||
$(LN_S) $(srcdir)/../libiberty/getruntime.c getruntime.c
|
||||
$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) getruntime.c
|
||||
|
||||
prefix.o: prefix.c $(CONFIG_H) $(SYSTEM_H) Makefile prefix.h
|
||||
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
|
||||
-DPREFIX=\"$(prefix)\" \
|
||||
|
@ -1558,12 +1593,13 @@ sched-deps.o : sched-deps.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
|
|||
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h cselib.h $(PARAMS_H) $(TM_P_H)
|
||||
sched-rgn.o : sched-rgn.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
|
||||
$(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
|
||||
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H)
|
||||
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H) $(TARGET_H)
|
||||
sched-ebb.o : sched-ebb.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
|
||||
$(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h flags.h insn-config.h function.h \
|
||||
$(INSN_ATTR_H) toplev.h $(RECOG_H) except.h $(TM_P_H)
|
||||
sched-vis.o : sched-vis.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) sched-int.h \
|
||||
hard-reg-set.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(REGS_H) $(TM_P_H)
|
||||
hard-reg-set.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(REGS_H) $(TM_P_H) \
|
||||
$(TARGET_H)
|
||||
final.o : final.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h intl.h \
|
||||
$(REGS_H) $(RECOG_H) conditions.h insn-config.h $(INSN_ATTR_H) function.h \
|
||||
real.h output.h hard-reg-set.h except.h debug.h xcoffout.h \
|
||||
|
@ -1868,14 +1904,18 @@ genattr$(build_exeext) : genattr.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HO
|
|||
genattr.o : genattr.c $(RTL_H) $(HCONFIG_H) $(SYSTEM_H) errors.h gensupport.h
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genattr.c $(OUTPUT_OPTION)
|
||||
|
||||
genattrtab$(build_exeext) : genattrtab.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBDEPS)
|
||||
genattrtab$(build_exeext) : genattrtab.o genautomata.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_VARRAY) $(HOST_PREFIX)$(HOST_GETRUNTIME) $(HOST_LIBDEPS)
|
||||
$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ \
|
||||
genattrtab.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBS)
|
||||
genattrtab.o genautomata.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_VARRAY) $(USE_HOST_GETRUNTIME) $(HOST_LIBS) -lm
|
||||
|
||||
genattrtab.o : genattrtab.c $(RTL_H) $(OBSTACK_H) $(HCONFIG_H) \
|
||||
$(SYSTEM_H) errors.h $(GGC_H) gensupport.h
|
||||
$(SYSTEM_H) errors.h $(GGC_H) gensupport.h genattrtab.h
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genattrtab.c $(OUTPUT_OPTION)
|
||||
|
||||
genautomata.o : genautomata.c $(RTL_H) $(OBSTACK_H) $(HCONFIG_H) \
|
||||
$(SYSTEM_H) errors.h varray.h hash.h genattrtab.h
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(srcdir)/genautomata.c $(OUTPUT_OPTION)
|
||||
|
||||
genoutput$(build_exeext) : genoutput.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBDEPS)
|
||||
$(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ \
|
||||
genoutput.o $(HOST_RTL) $(HOST_PRINT) $(HOST_ERRORS) $(HOST_LIBS)
|
||||
|
@ -1921,11 +1961,47 @@ $(HOST_PREFIX_1)bitmap.o: $(srcdir)/bitmap.c $(HCONFIG_H) $(SYSTEM_H) $(RTL_H) \
|
|||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/bitmap.c > $(HOST_PREFIX)bitmap.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)bitmap.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)obstack.o: $(srcdir)/../libiberty/obstack.c $(HCONFIG_H)
|
||||
rm -f $(HOST_PREFIX)obstack.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/obstack.c > $(HOST_PREFIX)obstack.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)obstack.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)getruntime.o: $(srcdir)/../libiberty/getruntime.c
|
||||
rm -f $(HOST_PREFIX)getruntime.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/getruntime.c > $(HOST_PREFIX)getruntime.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)getruntime.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)hashtab.o: $(srcdir)/../libiberty/hashtab.c
|
||||
rm -f $(HOST_PREFIX)hashtab.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/hashtab.c > $(HOST_PREFIX)hashtab.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)hashtab.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)vfprintf.o: $(srcdir)/../libiberty/vfprintf.c $(HCONFIG_H)
|
||||
rm -f $(HOST_PREFIX)vfprintf.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/vfprintf.c > $(HOST_PREFIX)vfprintf.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)vfprintf.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)doprint.o: doprint.c $(HCONFIG_H)
|
||||
rm -f $(HOST_PREFIX)doprint.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/doprint.c > $(HOST_PREFIX)doprint.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)doprint.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)strstr.o: $(srcdir)/../libiberty/strstr.c $(HCONFIG_H)
|
||||
rm -f $(HOST_PREFIX)strstr.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/../libiberty/strstr.c > $(HOST_PREFIX)strstr.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)strstr.c $(OUTPUT_OPTION)
|
||||
|
||||
$(HOST_PREFIX_1)errors.o: errors.c $(HCONFIG_H) $(SYSTEM_H) errors.h
|
||||
rm -f $(HOST_PREFIX)errors.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/errors.c > $(HOST_PREFIX)errors.c
|
||||
$(HOST_CC) -c $(HOST_CFLAGS) $(HOST_CPPFLAGS) $(INCLUDES) $(HOST_PREFIX)errors.c $(OUTPUT_OPTION)
|
||||
|
||||
|
||||
# This satisfies the dependency that we get if you cross-compile a compiler
|
||||
# that does not need to compile doprint or whatever.
|
||||
$(HOST_PREFIX_1):
|
||||
$(STAMP) $(HOST_PREFIX_1)
|
||||
|
||||
$(HOST_PREFIX_1)ggc-none.o: ggc-none.c $(HCONFIG_H) $(SYSTEM_H) $(GCC_H)
|
||||
rm -f $(HOST_PREFIX)ggc-none.c
|
||||
sed -e 's/config[.]h/hconfig.h/' $(srcdir)/ggc-none.c > $(HOST_PREFIX)ggc-none.c
|
||||
|
|
|
@ -49,6 +49,17 @@ Boston, MA 02111-1307, USA. */
|
|||
#include "target.h"
|
||||
#include "target-def.h"
|
||||
|
||||
static int hppa_use_dfa_pipeline_interface PARAMS ((void));
|
||||
|
||||
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
|
||||
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE hppa_use_dfa_pipeline_interface
|
||||
|
||||
static int
|
||||
hppa_use_dfa_pipeline_interface ()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifndef DO_FRAME_NOTES
|
||||
#ifdef INCOMING_RETURN_ADDR_RTX
|
||||
#define DO_FRAME_NOTES 1
|
||||
|
@ -172,6 +183,11 @@ override_options ()
|
|||
pa_cpu_string = "7200";
|
||||
pa_cpu = PROCESSOR_7200;
|
||||
}
|
||||
else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7300"))
|
||||
{
|
||||
pa_cpu_string = "7300";
|
||||
pa_cpu = PROCESSOR_7300;
|
||||
}
|
||||
else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000"))
|
||||
{
|
||||
pa_cpu_string = "8000";
|
||||
|
@ -179,7 +195,7 @@ override_options ()
|
|||
}
|
||||
else
|
||||
{
|
||||
warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string);
|
||||
warning ("unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, 7300, and 8000\n", pa_cpu_string);
|
||||
}
|
||||
|
||||
/* Set the instruction set architecture. */
|
||||
|
@ -3929,7 +3945,7 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
{
|
||||
case TYPE_FPLOAD:
|
||||
/* This cost 3 cycles, not 2 as the md says for the
|
||||
700 and 7100. */
|
||||
700 and 7100, 7100lc, 7200 and 7300. */
|
||||
return cost + 1;
|
||||
|
||||
case TYPE_FPALU:
|
||||
|
@ -3947,6 +3963,11 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
/* A flop-flop true depenendency where the sizes of the operand
|
||||
carrying the dependency is difference causes an additional
|
||||
cycle stall on the 7100lc, 7200, and 7300. Similarly for
|
||||
a fpload-flop true dependency. */
|
||||
}
|
||||
|
||||
/* For other data dependencies, the default cost specified in the
|
||||
|
@ -3989,7 +4010,10 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
preceding arithmetic operation has finished if
|
||||
the target of the fpload is any of the sources
|
||||
(or destination) of the arithmetic operation. */
|
||||
return cost - 1;
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
else
|
||||
return cost - 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -4024,7 +4048,10 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
preceding divide or sqrt operation has finished if
|
||||
the target of the ALU flop is any of the sources
|
||||
(or destination) of the divide or sqrt operation. */
|
||||
return cost - 2;
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
else
|
||||
return cost - 2;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -4069,8 +4096,15 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
/* A fpload can't be issued until one cycle before a
|
||||
preceding arithmetic operation has finished if
|
||||
the target of the fpload is the destination of the
|
||||
arithmetic operation. */
|
||||
return cost - 1;
|
||||
arithmetic operation.
|
||||
|
||||
Exception: For PA7100LC, PA7200 and PA7300, the cost
|
||||
is 3 cycles, unless they bundle together. We also
|
||||
pay the penalty if the second insn is a fpload. */
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 1;
|
||||
else
|
||||
return cost - 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -4105,7 +4139,10 @@ pa_adjust_cost (insn, link, dep_insn, cost)
|
|||
preceding divide or sqrt operation has finished if
|
||||
the target of the ALU flop is also the target of
|
||||
the divide or sqrt operation. */
|
||||
return cost - 2;
|
||||
if (hppa_use_dfa_pipeline_interface ())
|
||||
return insn_default_latency (dep_insn) - 2;
|
||||
else
|
||||
return cost - 2;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -4165,6 +4202,7 @@ pa_issue_rate ()
|
|||
case PROCESSOR_7100: return 2;
|
||||
case PROCESSOR_7100LC: return 2;
|
||||
case PROCESSOR_7200: return 2;
|
||||
case PROCESSOR_7300: return 2;
|
||||
case PROCESSOR_8000: return 4;
|
||||
|
||||
default:
|
||||
|
|
|
@ -41,6 +41,7 @@ enum processor_type
|
|||
PROCESSOR_7100,
|
||||
PROCESSOR_7100LC,
|
||||
PROCESSOR_7200,
|
||||
PROCESSOR_7300,
|
||||
PROCESSOR_8000
|
||||
};
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@
|
|||
;;
|
||||
;; FIXME: Add 800 scheduling for completeness?
|
||||
|
||||
(define_attr "cpu" "700,7100,7100LC,7200,8000" (const (symbol_ref "pa_cpu_attr")))
|
||||
(define_attr "cpu" "700,7100,7100LC,7200,7300,8000" (const (symbol_ref "pa_cpu_attr")))
|
||||
|
||||
;; Length (in # of bytes).
|
||||
(define_attr "length" ""
|
||||
|
@ -139,35 +139,10 @@
|
|||
(const_int 0)))
|
||||
[(eq_attr "in_branch_delay" "true") (nil) (nil)])
|
||||
|
||||
;; Function units of the HPPA. The following data is for the 700 CPUs
|
||||
;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
|
||||
;; Scheduling instructions for PA-83 machines according to the Snake
|
||||
;; constraints shouldn't hurt.
|
||||
|
||||
;; (define_function_unit {name} {num-units} {n-users} {test}
|
||||
;; {ready-delay} {issue-delay} [{conflict-list}])
|
||||
|
||||
;; The integer ALU.
|
||||
;; (Noted only for documentation; units that take one cycle do not need to
|
||||
;; be specified.)
|
||||
|
||||
;; (define_function_unit "alu" 1 0
|
||||
;; (and (eq_attr "type" "unary,shift,nullshift,binary,move,address")
|
||||
;; (eq_attr "cpu" "700"))
|
||||
;; 1 0)
|
||||
|
||||
|
||||
;; Memory. Disregarding Cache misses, the Mustang memory times are:
|
||||
;; load: 2, fpload: 3
|
||||
;; store, fpstore: 3, no D-cache operations should be scheduled.
|
||||
|
||||
(define_function_unit "pa700memory" 1 0
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "700")) 2 0)
|
||||
(define_function_unit "pa700memory" 1 0
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "700")) 3 3)
|
||||
|
||||
;; The Timex (aka 700) has two floating-point units: ALU, and MUL/DIV/SQRT.
|
||||
;; Timings:
|
||||
;; Instruction Time Unit Minimum Distance (unit contention)
|
||||
|
@ -186,44 +161,73 @@
|
|||
;; fdiv,dbl 12 MPY 12
|
||||
;; fsqrt,sgl 14 MPY 14
|
||||
;; fsqrt,dbl 18 MPY 18
|
||||
;;
|
||||
;; We don't model fmpyadd/fmpysub properly as those instructions
|
||||
;; keep both the FP ALU and MPY units busy. Given that these
|
||||
;; processors are obsolete, I'm not going to spend the time to
|
||||
;; model those instructions correctly.
|
||||
|
||||
(define_function_unit "pa700fp_alu" 1 0
|
||||
(define_automaton "pa700")
|
||||
(define_cpu_unit "dummy_700,mem_700,fpalu_700,fpmpy_700" "pa700")
|
||||
|
||||
(define_insn_reservation "W0" 4
|
||||
(and (eq_attr "type" "fpcc")
|
||||
(eq_attr "cpu" "700")) 4 2)
|
||||
(define_function_unit "pa700fp_alu" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpalu_700*2")
|
||||
|
||||
(define_insn_reservation "W1" 3
|
||||
(and (eq_attr "type" "fpalu")
|
||||
(eq_attr "cpu" "700")) 3 2)
|
||||
(define_function_unit "pa700fp_mpy" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpalu_700*2")
|
||||
|
||||
(define_insn_reservation "W2" 3
|
||||
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "700")) 3 2)
|
||||
(define_function_unit "pa700fp_mpy" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpmpy_700*2")
|
||||
|
||||
(define_insn_reservation "W3" 10
|
||||
(and (eq_attr "type" "fpdivsgl")
|
||||
(eq_attr "cpu" "700")) 10 10)
|
||||
(define_function_unit "pa700fp_mpy" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpmpy_700*10")
|
||||
|
||||
(define_insn_reservation "W4" 12
|
||||
(and (eq_attr "type" "fpdivdbl")
|
||||
(eq_attr "cpu" "700")) 12 12)
|
||||
(define_function_unit "pa700fp_mpy" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpmpy_700*12")
|
||||
|
||||
(define_insn_reservation "W5" 14
|
||||
(and (eq_attr "type" "fpsqrtsgl")
|
||||
(eq_attr "cpu" "700")) 14 14)
|
||||
(define_function_unit "pa700fp_mpy" 1 0
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpmpy_700*14")
|
||||
|
||||
(define_insn_reservation "W6" 18
|
||||
(and (eq_attr "type" "fpsqrtdbl")
|
||||
(eq_attr "cpu" "700")) 18 18)
|
||||
(eq_attr "cpu" "700"))
|
||||
"fpmpy_700*18")
|
||||
|
||||
(define_insn_reservation "W7" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700")
|
||||
|
||||
(define_insn_reservation "W8" 3
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "700"))
|
||||
"mem_700*3")
|
||||
|
||||
(define_insn_reservation "W9" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "700"))
|
||||
"dummy_700")
|
||||
|
||||
;; Function units for the 7100 and 7150. The 7100/7150 can dual-issue
|
||||
;; floating point computations with non-floating point computations (fp loads
|
||||
;; and stores are not fp computations).
|
||||
;;
|
||||
|
||||
;; Memory. Disregarding Cache misses, memory loads take two cycles; stores also
|
||||
;; take two cycles, during which no Dcache operations should be scheduled.
|
||||
;; Any special cases are handled in pa_adjust_cost. The 7100, 7150 and 7100LC
|
||||
;; all have the same memory characteristics if one disregards cache misses.
|
||||
(define_function_unit "pa7100memory" 1 0
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "7100,7100LC")) 2 0)
|
||||
(define_function_unit "pa7100memory" 1 0
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "7100,7100LC")) 2 2)
|
||||
|
||||
;; The 7100/7150 has three floating-point units: ALU, MUL, and DIV.
|
||||
;; Timings:
|
||||
|
@ -243,41 +247,46 @@
|
|||
;; fdiv,dbl 15 DIV 15
|
||||
;; fsqrt,sgl 8 DIV 8
|
||||
;; fsqrt,dbl 15 DIV 15
|
||||
;;
|
||||
;; We don't really model the FP ALU/MPY units properly (they are
|
||||
;; distinct subunits in the FP unit). However, there can never be
|
||||
;; a functional unit; conflict given the latency and issue rates
|
||||
;; for those units.
|
||||
|
||||
(define_function_unit "pa7100fp_alu" 1 0
|
||||
(and (eq_attr "type" "fpcc,fpalu")
|
||||
(eq_attr "cpu" "7100")) 2 1)
|
||||
(define_function_unit "pa7100fp_mpy" 1 0
|
||||
(and (eq_attr "type" "fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "7100")) 2 1)
|
||||
(define_function_unit "pa7100fp_div" 1 0
|
||||
(define_automaton "pa7100")
|
||||
(define_cpu_unit "i_7100, f_7100,fpmac_7100,fpdivsqrt_7100,mem_7100" "pa7100")
|
||||
|
||||
(define_insn_reservation "X0" 2
|
||||
(and (eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"f_7100,fpmac_7100")
|
||||
|
||||
(define_insn_reservation "X1" 8
|
||||
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
|
||||
(eq_attr "cpu" "7100")) 8 8)
|
||||
(define_function_unit "pa7100fp_div" 1 0
|
||||
(eq_attr "cpu" "7100"))
|
||||
"f_7100+fpdivsqrt_7100,fpdivsqrt_7100*7")
|
||||
|
||||
(define_insn_reservation "X2" 15
|
||||
(and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100")) 15 15)
|
||||
(eq_attr "cpu" "7100"))
|
||||
"f_7100+fpdivsqrt_7100,fpdivsqrt_7100*14")
|
||||
|
||||
;; To encourage dual issue we define function units corresponding to
|
||||
;; the instructions which can be dual issued. This is a rather crude
|
||||
;; approximation, the "pa7100nonflop" test in particular could be refined.
|
||||
(define_function_unit "pa7100flop" 1 1
|
||||
(and
|
||||
(eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100")) 1 1)
|
||||
(define_insn_reservation "X3" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100")
|
||||
|
||||
(define_function_unit "pa7100nonflop" 1 1
|
||||
(and
|
||||
(eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100")) 1 1)
|
||||
(define_insn_reservation "X4" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100+mem_7100,mem_7100")
|
||||
|
||||
|
||||
;; Memory subsystem works just like 7100/7150 (except for cache miss times which
|
||||
;; we don't model here).
|
||||
(define_insn_reservation "X5" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "7100"))
|
||||
"i_7100")
|
||||
|
||||
;; The 7100LC has three floating-point units: ALU, MUL, and DIV.
|
||||
;; Note divides and sqrt flops lock the cpu until the flop is
|
||||
;; finished. fmpy and xmpyu (fmpyi) lock the cpu for one cycle.
|
||||
;; There's no way to avoid the penalty.
|
||||
;; Timings:
|
||||
;; Instruction Time Unit Minimum Distance (unit contention)
|
||||
;; fcpy 2 ALU 1
|
||||
|
@ -299,106 +308,179 @@
|
|||
;; fdiv,dbl 15 DIV 15
|
||||
;; fsqrt,sgl 8 DIV 8
|
||||
;; fsqrt,dbl 15 DIV 15
|
||||
;;
|
||||
;; The PA7200 is just like the PA7100LC except that there is
|
||||
;; no store-store penalty.
|
||||
;;
|
||||
;; The PA7300 is just like the PA7200 except that there is
|
||||
;; no store-load penalty.
|
||||
;;
|
||||
;; Note there are some aspects of the 7100LC we are not modeling
|
||||
;; at the moment. I'll be reviewing the 7100LC scheduling info
|
||||
;; shortly and updating this description.
|
||||
;;
|
||||
;; load-load pairs
|
||||
;; store-store pairs
|
||||
;; fmpyadd,dbl
|
||||
;; fmpysub,dbl
|
||||
;; other issue modeling
|
||||
|
||||
(define_function_unit "pa7100LCfp_alu" 1 0
|
||||
(define_automaton "pa7100lc")
|
||||
(define_cpu_unit "i0_7100lc, i1_7100lc, f_7100lc" "pa7100lc")
|
||||
(define_cpu_unit "fpalu_7100lc,fpdivsqrt_7100lc,fpmul_7100lc" "pa7100lc")
|
||||
(define_cpu_unit "mem_7100lc" "pa7100lc")
|
||||
|
||||
(define_insn_reservation "Y0" 2
|
||||
(and (eq_attr "type" "fpcc,fpalu")
|
||||
(eq_attr "cpu" "7100LC,7200")) 2 1)
|
||||
(define_function_unit "pa7100LCfp_mpy" 1 0
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc,fpalu_7100lc")
|
||||
|
||||
(define_insn_reservation "Y1" 2
|
||||
(and (eq_attr "type" "fpmulsgl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 2 1)
|
||||
(define_function_unit "pa7100LCfp_mpy" 1 0
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc,fpmul_7100lc")
|
||||
|
||||
(define_insn_reservation "Y2" 3
|
||||
(and (eq_attr "type" "fpmuldbl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 3 2)
|
||||
(define_function_unit "pa7100LCfp_div" 1 0
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc,fpmul_7100lc,fpmul_7100lc")
|
||||
|
||||
(define_insn_reservation "Y3" 8
|
||||
(and (eq_attr "type" "fpdivsgl,fpsqrtsgl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 8 8)
|
||||
(define_function_unit "pa7100LCfp_div" 1 0
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*7")
|
||||
|
||||
(define_insn_reservation "Y4" 15
|
||||
(and (eq_attr "type" "fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 15 15)
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"f_7100lc+fpdivsqrt_7100lc,fpdivsqrt_7100lc*14")
|
||||
|
||||
;; Define the various functional units for dual-issue.
|
||||
(define_insn_reservation "Y5" 2
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"i1_7100lc+mem_7100lc")
|
||||
|
||||
;; There's only one floating point unit.
|
||||
(define_function_unit "pa7100LCflop" 1 1
|
||||
(and
|
||||
(eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 1 1)
|
||||
(define_insn_reservation "Y6" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "7100LC"))
|
||||
"i1_7100lc+mem_7100lc,mem_7100lc")
|
||||
|
||||
;; Shifts and memory ops execute in only one of the integer ALUs
|
||||
(define_function_unit "pa7100LCshiftmem" 1 1
|
||||
(and
|
||||
(eq_attr "type" "shift,nullshift,load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "7100LC,7200")) 1 1)
|
||||
(define_insn_reservation "Y7" 1
|
||||
(and (eq_attr "type" "shift,nullshift")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"i1_7100lc")
|
||||
|
||||
;; We have two basic ALUs.
|
||||
(define_function_unit "pa7100LCalu" 2 1
|
||||
(and
|
||||
(eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "7100LC,7200")) 1 1)
|
||||
(define_insn_reservation "Y8" 1
|
||||
(and (eq_attr "type" "!fpcc,fpalu,fpmulsgl,fpmuldbl,fpdivsgl,fpsqrtsgl,fpdivdbl,fpsqrtdbl,load,fpload,store,fpstore,shift,nullshift")
|
||||
(eq_attr "cpu" "7100LC,7200,7300"))
|
||||
"(i0_7100lc|i1_7100lc)")
|
||||
|
||||
;; I don't have complete information on the PA7200; however, most of
|
||||
;; what I've heard makes it look like a 7100LC without the store-store
|
||||
;; penalty. So that's how we'll model it.
|
||||
;; The 7200 has a store-load penalty
|
||||
(define_insn_reservation "Y9" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "7200"))
|
||||
"i0_7100lc,mem_7100lc")
|
||||
|
||||
;; Memory. Disregarding Cache misses, memory loads and stores take
|
||||
;; two cycles. Any special cases are handled in pa_adjust_cost.
|
||||
(define_function_unit "pa7200memory" 1 0
|
||||
(and (eq_attr "type" "load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "7200")) 2 0)
|
||||
;; The 7300 has no penalty for store-store or store-load
|
||||
(define_insn_reservation "YA" 2
|
||||
(and (eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "7300"))
|
||||
"i0_7100lc")
|
||||
|
||||
;; I don't have detailed information on the PA7200 FP pipeline, so I
|
||||
;; treat it just like the 7100LC pipeline.
|
||||
;; Similarly for the multi-issue fake units.
|
||||
|
||||
;;
|
||||
;; Scheduling for the PA8000 is somewhat different than scheduling for a
|
||||
;; traditional architecture.
|
||||
;;
|
||||
;; The PA8000 has a large (56) entry reorder buffer that is split between
|
||||
;; memory and non-memory operations.
|
||||
;;
|
||||
;; The PA800 can issue two memory and two non-memory operations per cycle to
|
||||
;; the function units. Similarly, the PA8000 can retire two memory and two
|
||||
;; non-memory operations per cycle.
|
||||
;; The PA8000 can issue two memory and two non-memory operations per cycle to
|
||||
;; the function units, with the exception of branches and multi-output
|
||||
;; instructions. The PA8000 can retire two non-memory operations per cycle
|
||||
;; and two memory operations per cycle, only one of which may be a store.
|
||||
;;
|
||||
;; Given the large reorder buffer, the processor can hide most latencies.
|
||||
;; According to HP, they've got the best results by scheduling for retirement
|
||||
;; bandwidth with limited latency scheduling for floating point operations.
|
||||
;; Latency for integer operations and memory references is ignored.
|
||||
;;
|
||||
;;
|
||||
;; We claim floating point operations have a 2 cycle latency and are
|
||||
;; fully pipelined, except for div and sqrt which are not pipelined.
|
||||
;; fully pipelined, except for div and sqrt which are not pipelined and
|
||||
;; take from 17 to 31 cycles to complete.
|
||||
;;
|
||||
;; It is not necessary to define the shifter and integer alu units.
|
||||
;;
|
||||
;; These first two define_unit_unit descriptions model retirement from
|
||||
;; the reorder buffer.
|
||||
(define_function_unit "pa8000lsu" 2 1
|
||||
(and
|
||||
(eq_attr "type" "load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "8000")) 1 1)
|
||||
;; It's worth noting that there is no way to saturate all the functional
|
||||
;; units on the PA8000 as there is not enough issue bandwidth.
|
||||
|
||||
(define_function_unit "pa8000alu" 2 1
|
||||
(and
|
||||
(eq_attr "type" "!load,fpload,store,fpstore")
|
||||
(eq_attr "cpu" "8000")) 1 1)
|
||||
(define_automaton "pa8000")
|
||||
(define_cpu_unit "inm0_8000, inm1_8000, im0_8000, im1_8000" "pa8000")
|
||||
(define_cpu_unit "rnm0_8000, rnm1_8000, rm0_8000, rm1_8000" "pa8000")
|
||||
(define_cpu_unit "store_8000" "pa8000")
|
||||
(define_cpu_unit "f0_8000, f1_8000" "pa8000")
|
||||
(define_cpu_unit "fdivsqrt0_8000, fdivsqrt1_8000" "pa8000")
|
||||
(define_reservation "inm_8000" "inm0_8000 | inm1_8000")
|
||||
(define_reservation "im_8000" "im0_8000 | im1_8000")
|
||||
(define_reservation "rnm_8000" "rnm0_8000 | rnm1_8000")
|
||||
(define_reservation "rm_8000" "rm0_8000 | rm1_8000")
|
||||
(define_reservation "f_8000" "f0_8000 | f1_8000")
|
||||
(define_reservation "fdivsqrt_8000" "fdivsqrt0_8000 | fdivsqrt1_8000")
|
||||
|
||||
;; Claim floating point ops have a 2 cycle latency, excluding div and
|
||||
;; sqrt, which are not pipelined and issue to different units.
|
||||
(define_function_unit "pa8000fmac" 2 0
|
||||
;; We can issue any two memops per cycle, but we can only retire
|
||||
;; one memory store per cycle. We assume that the reorder buffer
|
||||
;; will hide any memory latencies per HP's recommendation.
|
||||
(define_insn_reservation "Z0" 0
|
||||
(and
|
||||
(eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "8000")) 2 1)
|
||||
(eq_attr "type" "load,fpload")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"im_8000,rm_8000")
|
||||
|
||||
(define_function_unit "pa8000fdiv" 2 1
|
||||
(define_insn_reservation "Z1" 0
|
||||
(and
|
||||
(eq_attr "type" "fpdivsgl,fpsqrtsgl")
|
||||
(eq_attr "cpu" "8000")) 17 17)
|
||||
(eq_attr "type" "store,fpstore")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"im_8000,rm_8000+store_8000")
|
||||
|
||||
(define_function_unit "pa8000fdiv" 2 1
|
||||
;; We can issue and retire two non-memory operations per cycle with
|
||||
;; a few exceptions (branches). This group catches those we want
|
||||
;; to assume have zero latency.
|
||||
(define_insn_reservation "Z2" 0
|
||||
(and
|
||||
(eq_attr "type" "fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "8000")) 31 31)
|
||||
(eq_attr "type" "!load,fpload,store,fpstore,uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch,fpcc,fpalu,fpmulsgl,fpmuldbl,fpsqrtsgl,fpsqrtdbl,fpdivsgl,fpdivdbl")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"inm_8000,rnm_8000")
|
||||
|
||||
;; Branches use both slots in the non-memory issue and
|
||||
;; retirement unit.
|
||||
(define_insn_reservation "Z3" 0
|
||||
(and
|
||||
(eq_attr "type" "uncond_branch,branch,cbranch,fbranch,call,dyncall,multi,milli,parallel_branch")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"inm0_8000+inm1_8000,rnm0_8000+rnm1_8000")
|
||||
|
||||
;; We partial latency schedule the floating point units.
|
||||
;; They can issue/retire two at a time in the non-memory
|
||||
;; units. We fix their latency at 2 cycles and they
|
||||
;; are fully pipelined.
|
||||
(define_insn_reservation "Z4" 1
|
||||
(and
|
||||
(eq_attr "type" "fpcc,fpalu,fpmulsgl,fpmuldbl")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"inm_8000,f_8000,rnm_8000")
|
||||
|
||||
;; The fdivsqrt units are not pipelined and have a very long latency.
|
||||
;; To keep the DFA from exploding, we do not show all the
|
||||
;; reservations for the divsqrt unit.
|
||||
(define_insn_reservation "Z5" 17
|
||||
(and
|
||||
(eq_attr "type" "fpdivsgl,fpsqrtsgl")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"inm_8000,fdivsqrt_8000*6,rnm_8000")
|
||||
|
||||
(define_insn_reservation "Z6" 31
|
||||
(and
|
||||
(eq_attr "type" "fpdivdbl,fpsqrtdbl")
|
||||
(eq_attr "cpu" "8000"))
|
||||
"inm_8000,fdivsqrt_8000*6,rnm_8000")
|
||||
|
||||
|
||||
|
||||
;; Compare instructions.
|
||||
|
|
|
@ -197,6 +197,9 @@ static void sh_insert_attributes PARAMS ((tree, tree *));
|
|||
static void sh_asm_named_section PARAMS ((const char *, unsigned int));
|
||||
#endif
|
||||
static int sh_adjust_cost PARAMS ((rtx, rtx, rtx, int));
|
||||
static int sh_use_dfa_interface PARAMS ((void));
|
||||
static int sh_issue_rate PARAMS ((void));
|
||||
|
||||
static bool sh_cannot_modify_jumps_p PARAMS ((void));
|
||||
|
||||
static bool sh_ms_bitfield_layout_p PARAMS ((tree));
|
||||
|
@ -226,6 +229,12 @@ static bool sh_ms_bitfield_layout_p PARAMS ((tree));
|
|||
#undef TARGET_SCHED_ADJUST_COST
|
||||
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
|
||||
|
||||
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
|
||||
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
|
||||
sh_use_dfa_interface
|
||||
#undef TARGET_SCHED_ISSUE_RATE
|
||||
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
|
||||
|
||||
#undef TARGET_CANNOT_MODIFY_JUMPS_P
|
||||
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
|
||||
|
||||
|
@ -6726,6 +6735,29 @@ sh_pr_n_sets ()
|
|||
return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
|
||||
}
|
||||
|
||||
/* This Function Returns non zero if DFA based scheduler
|
||||
interface is to be used.At present supported only for
|
||||
SH4. */
|
||||
static int
|
||||
sh_use_dfa_interface()
|
||||
{
|
||||
if (TARGET_SH4)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function returns "2" that signifies dual issue
|
||||
for SH4 processor.To be used by DFA pipeline description. */
|
||||
static int
|
||||
sh_issue_rate()
|
||||
{
|
||||
if(TARGET_SH4)
|
||||
return 2;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* SHmedia requires registers for branches, so we can't generate new
|
||||
branches past reload. */
|
||||
static bool
|
||||
|
|
|
@ -195,6 +195,28 @@
|
|||
"cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,prset,pstore,prget,pcload,pcload_si,pt,ptabs,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil"
|
||||
(const_string "other"))
|
||||
|
||||
;; We define a new attribute namely "insn_class".We use
|
||||
;; this for DFA based pipeline description.
|
||||
;; Although the "type" attribute covers almost all insn
|
||||
;; classes,it is more convenient to define new attribute
|
||||
;; for certain reservations.
|
||||
;;
|
||||
;; mt_group SH4 "mt" group instructions.
|
||||
;;
|
||||
;; ex_group SH4 "ex" group instructions.They mostly
|
||||
;; overlap with arithmetic instructions but
|
||||
;; new attribute defined to distinguish from
|
||||
;; mt group instructions.
|
||||
;;
|
||||
;; lds_to_fpscr The "type" attribute couldn't sufficiently
|
||||
;; distinguish it from others.It is part of
|
||||
;; new attribute.Similar case with ldsmem_to_fpscr
|
||||
;; and cwb.
|
||||
|
||||
(define_attr "insn_class"
|
||||
"mt_group,ex_group,lds_to_fpscr,ldsmem_to_fpscr,cwb,none"
|
||||
(const_string "none"))
|
||||
|
||||
;; Indicate what precision must be selected in fpscr for this insn, if any.
|
||||
|
||||
(define_attr "fp_mode" "single,double,none" (const_string "none"))
|
||||
|
@ -631,7 +653,8 @@
|
|||
(match_operand:SI 1 "arith_operand" "L,r"))
|
||||
(const_int 0)))]
|
||||
"TARGET_SH1"
|
||||
"tst %1,%0")
|
||||
"tst %1,%0"
|
||||
[(set_attr "insn_class" "mt_group")])
|
||||
|
||||
;; ??? Perhaps should only accept reg/constant if the register is reg 0.
|
||||
;; That would still allow reload to create cmpi instructions, but would
|
||||
|
@ -647,7 +670,8 @@
|
|||
"@
|
||||
tst %0,%0
|
||||
cmp/eq %1,%0
|
||||
cmp/eq %1,%0")
|
||||
cmp/eq %1,%0"
|
||||
[(set_attr "insn_class" "mt_group,mt_group,mt_group")])
|
||||
|
||||
(define_insn "cmpgtsi_t"
|
||||
[(set (reg:SI T_REG)
|
||||
|
@ -656,7 +680,8 @@
|
|||
"TARGET_SH1"
|
||||
"@
|
||||
cmp/gt %1,%0
|
||||
cmp/pl %0")
|
||||
cmp/pl %0"
|
||||
[(set_attr "insn_class" "mt_group,mt_group")])
|
||||
|
||||
(define_insn "cmpgesi_t"
|
||||
[(set (reg:SI T_REG)
|
||||
|
@ -665,8 +690,9 @@
|
|||
"TARGET_SH1"
|
||||
"@
|
||||
cmp/ge %1,%0
|
||||
cmp/pz %0")
|
||||
|
||||
cmp/pz %0"
|
||||
[(set_attr "insn_class" "mt_group,mt_group")])
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; SImode unsigned integer comparisons
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -676,14 +702,16 @@
|
|||
(geu:SI (match_operand:SI 0 "arith_reg_operand" "r")
|
||||
(match_operand:SI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"cmp/hs %1,%0")
|
||||
"cmp/hs %1,%0"
|
||||
[(set_attr "insn_class" "mt_group")])
|
||||
|
||||
(define_insn "cmpgtusi_t"
|
||||
[(set (reg:SI T_REG)
|
||||
(gtu:SI (match_operand:SI 0 "arith_reg_operand" "r")
|
||||
(match_operand:SI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"cmp/hi %1,%0")
|
||||
"cmp/hi %1,%0"
|
||||
[(set_attr "insn_class" "mt_group")])
|
||||
|
||||
;; We save the compare operands in the cmpxx patterns and use them when
|
||||
;; we generate the branch.
|
||||
|
@ -1050,7 +1078,8 @@
|
|||
(ltu:SI (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
|
||||
"TARGET_SH1"
|
||||
"addc %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "addc1"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -1060,7 +1089,8 @@
|
|||
(clobber (reg:SI T_REG))]
|
||||
"TARGET_SH1"
|
||||
"addc %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_expand "addsi3"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "")
|
||||
|
@ -1088,8 +1118,9 @@
|
|||
(match_operand:SI 2 "arith_operand" "rI")))]
|
||||
"TARGET_SH1"
|
||||
"add %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Subtraction instructions
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -1155,7 +1186,8 @@
|
|||
(gtu:SI (minus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))]
|
||||
"TARGET_SH1"
|
||||
"subc %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "subc1"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -1165,7 +1197,8 @@
|
|||
(clobber (reg:SI T_REG))]
|
||||
"TARGET_SH1"
|
||||
"subc %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "*subsi3_internal"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -1173,7 +1206,8 @@
|
|||
(match_operand:SI 2 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"sub %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "*subsi3_media"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -1909,7 +1943,8 @@
|
|||
(match_operand:SI 2 "logical_operand" "r,L")))]
|
||||
"TARGET_SH1"
|
||||
"and %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; If the constant is 255, then emit a extu.b instruction instead of an
|
||||
;; and, since that will give better code.
|
||||
|
@ -1951,7 +1986,8 @@
|
|||
(match_operand:SI 2 "logical_operand" "r,L")))]
|
||||
"TARGET_SH1"
|
||||
"or %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "iordi3"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -1968,7 +2004,8 @@
|
|||
(match_operand:SI 2 "logical_operand" "L,r")))]
|
||||
"TARGET_SH1"
|
||||
"xor %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "xordi3"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -1991,7 +2028,8 @@
|
|||
(lshiftrt:SI (match_dup 1) (const_int 31)))]
|
||||
"TARGET_SH1"
|
||||
"rotl %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "rotlsi3_31"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2000,7 +2038,8 @@
|
|||
(clobber (reg:SI T_REG))]
|
||||
"TARGET_SH1"
|
||||
"rotr %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "rotlsi3_16"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2008,7 +2047,8 @@
|
|||
(const_int 16)))]
|
||||
"TARGET_SH1"
|
||||
"swap.w %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_expand "rotlsi3"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "")
|
||||
|
@ -2072,7 +2112,8 @@
|
|||
(const_int 8)))]
|
||||
"TARGET_SH1"
|
||||
"swap.b %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_expand "rotlhi3"
|
||||
[(set (match_operand:HI 0 "arith_reg_operand" "")
|
||||
|
@ -2114,7 +2155,8 @@
|
|||
(clobber (match_dup 4))])]
|
||||
"operands[4] = gen_rtx_SCRATCH (SImode);"
|
||||
[(set_attr "length" "*,*,*,4")
|
||||
(set_attr "type" "dyn_shift,arith,arith,arith")])
|
||||
(set_attr "type" "dyn_shift,arith,arith,arith")
|
||||
(set_attr "insn_class" "ex_group,ex_group,ex_group,ex_group")])
|
||||
|
||||
(define_insn "ashlhi3_k"
|
||||
[(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2124,7 +2166,8 @@
|
|||
"@
|
||||
add %0,%0
|
||||
shll%O2 %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "ashlsi3_n"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2141,7 +2184,8 @@
|
|||
(eq (symbol_ref "shift_insns_rtx (insn)") (const_int 3))
|
||||
(const_string "6")]
|
||||
(const_string "8")))
|
||||
(set_attr "type" "arith")])
|
||||
(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "")
|
||||
|
@ -2229,7 +2273,8 @@
|
|||
(clobber (reg:SI T_REG))]
|
||||
"TARGET_SH1 && INTVAL (operands[2]) == 1"
|
||||
"shar %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; We can't do HImode right shifts correctly unless we start out with an
|
||||
;; explicit zero / sign extension; doing that would result in worse overall
|
||||
|
@ -2288,7 +2333,8 @@
|
|||
(lt:SI (match_dup 1) (const_int 0)))]
|
||||
"TARGET_SH1"
|
||||
"shll %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "ashrsi3_d"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2296,7 +2342,8 @@
|
|||
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
|
||||
"TARGET_SH3"
|
||||
"shad %2,%0"
|
||||
[(set_attr "type" "dyn_shift")])
|
||||
[(set_attr "type" "dyn_shift")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "ashrsi3_n"
|
||||
[(set (reg:SI R4_REG)
|
||||
|
@ -2346,7 +2393,8 @@
|
|||
(neg:SI (match_operand:SI 2 "arith_reg_operand" "r"))))]
|
||||
"TARGET_SH3"
|
||||
"shld %2,%0"
|
||||
[(set_attr "type" "dyn_shift")])
|
||||
[(set_attr "type" "dyn_shift")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; Only the single bit shift clobbers the T bit.
|
||||
|
||||
|
@ -2357,7 +2405,8 @@
|
|||
(clobber (reg:SI T_REG))]
|
||||
"TARGET_SH1 && CONST_OK_FOR_M (INTVAL (operands[2]))"
|
||||
"shlr %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "lshrsi3_k"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2366,7 +2415,8 @@
|
|||
"TARGET_SH1 && CONST_OK_FOR_K (INTVAL (operands[2]))
|
||||
&& ! CONST_OK_FOR_M (INTVAL (operands[2]))"
|
||||
"shlr%O2 %0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "lshrsi3_n"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2444,7 +2494,8 @@
|
|||
"TARGET_SH1"
|
||||
"shll %R0\;rotcl %S0"
|
||||
[(set_attr "length" "4")
|
||||
(set_attr "type" "arith")])
|
||||
(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "ashldi3_media"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2483,7 +2534,8 @@
|
|||
"TARGET_SH1"
|
||||
"shlr %S0\;rotcr %R0"
|
||||
[(set_attr "length" "4")
|
||||
(set_attr "type" "arith")])
|
||||
(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "lshrdi3_media"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2522,7 +2574,8 @@
|
|||
"TARGET_SH1"
|
||||
"shar %S0\;rotcr %R0"
|
||||
[(set_attr "length" "4")
|
||||
(set_attr "type" "arith")])
|
||||
(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "ashrdi3_media"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2756,7 +2809,8 @@
|
|||
(const_int 16))))]
|
||||
"TARGET_SH1"
|
||||
"xtrct %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "xtrct_right"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2766,8 +2820,9 @@
|
|||
(const_int 16))))]
|
||||
"TARGET_SH1"
|
||||
"xtrct %2,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Unary arithmetic
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -2781,7 +2836,8 @@
|
|||
(const_int 0)))]
|
||||
"TARGET_SH1"
|
||||
"negc %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "*negdi_media"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "=r")
|
||||
|
@ -2819,14 +2875,16 @@
|
|||
(neg:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"neg %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "one_cmplsi2"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
(not:SI (match_operand:SI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"not %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_expand "one_cmpldi2"
|
||||
[(set (match_operand:DI 0 "arith_reg_operand" "")
|
||||
|
@ -2872,22 +2930,25 @@
|
|||
(zero_extend:SI (match_operand:HI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"extu.w %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "zero_extendqisi2"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
|
||||
(zero_extend:SI (match_operand:QI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"extu.b %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
(define_insn "zero_extendqihi2"
|
||||
[(set (match_operand:HI 0 "arith_reg_operand" "=r")
|
||||
(zero_extend:HI (match_operand:QI 1 "arith_reg_operand" "r")))]
|
||||
"TARGET_SH1"
|
||||
"extu.b %1,%0"
|
||||
[(set_attr "type" "arith")])
|
||||
|
||||
[(set_attr "type" "arith")
|
||||
(set_attr "insn_class" "ex_group")])
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Sign extension instructions
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -2941,7 +3002,8 @@
|
|||
"@
|
||||
exts.w %1,%0
|
||||
mov.w %1,%0"
|
||||
[(set_attr "type" "arith,load")])
|
||||
[(set_attr "type" "arith,load")
|
||||
(set_attr "insn_class" "ex_group,*")])
|
||||
|
||||
(define_insn "extendqisi2"
|
||||
[(set (match_operand:SI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2950,7 +3012,8 @@
|
|||
"@
|
||||
exts.b %1,%0
|
||||
mov.b %1,%0"
|
||||
[(set_attr "type" "arith,load")])
|
||||
[(set_attr "type" "arith,load")
|
||||
(set_attr "insn_class" "ex_group,*")])
|
||||
|
||||
(define_insn "extendqihi2"
|
||||
[(set (match_operand:HI 0 "arith_reg_operand" "=r,r")
|
||||
|
@ -2959,8 +3022,9 @@
|
|||
"@
|
||||
exts.b %1,%0
|
||||
mov.b %1,%0"
|
||||
[(set_attr "type" "arith,load")])
|
||||
|
||||
[(set_attr "type" "arith,load")
|
||||
(set_attr "insn_class" "ex_group,*")])
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; Move instructions
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -3070,6 +3134,7 @@
|
|||
lds.l %1,%0
|
||||
fake %1,%0"
|
||||
[(set_attr "type" "pcload_si,move,*,load_si,move,prget,move,store,store,pstore,move,prset,load,pload,pcload_si")
|
||||
(set_attr "insn_class" "*,*,mt_group,*,*,*,*,*,*,*,*,*,*,*,*")
|
||||
(set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
|
||||
|
||||
;; t/r must come after r/r, lest reload will try to reload stuff like
|
||||
|
@ -3227,7 +3292,8 @@
|
|||
(clobber (match_scratch:SI 2 "=&r"))]
|
||||
"TARGET_HARD_SH4"
|
||||
"ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%1,%2\;mov.l\\t%0,@%2"
|
||||
[(set_attr "length" "8")])
|
||||
[(set_attr "length" "8")
|
||||
(set_attr "insn_class" "cwb")])
|
||||
|
||||
(define_insn "ic_invalidate_line_media"
|
||||
[(unspec_volatile [(match_operand 0 "register_operand" "r")]
|
||||
|
@ -7415,7 +7481,8 @@
|
|||
mov.l %1,%0
|
||||
sts fpscr,%0"
|
||||
[(set_attr "length" "0,2,2,4,2,2,2,2")
|
||||
(set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")])
|
||||
(set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")
|
||||
(set_attr "insn_class" "ldsmem_to_fpscr,*,*,lds_to_fpscr,*,*,*,*")])
|
||||
|
||||
(define_split
|
||||
[(set (reg:PSI FPSCR_REG)
|
||||
|
@ -8557,3 +8624,292 @@
|
|||
"TARGET_SH1"
|
||||
"mov.l @r15+,r15\;mov.l @r15+,r0"
|
||||
[(set_attr "length" "4")])
|
||||
|
||||
;; The following description models the
|
||||
;; SH4 pipeline using the DFA based scheduler.
|
||||
;; The DFA based description is better way to model
|
||||
;; a superscalar pipeline as compared to function unit
|
||||
;; reservation model.
|
||||
;; 1. The function unit based model is oriented to describe at most one
|
||||
;; unit reservation by each insn. It is difficult to model unit reservations in multiple
|
||||
;; pipeline units by same insn. This can be done using DFA based description.
|
||||
;; 2. The execution performance of DFA based scheduler does not depend on processor complexity.
|
||||
;; 3. Writing all unit reservations for an instruction class is more natural description
|
||||
;; of the pipeline and makes interface of the hazard recognizer simpler than the
|
||||
;; old function unit based model.
|
||||
;; 4. The DFA model is richer and is a part of greater overall framework of RCSP.
|
||||
|
||||
|
||||
;; Two automata are defined to reduce number of states
|
||||
;; which a single large automaton will have.(Factoring)
|
||||
|
||||
(define_automaton "inst_pipeline,fpu_pipe")
|
||||
|
||||
;; This unit is basically the decode unit of the processor.
|
||||
;; Since SH4 is a dual issue machine,it is as if there are two
|
||||
;; units so that any insn can be processed by either one
|
||||
;; of the decoding unit.
|
||||
|
||||
(define_cpu_unit "pipe_01,pipe_02" "inst_pipeline")
|
||||
|
||||
|
||||
;; The fixed point arithmetic calculator(?? EX Unit).
|
||||
|
||||
(define_cpu_unit "int" "inst_pipeline")
|
||||
|
||||
;; f1_1 and f1_2 are floating point units.Actually there is
|
||||
;; a f1 unit which can overlap with other f1 unit but
|
||||
;; not another F1 unit.It is as though there were two
|
||||
;; f1 units.
|
||||
|
||||
(define_cpu_unit "f1_1,f1_2" "fpu_pipe")
|
||||
|
||||
;; The floating point units.
|
||||
|
||||
(define_cpu_unit "F1,F2,F3,FS" "fpu_pipe")
|
||||
|
||||
;; This is basically the MA unit of SH4
|
||||
;; used in LOAD/STORE pipeline.
|
||||
|
||||
(define_cpu_unit "memory" "inst_pipeline")
|
||||
|
||||
;; The address calculator used for branch instructions.
|
||||
;; This will be reserved with "issue" of branch instructions
|
||||
;; and this is to make sure that no two branch instructions
|
||||
;; can be issued in parallel.
|
||||
|
||||
(define_cpu_unit "pcr_addrcalc" "inst_pipeline")
|
||||
|
||||
;; ----------------------------------------------------
|
||||
;; This reservation is to simplify the dual issue description.
|
||||
|
||||
(define_reservation "issue" "pipe_01|pipe_02")
|
||||
|
||||
;; This is to express the locking of D stage.
|
||||
|
||||
(define_reservation "d_lock" "pipe_01+pipe_02")
|
||||
|
||||
;; This is to simplify description where F1,F2,FS
|
||||
;; are used simultaneously.
|
||||
|
||||
(define_reservation "fpu" "F1+F2+FS")
|
||||
|
||||
;; This is to highlight the fact that f1
|
||||
;; cannot overlap with F1.
|
||||
|
||||
(exclusion_set "f1_1,f1_2" "F1")
|
||||
|
||||
;; Although reg moves have a latency of zero
|
||||
;; we need to highlight that they use D stage
|
||||
;; for one cycle.
|
||||
|
||||
(define_insn_reservation "reg_mov" 0
|
||||
(eq_attr "type" "move,fmove")
|
||||
"issue")
|
||||
|
||||
;; Other MT group intructions(1 step operations)
|
||||
;; Group: MT
|
||||
;; Latency: 1
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "mt" 1
|
||||
(eq_attr "insn_class" "mt_group")
|
||||
"issue,nothing")
|
||||
|
||||
;; Fixed Point Arithmetic Instructions(1 step operations)
|
||||
;; Group: EX
|
||||
;; Latency: 1
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "simple_arith" 1
|
||||
(eq_attr "insn_class" "ex_group")
|
||||
"issue,int")
|
||||
|
||||
;; Load Store instructions. (MOV.[BWL]@(d,GBR)
|
||||
;; Group: LS
|
||||
;; Latency: 2
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "load_store" 2
|
||||
(eq_attr "type" "load,load_si,pcload,pcload_si,store")
|
||||
"issue,memory*2")
|
||||
|
||||
;; Branch (BF,BF/S,BT,BT/S,BRA)
|
||||
;; Group: BR
|
||||
;; Latency: 2 (or 1) Actually Observed to be 5/7
|
||||
;; Issue Rate: 1
|
||||
;; The latency is 1 when displacement is 0.
|
||||
;; This reservation can be further broken into 2
|
||||
;; 1. branch_zero : One with latency 1 and in the TEST
|
||||
;; part it also checks for 0 (ZERO) displacement
|
||||
;; 2. branch: Latency 2.
|
||||
|
||||
(define_insn_reservation "branch_zero" 5
|
||||
(and (eq_attr "type" "cbranch")
|
||||
(eq_attr "length" "2"))
|
||||
"(issue+pcr_addrcalc),pcr_addrcalc,nothing")
|
||||
|
||||
(define_insn_reservation "branch" 7
|
||||
(eq_attr "type" "cbranch")
|
||||
"(issue+pcr_addrcalc),pcr_addrcalc,nothing")
|
||||
|
||||
;; Branch Far (JMP,RTS,BRAF)
|
||||
;; Group: CO
|
||||
;; Latency: 3
|
||||
;; Issue Rate: 2
|
||||
;; Since issue stage (D stage) is blocked for 2nd cycle,
|
||||
;; cpu_unit int is reserved since it might be required for far
|
||||
;; address calculation.
|
||||
|
||||
(define_insn_reservation "branch_far" 12
|
||||
(and (eq_attr "type" "jump,return")
|
||||
(eq_attr "length" "6"))
|
||||
"d_lock*2,int+pcr_addrcalc,pcr_addrcalc")
|
||||
|
||||
;; RTE
|
||||
;; Group: CO
|
||||
;; atency: 5
|
||||
;; Issue Rate: 5
|
||||
;; this instruction can be executed in any of the pipelines
|
||||
;; and blocks the pipeline for next 4 stages.
|
||||
|
||||
(define_insn_reservation "return_from_exp" 5
|
||||
(eq_attr "type" "rte")
|
||||
"(issue+pcr_addrcalc),d_lock*4,int+pcr_addrcalc,nothing")
|
||||
|
||||
;; OCBP, OCBWB
|
||||
;; Group: CO
|
||||
;; Latency: 5
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "ocbwb" 5
|
||||
(eq_attr "insn_class" "cwb")
|
||||
"issue,(int+memory),memory*5")
|
||||
|
||||
;; LDS to PR,JSR
|
||||
;; Group: CO
|
||||
;; Latency: 3
|
||||
;; Issue Rate: 2
|
||||
;; The SX stage is blocked for last 2 cycles.
|
||||
|
||||
(define_insn_reservation "lds_to_pr" 3
|
||||
(eq_attr "type" "prset,call,sfunc")
|
||||
"(issue+pcr_addrcalc),(issue+int+pcr_addrcalc),(int+pcr_addrcalc)*2")
|
||||
|
||||
;; LDS.L to PR
|
||||
;; Group: CO
|
||||
;; Latency: 3
|
||||
;; Issue Rate: 2
|
||||
;; The SX unit is blocked for last 2 cycles.
|
||||
|
||||
(define_insn_reservation "ldsmem_to_pr" 3
|
||||
(eq_attr "type" "pload")
|
||||
"(issue+pcr_addrcalc),(issue+int+pcr_addrcalc),(int+memory+pcr_addrcalc),(int+pcr_addrcalc)")
|
||||
|
||||
;; STS from PR
|
||||
;; Group: CO
|
||||
;; Latency: 2
|
||||
;; Issue Rate: 2
|
||||
;; The SX unit in second and third cycles.
|
||||
|
||||
(define_insn_reservation "sts_from_pr" 2
|
||||
(eq_attr "type" "prget")
|
||||
"(issue+pcr_addrcalc),(pipe_01+int+pcr_addrcalc),(int+pcr_addrcalc),nothing")
|
||||
|
||||
;; STS.L from PR
|
||||
;; Group: CO
|
||||
;; Latency: 2
|
||||
;; Issue Rate: 2
|
||||
|
||||
(define_insn_reservation "prload_mem" 2
|
||||
(eq_attr "type" "pstore")
|
||||
"(issue+pcr_addrcalc),(pipe_01+int+pcr_addrcalc),(int+memory+pcr_addrcalc),memory")
|
||||
|
||||
;; LDS to FPSCR
|
||||
;; Group: CO
|
||||
;; Latency: 4
|
||||
;; Issue Rate: 1
|
||||
;; F1 is blocked for last three cycles.
|
||||
|
||||
(define_insn_reservation "fpscr_store" 4
|
||||
(eq_attr "insn_class" "lds_to_fpscr")
|
||||
"issue,int,F1*3")
|
||||
|
||||
;; LDS.L to FPSCR
|
||||
;; Group: CO
|
||||
;; Latency: 1 / 4
|
||||
;; Latency to update Rn is 1 and latency to update FPSCR is 4
|
||||
;; Issue Rate: 1
|
||||
;; F1 is blocked for last three cycles.
|
||||
|
||||
(define_insn_reservation "fpscr_store_mem" 4
|
||||
(eq_attr "insn_class" "ldsmem_to_fpscr")
|
||||
"issue,(int+memory),(F1+memory),F1*2")
|
||||
|
||||
|
||||
;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W)
|
||||
;; Group: CO
|
||||
;; Latency: 4 / 4
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "multi" 4
|
||||
(eq_attr "type" "smpy,dmpy")
|
||||
"issue,(issue+int+f1_1),(int+f1_1),(f1_1|f1_2)*2,F2,FS")
|
||||
|
||||
|
||||
;; Single precision floating point computation FCMP/EQ,
|
||||
;; FCP/GT, FADD, FLOAT, FMAC, FMUL, FSUB, FTRC, FRVHG, FSCHG
|
||||
;; Group: FE
|
||||
;; Latency: 4
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "fp_arith" 4
|
||||
(eq_attr "type" "fp")
|
||||
"issue,F1,F2,FS")
|
||||
|
||||
;; Single Precision FDIV/SQRT
|
||||
;; Group: FE
|
||||
;; Latency: 12/13
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "fp_div" 13
|
||||
(eq_attr "type" "fdiv")
|
||||
"issue,F1+F3,F1+F2+F3,F3*7,F1+F3,F2,FS")
|
||||
|
||||
;; Double Precision floating point computation
|
||||
;; (FCNVDS, FCNVSD, FLOAT, FTRC)
|
||||
;; Group: FE
|
||||
;; Latency: (3,4)/5
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "dp_float" 5
|
||||
(eq_attr "type" "dfp_conv")
|
||||
"issue,F1,F1+F2,F2+FS,FS")
|
||||
|
||||
;; Double-precision floating-point (FADD ,FMUL,FSUB)
|
||||
;; Group: FE
|
||||
;; Latency: (7,8)/9
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "fp_double_arith" 9
|
||||
(eq_attr "type" "dfp_arith")
|
||||
"issue,F1,F1+F2,fpu*4,F2+FS,FS")
|
||||
|
||||
;; Double-precision FCMP (FCMP/EQ,FCMP/GT)
|
||||
;; Group: FE
|
||||
;; Latency: 3/5
|
||||
;; Issue Rate: 2
|
||||
|
||||
(define_insn_reservation "fp_double_cmp" 5
|
||||
(eq_attr "type" "dfp_cmp")
|
||||
"issue,(issue+F1),F1+F2,F2+FS,FS")
|
||||
|
||||
;; Double precision FDIV/SQRT
|
||||
;; Group: FE
|
||||
;; Latency: (24,25)/26
|
||||
;; Issue Rate: 1
|
||||
|
||||
(define_insn_reservation "dp_div" 26
|
||||
(eq_attr "type" "dfdiv")
|
||||
"issue,F1+F3,F1+F2+F3,F2+F3+FS,F3*16,F1+F3,F1+F2+F3,fpu+F3,F2+FS,FS")
|
||||
|
||||
|
|
|
@ -117,6 +117,8 @@ extern char *sparc_v8plus_shift PARAMS ((rtx *, rtx, const char *));
|
|||
32 bits of REG are 0 before INSN. */
|
||||
extern int sparc_check_64 PARAMS ((rtx, rtx));
|
||||
extern rtx gen_df_reg PARAMS ((rtx, int));
|
||||
/* Used for DFA scheduling when cpu is ultrasparc. */
|
||||
extern int ultrasparc_store_bypass_p PARAMS ((rtx, rtx));
|
||||
extern int sparc_extra_constraint_check PARAMS ((rtx, int, int));
|
||||
#endif /* RTX_CODE */
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -122,9 +122,11 @@ extern enum cmodel sparc_cmodel;
|
|||
#define TARGET_CPU_sparcv9 7 /* alias */
|
||||
#define TARGET_CPU_sparc64 7 /* alias */
|
||||
#define TARGET_CPU_ultrasparc 8
|
||||
#define TARGET_CPU_ultrasparc3 9
|
||||
|
||||
#if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
|
||||
|| TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
|
||||
|| TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
|
||||
|| TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
|
||||
|
||||
#define CPP_CPU32_DEFAULT_SPEC ""
|
||||
#define ASM_CPU32_DEFAULT_SPEC ""
|
||||
|
@ -141,6 +143,10 @@ extern enum cmodel sparc_cmodel;
|
|||
#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
|
||||
#define ASM_CPU64_DEFAULT_SPEC "-Av9a"
|
||||
#endif
|
||||
#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
|
||||
#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
|
||||
#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
|
@ -230,6 +236,7 @@ Unrecognized value in TARGET_CPU_DEFAULT.
|
|||
%{mcpu=sparclite86x:-D__sparclite86x__} \
|
||||
%{mcpu=v9:-D__sparc_v9__} \
|
||||
%{mcpu=ultrasparc:-D__sparc_v9__} \
|
||||
%{mcpu=ultrasparc3:-D__sparc_v9__} \
|
||||
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
|
||||
"
|
||||
|
||||
|
@ -296,6 +303,7 @@ Unrecognized value in TARGET_CPU_DEFAULT.
|
|||
%{mv8plus:-Av8plus} \
|
||||
%{mcpu=v9:-Av9} \
|
||||
%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
|
||||
%{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \
|
||||
%{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
|
||||
"
|
||||
|
||||
|
@ -623,7 +631,8 @@ enum processor_type {
|
|||
PROCESSOR_SPARCLET,
|
||||
PROCESSOR_TSC701,
|
||||
PROCESSOR_V9,
|
||||
PROCESSOR_ULTRASPARC
|
||||
PROCESSOR_ULTRASPARC,
|
||||
PROCESSOR_ULTRASPARC3
|
||||
};
|
||||
|
||||
/* This is set from -m{cpu,tune}=xxx. */
|
||||
|
@ -2622,7 +2631,8 @@ do { \
|
|||
(((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
|
||||
|| (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
|
||||
|| (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS) \
|
||||
? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
|
||||
? ((sparc_cpu == PROCESSOR_ULTRASPARC \
|
||||
|| sparc_cpu == PROCESSOR_ULTRASPARC3) ? 12 : 6) : 2)
|
||||
|
||||
/* Provide the cost of a branch. For pre-v9 processors we use
|
||||
a value of 3 to take into account the potential annulling of
|
||||
|
@ -2653,6 +2663,8 @@ do { \
|
|||
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
|
||||
return (GET_MODE (X) == DImode ? \
|
||||
COSTS_N_INSNS (34) : COSTS_N_INSNS (19)); \
|
||||
if (sparc_cpu == PROCESSOR_ULTRASPARC3) \
|
||||
return COSTS_N_INSNS (6); \
|
||||
return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
|
||||
case DIV: \
|
||||
case UDIV: \
|
||||
|
@ -2661,6 +2673,9 @@ do { \
|
|||
if (sparc_cpu == PROCESSOR_ULTRASPARC) \
|
||||
return (GET_MODE (X) == DImode ? \
|
||||
COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); \
|
||||
if (sparc_cpu == PROCESSOR_ULTRASPARC3) \
|
||||
return (GET_MODE (X) == DImode ? \
|
||||
COSTS_N_INSNS (71) : COSTS_N_INSNS (40)); \
|
||||
return COSTS_N_INSNS (25); \
|
||||
/* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
|
||||
so that cse will favor the latter. */ \
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
;; 16 embmedany_textlo
|
||||
;; 18 sethm
|
||||
;; 19 setlo
|
||||
;; 20 cycle_display
|
||||
;;
|
||||
;; UNSPEC_VOLATILE: 0 blockage
|
||||
;; 1 flush_register_windows
|
||||
|
@ -61,7 +62,7 @@
|
|||
|
||||
;; Attribute for cpu type.
|
||||
;; These must match the values for enum processor_type in sparc.h.
|
||||
(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc"
|
||||
(define_attr "cpu" "v7,cypress,v8,supersparc,sparclite,f930,f934,hypersparc,sparclite86x,sparclet,tsc701,v9,ultrasparc,ultrasparc3"
|
||||
(const (symbol_ref "sparc_cpu_attr")))
|
||||
|
||||
;; Attribute for the instruction set.
|
||||
|
@ -82,9 +83,8 @@
|
|||
|
||||
;; Insn type.
|
||||
|
||||
;; If you add any new type here, please update ultrasparc_sched_reorder too.
|
||||
(define_attr "type"
|
||||
"ialu,compare,shift,load,sload,store,uncond_branch,branch,call,sibcall,call_no_delay_slot,return,imul,idiv,fpload,fpstore,fp,fpmove,fpcmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
|
||||
"ialu,compare,shift,load,sload,store,uncond_branch,branch,call,sibcall,call_no_delay_slot,return,imul,idiv,fpload,fpstore,fp,fpmove,fpcmove,fpcrmove,fpcmp,fpmul,fpdivs,fpdivd,fpsqrts,fpsqrtd,cmove,multi,misc"
|
||||
(const_string "ialu"))
|
||||
|
||||
;; true if branch/call has empty delay slot and will emit a nop in it
|
||||
|
@ -163,6 +163,9 @@
|
|||
;; FP precision.
|
||||
(define_attr "fptype" "single,double" (const_string "single"))
|
||||
|
||||
;; UltraSPARC-III integer load type.
|
||||
(define_attr "us3load_type" "2cycle,3cycle" (const_string "2cycle"))
|
||||
|
||||
(define_asm_attributes
|
||||
[(set_attr "length" "2")
|
||||
(set_attr "type" "multi")])
|
||||
|
@ -245,344 +248,525 @@
|
|||
[(eq_attr "in_uncond_branch_delay" "true")
|
||||
(nil) (nil)])
|
||||
|
||||
;; Function units of the SPARC
|
||||
;; DFA scheduling on the SPARC
|
||||
|
||||
;; (define_function_unit {name} {num-units} {n-users} {test}
|
||||
;; {ready-delay} {issue-delay} [{conflict-list}])
|
||||
(define_automaton "cypress_0,cypress_1,supersparc_0,supersparc_1,hypersparc_0,hypersparc_1,sparclet,ultrasparc_0,ultrasparc_1,ultrasparc3_0,ultrasparc3_1")
|
||||
|
||||
;; The integer ALU.
|
||||
;; (Noted only for documentation; units that take one cycle do not need to
|
||||
;; be specified.)
|
||||
;; Cypress scheduling
|
||||
|
||||
;; On the sparclite, integer multiply takes 1, 3, or 5 cycles depending on
|
||||
;; the inputs.
|
||||
(define_cpu_unit "cyp_memory, cyp_fpalu" "cypress_0")
|
||||
(define_cpu_unit "cyp_fpmds" "cypress_1")
|
||||
|
||||
;; ---- cypress CY7C602 scheduling:
|
||||
;; Memory with load-delay of 1 (i.e., 2 cycle load).
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_insn_reservation "cyp_load" 2
|
||||
(and (eq_attr "cpu" "cypress")
|
||||
(eq_attr "type" "load,sload,fpload"))
|
||||
2 2)
|
||||
"cyp_memory, nothing")
|
||||
|
||||
;; SPARC has two floating-point units: the FP ALU,
|
||||
;; and the FP MUL/DIV/SQRT unit.
|
||||
;; Instruction timings on the CY7C602 are as follows
|
||||
;; FABSs 4
|
||||
;; FADDs/d 5/5
|
||||
;; FCMPs/d 4/4
|
||||
;; FDIVs/d 23/37
|
||||
;; FMOVs 4
|
||||
;; FMULs/d 5/7
|
||||
;; FNEGs 4
|
||||
;; FSQRTs/d 34/63
|
||||
;; FSUBs/d 5/5
|
||||
;; FdTOi/s 5/5
|
||||
;; FsTOi/d 5/5
|
||||
;; FiTOs/d 9/5
|
||||
|
||||
;; The CY7C602 can only support 2 fp isnsn simultaneously.
|
||||
;; More insns cause the chip to stall.
|
||||
|
||||
(define_function_unit "fp_alu" 1 0
|
||||
(define_insn_reservation "cyp_fp_alu" 5
|
||||
(and (eq_attr "cpu" "cypress")
|
||||
(eq_attr "type" "fp,fpmove"))
|
||||
5 5)
|
||||
"cyp_fpalu, nothing*3")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "cyp_fp_mult" 7
|
||||
(and (eq_attr "cpu" "cypress")
|
||||
(eq_attr "type" "fpmul"))
|
||||
7 7)
|
||||
"cyp_fpmds, nothing*5")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "cyp_fp_div" 37
|
||||
(and (eq_attr "cpu" "cypress")
|
||||
(eq_attr "type" "fpdivs,fpdivd"))
|
||||
37 37)
|
||||
"cyp_fpmds, nothing*35")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "cyp_fp_sqrt" 63
|
||||
(and (eq_attr "cpu" "cypress")
|
||||
(eq_attr "type" "fpsqrts,fpsqrtd"))
|
||||
63 63)
|
||||
"cyp_fpmds, nothing*61")
|
||||
|
||||
;; ----- The TMS390Z55 scheduling
|
||||
;; The Supersparc can issue 1 - 3 insns per cycle: up to two integer,
|
||||
;; one ld/st, one fp.
|
||||
;; Memory delivers its result in one cycle to IU, zero cycles to FP
|
||||
;; SuperSPARC scheduling
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_cpu_unit "ss_memory, ss_shift, ss_iwport0, ss_iwport1" "supersparc_0")
|
||||
(define_cpu_unit "ss_fpalu" "supersparc_0")
|
||||
(define_cpu_unit "ss_fpmds" "supersparc_1")
|
||||
|
||||
(define_reservation "ss_iwport" "(ss_iwport0 | ss_iwport1)")
|
||||
|
||||
(define_insn_reservation "ss_iuload" 1
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "load,sload"))
|
||||
1 1)
|
||||
"ss_memory")
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
;; Ok, fpu loads deliver the result in zero cycles. But we
|
||||
;; have to show the ss_memory reservation somehow, thus...
|
||||
(define_insn_reservation "ss_fpload" 0
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fpload"))
|
||||
0 1)
|
||||
"ss_memory")
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_bypass 0 "ss_fpload" "ss_fp_alu,ss_fp_mult,ss_fp_divs,ss_fp_divd,ss_fp_sqrt")
|
||||
|
||||
(define_insn_reservation "ss_store" 1
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "store,fpstore"))
|
||||
1 1)
|
||||
"ss_memory")
|
||||
|
||||
(define_function_unit "shift" 1 0
|
||||
(define_insn_reservation "ss_ialu_shift" 1
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "shift"))
|
||||
1 1)
|
||||
"ss_shift + ss_iwport")
|
||||
|
||||
;; There are only two write ports to the integer register file
|
||||
;; A store also uses a write port
|
||||
|
||||
(define_function_unit "iwport" 2 0
|
||||
(define_insn_reservation "ss_ialu_any" 1
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "load,sload,store,shift,ialu"))
|
||||
1 1)
|
||||
"ss_iwport")
|
||||
|
||||
;; Timings; throughput/latency
|
||||
;; FADD 1/3 add/sub, format conv, compar, abs, neg
|
||||
;; FMUL 1/3
|
||||
;; FDIVs 4/6
|
||||
;; FDIVd 7/9
|
||||
;; FSQRTs 6/8
|
||||
;; FSQRTd 10/12
|
||||
;; IMUL 4/4
|
||||
|
||||
(define_function_unit "fp_alu" 1 0
|
||||
(define_insn_reservation "ss_fp_alu" 3
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fp,fpmove,fpcmp"))
|
||||
3 1)
|
||||
"ss_fpalu, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "ss_fp_mult" 3
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fpmul"))
|
||||
3 1)
|
||||
"ss_fpmds, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "ss_fp_divs" 6
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fpdivs"))
|
||||
6 4)
|
||||
"ss_fpmds*4, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "ss_fp_divd" 9
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fpdivd"))
|
||||
9 7)
|
||||
"ss_fpmds*7, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "ss_fp_sqrt" 12
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "fpsqrts,fpsqrtd"))
|
||||
12 10)
|
||||
"ss_fpmds*10, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "ss_imul" 4
|
||||
(and (eq_attr "cpu" "supersparc")
|
||||
(eq_attr "type" "imul"))
|
||||
4 4)
|
||||
"ss_fpmds*4")
|
||||
|
||||
;; ----- hypersparc/sparclite86x scheduling
|
||||
;; The Hypersparc can issue 1 - 2 insns per cycle. The dual issue cases are:
|
||||
;; L-Ld/St I-Int F-Float B-Branch LI/LF/LB/II/IF/IB/FF/FB
|
||||
;; II/FF case is only when loading a 32 bit hi/lo constant
|
||||
;; Single issue insns include call, jmpl, u/smul, u/sdiv, lda, sta, fcmp
|
||||
;; Memory delivers its result in one cycle to IU
|
||||
;; HyperSPARC/sparclite86x scheduling
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_cpu_unit "hs_memory,hs_branch,hs_shift,hs_fpalu" "hypersparc_0")
|
||||
(define_cpu_unit "hs_fpmds" "hypersparc_1")
|
||||
|
||||
(define_insn_reservation "hs_load" 1
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "load,sload,fpload"))
|
||||
1 1)
|
||||
"hs_memory")
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_insn_reservation "hs_store" 2
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "store,fpstore"))
|
||||
2 1)
|
||||
"hs_memory, nothing")
|
||||
|
||||
(define_function_unit "sparclite86x_branch" 1 0
|
||||
(define_insn_reservation "hs_slbranch" 1
|
||||
(and (eq_attr "cpu" "sparclite86x")
|
||||
(eq_attr "type" "branch"))
|
||||
1 1)
|
||||
"hs_branch")
|
||||
|
||||
;; integer multiply insns
|
||||
(define_function_unit "sparclite86x_shift" 1 0
|
||||
(define_insn_reservation "hs_slshift" 1
|
||||
(and (eq_attr "cpu" "sparclite86x")
|
||||
(eq_attr "type" "shift"))
|
||||
1 1)
|
||||
"hs_shift")
|
||||
|
||||
(define_function_unit "fp_alu" 1 0
|
||||
(define_insn_reservation "hs_fp_alu" 1
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "fp,fpmove,fpcmp"))
|
||||
1 1)
|
||||
"hs_fpalu")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "hs_fp_mult" 1
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "fpmul"))
|
||||
1 1)
|
||||
"hs_fpmds")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "hs_fp_divs" 8
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "fpdivs"))
|
||||
8 6)
|
||||
"hs_fpmds*6, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "hs_fp_divd" 12
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "fpdivd"))
|
||||
12 10)
|
||||
"hs_fpmds*10, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "hs_fp_sqrt" 17
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "fpsqrts,fpsqrtd"))
|
||||
17 15)
|
||||
"hs_fpmds*15, nothing*2")
|
||||
|
||||
(define_function_unit "fp_mds" 1 0
|
||||
(define_insn_reservation "hs_imul" 17
|
||||
(and (ior (eq_attr "cpu" "hypersparc") (eq_attr "cpu" "sparclite86x"))
|
||||
(eq_attr "type" "imul"))
|
||||
17 15)
|
||||
"hs_fpmds*15, nothing*2")
|
||||
|
||||
;; ----- sparclet tsc701 scheduling
|
||||
;; The tsc701 issues 1 insn per cycle.
|
||||
;; Results may be written back out of order.
|
||||
;; Sparclet tsc701 scheduling
|
||||
|
||||
;; Loads take 2 extra cycles to complete and 4 can be buffered at a time.
|
||||
(define_cpu_unit "sl_load0,sl_load1,sl_load2,sl_load3" "sparclet")
|
||||
(define_cpu_unit "sl_store,sl_imul" "sparclet")
|
||||
|
||||
(define_function_unit "tsc701_load" 4 1
|
||||
(define_reservation "sl_load_any" "(sl_load0 | sl_load1 | sl_load2 | sl_load3)")
|
||||
(define_reservation "sl_load_all" "(sl_load0 + sl_load1 + sl_load2 + sl_load3)")
|
||||
|
||||
(define_insn_reservation "sl_ld" 3
|
||||
(and (eq_attr "cpu" "tsc701")
|
||||
(eq_attr "type" "load,sload"))
|
||||
3 1)
|
||||
(eq_attr "type" "load,sload"))
|
||||
"sl_load_any, sl_load_any, sl_load_any")
|
||||
|
||||
;; Stores take 2(?) extra cycles to complete.
|
||||
;; It is desirable to not have any memory operation in the following 2 cycles.
|
||||
;; (??? or 2 memory ops in the case of std).
|
||||
|
||||
(define_function_unit "tsc701_store" 1 0
|
||||
(define_insn_reservation "sl_st" 3
|
||||
(and (eq_attr "cpu" "tsc701")
|
||||
(eq_attr "type" "store"))
|
||||
3 3
|
||||
[(eq_attr "type" "load,sload,store")])
|
||||
"(sl_store+sl_load_all)*3")
|
||||
|
||||
;; The multiply unit has a latency of 5.
|
||||
(define_function_unit "tsc701_mul" 1 0
|
||||
(define_insn_reservation "sl_imul" 5
|
||||
(and (eq_attr "cpu" "tsc701")
|
||||
(eq_attr "type" "imul"))
|
||||
5 5)
|
||||
"sl_imul*5")
|
||||
|
||||
;; ----- The UltraSPARC-1 scheduling
|
||||
;; UltraSPARC has two integer units. Shift instructions can only execute
|
||||
;; on IE0. Condition code setting instructions, call, and jmpl (including
|
||||
;; the ret and retl pseudo-instructions) can only execute on IE1.
|
||||
;; Branch on register uses IE1, but branch on condition code does not.
|
||||
;; Conditional moves take 2 cycles. No other instruction can issue in the
|
||||
;; same cycle as a conditional move.
|
||||
;; Multiply and divide take many cycles during which no other instructions
|
||||
;; can issue.
|
||||
;; Memory delivers its result in two cycles (except for signed loads,
|
||||
;; which take one cycle more). One memory instruction can be issued per
|
||||
;; cycle.
|
||||
;; UltraSPARC-I/II scheduling
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(define_cpu_unit "us1_fdivider,us1_fpm" "ultrasparc_0");
|
||||
(define_cpu_unit "us1_fpa,us1_load_writeback" "ultrasparc_1")
|
||||
(define_cpu_unit "us1_fps_0,us1_fps_1,us1_fpd_0,us1_fpd_1" "ultrasparc_1")
|
||||
(define_cpu_unit "us1_slot0,us1_slot1,us1_slot2,us1_slot3" "ultrasparc_1")
|
||||
(define_cpu_unit "us1_ieu0,us1_ieu1,us1_cti,us1_lsu" "ultrasparc_1")
|
||||
|
||||
(define_reservation "us1_slot012" "(us1_slot0 | us1_slot1 | us1_slot2)")
|
||||
(define_reservation "us1_slotany" "(us1_slot0 | us1_slot1 | us1_slot2 | us1_slot3)")
|
||||
(define_reservation "us1_single_issue" "us1_slot0 + us1_slot1 + us1_slot2 + us1_slot3")
|
||||
|
||||
(define_reservation "us1_fp_single" "(us1_fps_0 | us1_fps_1)")
|
||||
(define_reservation "us1_fp_double" "(us1_fpd_0 | us1_fpd_1)")
|
||||
;; This is a simplified representation of the issue at hand.
|
||||
;; For most cases, going from one FP precision type insn to another
|
||||
;; just breaks up the insn group. However for some cases, such
|
||||
;; a situation causes the second insn to stall 2 more cycles.
|
||||
(exclusion_set "us1_fps_0,us1_fps_1" "us1_fpd_0,us1_fpd_1")
|
||||
|
||||
;; If we have to schedule an ieu1 specific instruction and we want
|
||||
;; to reserve the ieu0 unit as well, we must reserve it first. So for
|
||||
;; example we could not schedule this sequence:
|
||||
;; COMPARE IEU1
|
||||
;; IALU IEU0
|
||||
;; but we could schedule them together like this:
|
||||
;; IALU IEU0
|
||||
;; COMPARE IEU1
|
||||
;; This basically requires that ieu0 is reserved before ieu1 when
|
||||
;; it is required that both be reserved.
|
||||
(absence_set "us1_ieu0" "us1_ieu1")
|
||||
|
||||
;; This defines the slotting order. Most IEU instructions can only
|
||||
;; execute in the first three slots, FPU and branches can go into
|
||||
;; any slot. We represent instructions which "break the group"
|
||||
;; as requiring reservation of us1_slot0.
|
||||
(absence_set "us1_slot0" "us1_slot1,us1_slot2,us1_slot3")
|
||||
(absence_set "us1_slot1" "us1_slot2,us1_slot3")
|
||||
(absence_set "us1_slot2" "us1_slot3")
|
||||
|
||||
(define_insn_reservation "us1_simple_ieuN" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "load,fpload"))
|
||||
2 1)
|
||||
(eq_attr "type" "ialu"))
|
||||
"(us1_ieu0 | us1_ieu1) + us1_slot012")
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "sload"))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "memory" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "store,fpstore"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ieuN" 2 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "ialu,shift,compare,call,sibcall,call_no_delay_slot,uncond_branch"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ieu0" 1 0
|
||||
(define_insn_reservation "us1_simple_ieu0" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "shift"))
|
||||
1 1)
|
||||
"us1_ieu0 + us1_slot012")
|
||||
|
||||
(define_function_unit "ieu0" 1 0
|
||||
(define_insn_reservation "us1_simple_ieu1" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "compare"))
|
||||
"us1_ieu1 + us1_slot012")
|
||||
|
||||
(define_insn_reservation "us1_cmove" 2
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "cmove"))
|
||||
2 1)
|
||||
"us1_single_issue, nothing")
|
||||
|
||||
(define_function_unit "ieu1" 1 0
|
||||
(define_insn_reservation "us1_imul" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "compare,call,sibcall,call_no_delay_slot,uncond_branch"))
|
||||
1 1)
|
||||
(eq_attr "type" "imul"))
|
||||
"us1_single_issue")
|
||||
|
||||
(define_function_unit "cti" 1 0
|
||||
(define_insn_reservation "us1_idiv" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "idiv"))
|
||||
"us1_single_issue")
|
||||
|
||||
;; For loads, the "delayed return mode" behavior of the chip
|
||||
;; is represented using the us1_load_writeback resource.
|
||||
(define_insn_reservation "us1_load" 2
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "load,fpload"))
|
||||
"us1_lsu + us1_slot012, us1_load_writeback")
|
||||
|
||||
(define_insn_reservation "us1_load_signed" 3
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "sload"))
|
||||
"us1_lsu + us1_slot012, nothing, us1_load_writeback")
|
||||
|
||||
(define_insn_reservation "us1_store" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "store,fpstore"))
|
||||
"us1_lsu + us1_slot012")
|
||||
|
||||
(define_insn_reservation "us1_branch" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "branch"))
|
||||
1 1)
|
||||
"us1_cti + us1_slotany")
|
||||
|
||||
;; Timings; throughput/latency
|
||||
;; FMOV 1/1 fmov, fabs, fneg
|
||||
;; FMOVcc 1/2
|
||||
;; FADD 1/3 add/sub, format conv, compar
|
||||
;; FMUL 1/3
|
||||
;; FDIVs 12/12
|
||||
;; FDIVd 22/22
|
||||
;; FSQRTs 12/12
|
||||
;; FSQRTd 22/22
|
||||
;; FCMP takes 1 cycle to branch, 2 cycles to conditional move.
|
||||
;;
|
||||
;; FDIV{s,d}/FSQRT{s,d} are given their own unit since they only
|
||||
;; use the FPM multiplier for final rounding 3 cycles before the
|
||||
;; end of their latency and we have no real way to model that.
|
||||
;;
|
||||
;; ??? This is really bogus because the timings really depend upon
|
||||
;; who uses the result. We should record who the user is with
|
||||
;; more descriptive 'type' attribute names and account for these
|
||||
;; issues in ultrasparc_adjust_cost.
|
||||
|
||||
(define_function_unit "fadd" 1 0
|
||||
(define_insn_reservation "us1_call_jmpl" 1
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
|
||||
"us1_cti + us1_ieu1 + us1_slot0")
|
||||
|
||||
(define_insn_reservation "us1_fmov_single" 1
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpmove"))
|
||||
(eq_attr "fptype" "single"))
|
||||
"us1_fpa + us1_fp_single + us1_slotany")
|
||||
|
||||
(define_insn_reservation "us1_fmov_double" 1
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpmove"))
|
||||
(eq_attr "fptype" "double"))
|
||||
"us1_fpa + us1_fp_double + us1_slotany")
|
||||
|
||||
(define_insn_reservation "us1_fcmov_single" 2
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpcmove,fpcrmove"))
|
||||
(eq_attr "fptype" "single"))
|
||||
"us1_fpa + us1_fp_single + us1_slotany, nothing")
|
||||
|
||||
(define_insn_reservation "us1_fcmov_double" 2
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpcmove,fpcrmove"))
|
||||
(eq_attr "fptype" "double"))
|
||||
"us1_fpa + us1_fp_double + us1_slotany, nothing")
|
||||
|
||||
(define_insn_reservation "us1_faddsub_single" 4
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fp"))
|
||||
(eq_attr "fptype" "single"))
|
||||
"us1_fpa + us1_fp_single + us1_slotany, nothing*3")
|
||||
|
||||
(define_insn_reservation "us1_faddsub_double" 4
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fp"))
|
||||
(eq_attr "fptype" "double"))
|
||||
"us1_fpa + us1_fp_double + us1_slotany, nothing*3")
|
||||
|
||||
(define_insn_reservation "us1_fpcmp_single" 1
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpcmp"))
|
||||
(eq_attr "fptype" "single"))
|
||||
"us1_fpa + us1_fp_single + us1_slotany")
|
||||
|
||||
(define_insn_reservation "us1_fpcmp_double" 1
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpcmp"))
|
||||
(eq_attr "fptype" "double"))
|
||||
"us1_fpa + us1_fp_double + us1_slotany")
|
||||
|
||||
(define_insn_reservation "us1_fmult_single" 4
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpmul"))
|
||||
(eq_attr "fptype" "single"))
|
||||
"us1_fpm + us1_fp_single + us1_slotany, nothing*3")
|
||||
|
||||
(define_insn_reservation "us1_fmult_double" 4
|
||||
(and (and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpmul"))
|
||||
(eq_attr "fptype" "double"))
|
||||
"us1_fpm + us1_fp_double + us1_slotany, nothing*3")
|
||||
|
||||
;; This is actually in theory dangerous, because it is possible
|
||||
;; for the chip to prematurely dispatch the dependant instruction
|
||||
;; in the G stage, resulting in a 9 cycle stall. However I have never
|
||||
;; been able to trigger this case myself even with hand written code,
|
||||
;; so it must require some rare complicated pipeline state.
|
||||
(define_bypass 3
|
||||
"us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double"
|
||||
"us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
|
||||
|
||||
;; Floating point divide and square root use the multiplier unit
|
||||
;; for final rounding 3 cycles before the divide/sqrt is complete.
|
||||
|
||||
(define_insn_reservation "us1_fdivs"
|
||||
13
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpdivs,fpsqrts"))
|
||||
"(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*8, (us1_fpm + us1_fdivider), us1_fdivider*2"
|
||||
)
|
||||
|
||||
(define_bypass
|
||||
12
|
||||
"us1_fdivs"
|
||||
"us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
|
||||
|
||||
(define_insn_reservation "us1_fdivd"
|
||||
23
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpdivd,fpsqrtd"))
|
||||
"(us1_fpm + us1_fdivider + us1_slot0), us1_fdivider*18, (us1_fpm + us1_fdivider), us1_fdivider*2"
|
||||
)
|
||||
(define_bypass
|
||||
22
|
||||
"us1_fdivd"
|
||||
"us1_faddsub_single,us1_faddsub_double,us1_fmult_single,us1_fmult_double")
|
||||
|
||||
;; Any store may multi issue with the insn creating the source
|
||||
;; data as long as that creating insn is not an FPU div/sqrt.
|
||||
;; We need a special guard function because this bypass does
|
||||
;; not apply to the address inputs of the store.
|
||||
(define_bypass 0 "us1_simple_ieuN,us1_simple_ieu1,us1_simple_ieu0,us1_faddsub_single,us1_faddsub_double,us1_fmov_single,us1_fmov_double,us1_fcmov_single,us1_fcmov_double,us1_fmult_single,us1_fmult_double" "us1_store"
|
||||
"ultrasparc_store_bypass_p")
|
||||
|
||||
;; An integer branch may execute in the same cycle as the compare
|
||||
;; creating the condition codes.
|
||||
(define_bypass 0 "us1_simple_ieu1" "us1_branch")
|
||||
|
||||
;; UltraSPARC-III scheduling
|
||||
;;
|
||||
;; A much simpler beast, no silly slotting rules and both
|
||||
;; integer units are fully symmetric. It does still have
|
||||
;; single-issue instructions though.
|
||||
|
||||
(define_cpu_unit "us3_a0,us3_a1,us3_ms,us3_br,us3_fpm" "ultrasparc3_0")
|
||||
(define_cpu_unit "us3_slot0,us3_slot1,us3_slot2,us3_slot3,us3_fpa" "ultrasparc3_1")
|
||||
(define_cpu_unit "us3_load_writeback" "ultrasparc3_1")
|
||||
|
||||
(define_reservation "us3_slotany" "(us3_slot0 | us3_slot1 | us3_slot2 | us3_slot3)")
|
||||
(define_reservation "us3_single_issue" "us3_slot0 + us3_slot1 + us3_slot2 + us3_slot3")
|
||||
(define_reservation "us3_ax" "(us3_a0 | us3_a1)")
|
||||
|
||||
(define_insn_reservation "us3_integer" 1
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "ialu,shift,compare"))
|
||||
"us3_ax + us3_slotany")
|
||||
|
||||
(define_insn_reservation "us3_cmove" 2
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "cmove"))
|
||||
"us3_ms + us3_br + us3_slotany, nothing")
|
||||
|
||||
;; ??? Not entirely accurate.
|
||||
;; ??? It can run from 6 to 9 cycles. The first cycle the MS pipe
|
||||
;; ??? is needed, and the instruction group is broken right after
|
||||
;; ??? the imul. Then 'helper' instructions are generated to perform
|
||||
;; ??? each further stage of the multiplication, each such 'helper' is
|
||||
;; ??? single group. So, the reservation aspect is represented accurately
|
||||
;; ??? here, but the variable cycles are not.
|
||||
;; ??? Currently I have no idea how to determine the variability, but once
|
||||
;; ??? known we can simply add a define_bypass or similar to model it.
|
||||
(define_insn_reservation "us3_imul" 6
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "imul"))
|
||||
"us3_ms + us3_slotany, us3_single_issue*5")
|
||||
|
||||
(define_insn_reservation "us3_idiv" 71
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "idiv"))
|
||||
"us3_ms + us3_slotany, us3_single_issue*70")
|
||||
|
||||
;; UltraSPARC-III has a similar load delay as UltraSPARC-I/II except
|
||||
;; that all loads except 32-bit/64-bit unsigned loads take the extra
|
||||
;; delay for sign/zero extension.
|
||||
(define_insn_reservation "us3_2cycle_load" 2
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(and (eq_attr "type" "load,fpload")
|
||||
(eq_attr "us3load_type" "2cycle")))
|
||||
"us3_ms + us3_slotany, us3_load_writeback")
|
||||
|
||||
(define_insn_reservation "us3_load_delayed" 3
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(and (eq_attr "type" "load,sload")
|
||||
(eq_attr "us3load_type" "3cycle")))
|
||||
"us3_ms + us3_slotany, nothing, us3_load_writeback")
|
||||
|
||||
(define_insn_reservation "us3_store" 1
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "store,fpstore"))
|
||||
"us3_ms + us3_slotany")
|
||||
|
||||
(define_insn_reservation "us3_branch" 1
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "branch"))
|
||||
"us3_br + us3_slotany")
|
||||
|
||||
(define_insn_reservation "us3_call_jmpl" 1
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "call,sibcall,call_no_delay_slot,uncond_branch"))
|
||||
"us3_br + us3_ms + us3_slotany")
|
||||
|
||||
(define_insn_reservation "us3_fmov" 3
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpmove"))
|
||||
1 1)
|
||||
"us3_fpa + us3_slotany, nothing*2")
|
||||
|
||||
(define_function_unit "fadd" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fcmov" 3
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpcmove"))
|
||||
2 1)
|
||||
"us3_fpa + us3_br + us3_slotany, nothing*2")
|
||||
|
||||
(define_function_unit "fadd" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fcrmov" 3
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpcrmove"))
|
||||
"us3_fpa + us3_ms + us3_slotany, nothing*2")
|
||||
|
||||
(define_insn_reservation "us3_faddsub" 4
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fp"))
|
||||
3 1)
|
||||
"us3_fpa + us3_slotany, nothing*3")
|
||||
|
||||
(define_function_unit "fadd" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fpcmp" 5
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpcmp"))
|
||||
2 1)
|
||||
"us3_fpa + us3_slotany, nothing*4")
|
||||
|
||||
(define_function_unit "fmul" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fmult" 4
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpmul"))
|
||||
3 1)
|
||||
"us3_fpm + us3_slotany, nothing*3")
|
||||
|
||||
(define_function_unit "fadd" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpcmove"))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "fdiv" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fdivs" 17
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpdivs"))
|
||||
12 12)
|
||||
"(us3_fpm + us3_slotany), us3_fpm*14, nothing*2")
|
||||
|
||||
(define_function_unit "fdiv" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(eq_attr "type" "fpdivd"))
|
||||
22 22)
|
||||
|
||||
(define_function_unit "fdiv" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fsqrts" 20
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpsqrts"))
|
||||
12 12)
|
||||
"(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
|
||||
|
||||
(define_function_unit "fdiv" 1 0
|
||||
(and (eq_attr "cpu" "ultrasparc")
|
||||
(define_insn_reservation "us3_fdivd" 20
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpdivd"))
|
||||
"(us3_fpm + us3_slotany), us3_fpm*17, nothing*2")
|
||||
|
||||
(define_insn_reservation "us3_fsqrtd" 29
|
||||
(and (eq_attr "cpu" "ultrasparc3")
|
||||
(eq_attr "type" "fpsqrtd"))
|
||||
22 22)
|
||||
"(us3_fpm + us3_slotany), us3_fpm*26, nothing*2")
|
||||
|
||||
;; Any store may multi issue with the insn creating the source
|
||||
;; data as long as that creating insn is not an FPU div/sqrt.
|
||||
;; We need a special guard function because this bypass does
|
||||
;; not apply to the address inputs of the store.
|
||||
(define_bypass 0 "us3_integer,us3_faddsub,us3_fmov,us3_fcmov,us3_fmult" "us3_store"
|
||||
"ultrasparc_store_bypass_p")
|
||||
|
||||
;; An integer branch may execute in the same cycle as the compare
|
||||
;; creating the condition codes.
|
||||
(define_bypass 0 "us3_integer" "us3_branch")
|
||||
|
||||
;; If FMOVfcc is user of FPCMP, latency is only 1 cycle.
|
||||
(define_bypass 1 "us3_fpcmp" "us3_fcmov")
|
||||
|
||||
|
||||
;; Compare instructions.
|
||||
;; This controls RTL generation and register allocation.
|
||||
|
@ -2181,7 +2365,8 @@
|
|||
mov\\t%1, %0
|
||||
ldub\\t%1, %0
|
||||
stb\\t%r1, %0"
|
||||
[(set_attr "type" "*,load,store")])
|
||||
[(set_attr "type" "*,load,store")
|
||||
(set_attr "us3load_type" "*,3cycle,*")])
|
||||
|
||||
(define_expand "movhi"
|
||||
[(set (match_operand:HI 0 "general_operand" "")
|
||||
|
@ -2255,7 +2440,8 @@
|
|||
sethi\\t%%hi(%a1), %0
|
||||
lduh\\t%1, %0
|
||||
sth\\t%r1, %0"
|
||||
[(set_attr "type" "*,*,load,store")])
|
||||
[(set_attr "type" "*,*,load,store")
|
||||
(set_attr "us3load_type" "*,*,3cycle,*")])
|
||||
|
||||
;; We always work with constants here.
|
||||
(define_insn "*movhi_lo_sum"
|
||||
|
@ -4404,7 +4590,7 @@
|
|||
"@
|
||||
fmovrs%D1\\t%2, %3, %0
|
||||
fmovrs%d1\\t%2, %4, %0"
|
||||
[(set_attr "type" "fpcmove")])
|
||||
[(set_attr "type" "fpcrmove")])
|
||||
|
||||
(define_insn "movdf_cc_reg_sp64"
|
||||
[(set (match_operand:DF 0 "register_operand" "=e,e")
|
||||
|
@ -4417,7 +4603,7 @@
|
|||
"@
|
||||
fmovrd%D1\\t%2, %3, %0
|
||||
fmovrd%d1\\t%2, %4, %0"
|
||||
[(set_attr "type" "fpcmove")
|
||||
[(set_attr "type" "fpcrmove")
|
||||
(set_attr "fptype" "double")])
|
||||
|
||||
(define_insn "*movtf_cc_reg_hq_sp64"
|
||||
|
@ -4431,7 +4617,7 @@
|
|||
"@
|
||||
fmovrq%D1\\t%2, %3, %0
|
||||
fmovrq%d1\\t%2, %4, %0"
|
||||
[(set_attr "type" "fpcmove")])
|
||||
[(set_attr "type" "fpcrmove")])
|
||||
|
||||
(define_insn "*movtf_cc_reg_sp64"
|
||||
[(set (match_operand:TF 0 "register_operand" "=e,e")
|
||||
|
@ -4521,7 +4707,8 @@
|
|||
(zero_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
|
||||
""
|
||||
"lduh\\t%1, %0"
|
||||
[(set_attr "type" "load")])
|
||||
[(set_attr "type" "load")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "zero_extendqihi2"
|
||||
[(set (match_operand:HI 0 "register_operand" "")
|
||||
|
@ -4536,7 +4723,8 @@
|
|||
"@
|
||||
and\\t%1, 0xff, %0
|
||||
ldub\\t%1, %0"
|
||||
[(set_attr "type" "*,load")])
|
||||
[(set_attr "type" "*,load")
|
||||
(set_attr "us3load_type" "*,3cycle")])
|
||||
|
||||
(define_expand "zero_extendqisi2"
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
|
@ -4551,7 +4739,8 @@
|
|||
"@
|
||||
and\\t%1, 0xff, %0
|
||||
ldub\\t%1, %0"
|
||||
[(set_attr "type" "*,load")])
|
||||
[(set_attr "type" "*,load")
|
||||
(set_attr "us3load_type" "*,3cycle")])
|
||||
|
||||
(define_expand "zero_extendqidi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
|
@ -4566,7 +4755,8 @@
|
|||
"@
|
||||
and\\t%1, 0xff, %0
|
||||
ldub\\t%1, %0"
|
||||
[(set_attr "type" "*,load")])
|
||||
[(set_attr "type" "*,load")
|
||||
(set_attr "us3load_type" "*,3cycle")])
|
||||
|
||||
(define_expand "zero_extendhidi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
|
@ -4597,7 +4787,8 @@
|
|||
(zero_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
|
||||
"TARGET_ARCH64"
|
||||
"lduh\\t%1, %0"
|
||||
[(set_attr "type" "load")])
|
||||
[(set_attr "type" "load")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
|
||||
;; ??? Write truncdisi pattern using sra?
|
||||
|
@ -4803,7 +4994,8 @@
|
|||
(sign_extend:SI (match_operand:HI 1 "memory_operand" "m")))]
|
||||
""
|
||||
"ldsh\\t%1, %0"
|
||||
[(set_attr "type" "sload")])
|
||||
[(set_attr "type" "sload")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "extendqihi2"
|
||||
[(set (match_operand:HI 0 "register_operand" "")
|
||||
|
@ -4843,7 +5035,8 @@
|
|||
(sign_extend:HI (match_operand:QI 1 "memory_operand" "m")))]
|
||||
""
|
||||
"ldsb\\t%1, %0"
|
||||
[(set_attr "type" "sload")])
|
||||
[(set_attr "type" "sload")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "extendqisi2"
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
|
@ -4874,7 +5067,8 @@
|
|||
(sign_extend:SI (match_operand:QI 1 "memory_operand" "m")))]
|
||||
""
|
||||
"ldsb\\t%1, %0"
|
||||
[(set_attr "type" "sload")])
|
||||
[(set_attr "type" "sload")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "extendqidi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
|
@ -4905,7 +5099,8 @@
|
|||
(sign_extend:DI (match_operand:QI 1 "memory_operand" "m")))]
|
||||
"TARGET_ARCH64"
|
||||
"ldsb\\t%1, %0"
|
||||
[(set_attr "type" "sload")])
|
||||
[(set_attr "type" "sload")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "extendhidi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
|
@ -4936,7 +5131,8 @@
|
|||
(sign_extend:DI (match_operand:HI 1 "memory_operand" "m")))]
|
||||
"TARGET_ARCH64"
|
||||
"ldsh\\t%1, %0"
|
||||
[(set_attr "type" "sload")])
|
||||
[(set_attr "type" "sload")
|
||||
(set_attr "us3load_type" "3cycle")])
|
||||
|
||||
(define_expand "extendsidi2"
|
||||
[(set (match_operand:DI 0 "register_operand" "")
|
||||
|
@ -4951,7 +5147,8 @@
|
|||
"@
|
||||
sra\\t%1, 0, %0
|
||||
ldsw\\t%1, %0"
|
||||
[(set_attr "type" "shift,sload")])
|
||||
[(set_attr "type" "shift,sload")
|
||||
(set_attr "us3load_type" "*,3cycle")])
|
||||
|
||||
;; Special pattern for optimizing bit-field compares. This is needed
|
||||
;; because combine uses this as a canonical form.
|
||||
|
@ -9507,3 +9704,9 @@
|
|||
"TARGET_V9"
|
||||
"t%C0\\t%%xcc, %1"
|
||||
[(set_attr "type" "misc")])
|
||||
|
||||
(define_insn "cycle_display"
|
||||
[(unspec [(match_operand 0 "const_int_operand" "")] 20)]
|
||||
""
|
||||
"! cycle %0"
|
||||
[(set_attr "length" "0")])
|
||||
|
|
|
@ -330,9 +330,10 @@ Andrew MacLeod for his ongoing work in building a real EH system,
|
|||
various code generation improvements, work on the global optimizer, etc.
|
||||
|
||||
@item
|
||||
Vladimir Makarov for hacking some ugly i960 problems, PowerPC
|
||||
hacking improvements to compile-time performance and overall knowledge
|
||||
and direction in the area of instruction scheduling.
|
||||
Vladimir Makarov for hacking some ugly i960 problems, PowerPC hacking
|
||||
improvements to compile-time performance, overall knowledge and
|
||||
direction in the area of instruction scheduling, and design and
|
||||
implementation of the automaton based instruction scheduler.
|
||||
|
||||
@item
|
||||
Bob Manson for his behind the scenes work on dejagnu.
|
||||
|
|
528
gcc/doc/md.texi
528
gcc/doc/md.texi
|
@ -3871,13 +3871,14 @@ in the compiler.
|
|||
@cindex instruction splitting
|
||||
@cindex splitting instructions
|
||||
|
||||
There are two cases where you should specify how to split a pattern into
|
||||
multiple insns. On machines that have instructions requiring delay
|
||||
slots (@pxref{Delay Slots}) or that have instructions whose output is
|
||||
not available for multiple cycles (@pxref{Function Units}), the compiler
|
||||
phases that optimize these cases need to be able to move insns into
|
||||
one-instruction delay slots. However, some insns may generate more than one
|
||||
machine instruction. These insns cannot be placed into a delay slot.
|
||||
There are two cases where you should specify how to split a pattern
|
||||
into multiple insns. On machines that have instructions requiring
|
||||
delay slots (@pxref{Delay Slots}) or that have instructions whose
|
||||
output is not available for multiple cycles (@pxref{Processor pipeline
|
||||
description}), the compiler phases that optimize these cases need to
|
||||
be able to move insns into one-instruction delay slots. However, some
|
||||
insns may generate more than one machine instruction. These insns
|
||||
cannot be placed into a delay slot.
|
||||
|
||||
Often you can rewrite the single insn as a list of individual insns,
|
||||
each corresponding to one machine instruction. The disadvantage of
|
||||
|
@ -4497,7 +4498,7 @@ to track the condition codes.
|
|||
* Insn Lengths:: Computing the length of insns.
|
||||
* Constant Attributes:: Defining attributes that are constant.
|
||||
* Delay Slots:: Defining delay slots required for a machine.
|
||||
* Function Units:: Specifying information for insn scheduling.
|
||||
* Processor pipeline description:: Specifying information for insn scheduling.
|
||||
@end menu
|
||||
|
||||
@node Defining Attributes
|
||||
|
@ -5127,14 +5128,101 @@ branch is true, we might represent this as follows:
|
|||
@end smallexample
|
||||
@c the above is *still* too long. --mew 4feb93
|
||||
|
||||
@node Function Units
|
||||
@subsection Specifying Function Units
|
||||
@node Processor pipeline description
|
||||
@subsection Specifying processor pipeline description
|
||||
@cindex processor pipeline description
|
||||
@cindex processor functional units
|
||||
@cindex instruction latency time
|
||||
@cindex interlock delays
|
||||
@cindex data dependence delays
|
||||
@cindex reservation delays
|
||||
@cindex pipeline hazard recognizer
|
||||
@cindex automaton based pipeline description
|
||||
@cindex regular expressions
|
||||
@cindex deterministic finite state automaton
|
||||
@cindex automaton based scheduler
|
||||
@cindex RISC
|
||||
@cindex VLIW
|
||||
|
||||
To achieve better productivity most modern processors
|
||||
(super-pipelined, superscalar @acronym{RISC}, and @acronym{VLIW}
|
||||
processors) have many @dfn{functional units} on which several
|
||||
instructions can be executed simultaneously. An instruction starts
|
||||
execution if its issue conditions are satisfied. If not, the
|
||||
instruction is interlocked until its conditions are satisfied. Such
|
||||
@dfn{interlock (pipeline) delay} causes interruption of the fetching
|
||||
of successor instructions (or demands nop instructions, e.g. for some
|
||||
MIPS processors).
|
||||
|
||||
There are two major kinds of interlock delays in modern processors.
|
||||
The first one is a data dependence delay determining @dfn{instruction
|
||||
latency time}. The instruction execution is not started until all
|
||||
source data have been evaluated by prior instructions (there are more
|
||||
complex cases when the instruction execution starts even when the data
|
||||
are not availaible but will be ready in given time after the
|
||||
instruction execution start). Taking the data dependence delays into
|
||||
account is simple. The data dependence (true, output, and
|
||||
anti-dependence) delay between two instructions is given by a
|
||||
constant. In most cases this approach is adequate. The second kind
|
||||
of interlock delays is a reservation delay. The reservation delay
|
||||
means that two instructions under execution will be in need of shared
|
||||
processors resources, i.e. buses, internal registers, and/or
|
||||
functional units, which are reserved for some time. Taking this kind
|
||||
of delay into account is complex especially for modern @acronym{RISC}
|
||||
processors.
|
||||
|
||||
The task of exploiting more processor parallelism is solved by an
|
||||
instruction scheduler. For better solution of this problem, the
|
||||
instruction scheduler has to have an adequate description of the
|
||||
processor parallelism (or @dfn{pipeline description}). Currently GCC
|
||||
has two ways to describe processor parallelism. The first one is old
|
||||
and originated from instruction scheduler written by Michael Tiemann
|
||||
and described in the first subsequent section. The second one was
|
||||
created later. It is based on description of functional unit
|
||||
reservations by processor instructions with the aid of @dfn{regular
|
||||
expressions}. This is so called @dfn{automaton based description}.
|
||||
|
||||
Gcc instruction scheduler uses a @dfn{pipeline hazard recognizer} to
|
||||
figure out the possibility of the instruction issue by the processor
|
||||
on given simulated processor cycle. The pipeline hazard recognizer is
|
||||
a code generated from the processor pipeline description. The
|
||||
pipeline hazard recognizer generated from the automaton based
|
||||
description is more sophisticated and based on deterministic finite
|
||||
state automaton (@acronym{DFA}) and therefore faster than one
|
||||
generated from the old description. Also its speed is not depended on
|
||||
processor complexity. The instruction issue is possible if there is
|
||||
a transition from one automaton state to another one.
|
||||
|
||||
You can use any model to describe processor pipeline characteristics
|
||||
or even a mix of them. You could use the old description for some
|
||||
processor submodels and the @acronym{DFA}-based one for the rest
|
||||
processor submodels.
|
||||
|
||||
In general, the usage of the automaton based description is more
|
||||
preferable. Its model is more rich. It permits to describe more
|
||||
accurately pipeline characteristics of processors which results in
|
||||
improving code quality (although sometimes only on several percent
|
||||
fractions). It will be also used as an infrastructure to implement
|
||||
sophisticated and practical insn scheduling which will try many
|
||||
instruction sequences to choose the best one.
|
||||
|
||||
|
||||
@menu
|
||||
* Old pipeline description:: Specifying information for insn scheduling.
|
||||
* Automaton pipeline description:: Describing insn pipeline characteristics.
|
||||
* Comparison of the two descriptions:: Drawbacks of the old pipeline description
|
||||
@end menu
|
||||
|
||||
@node Old pipeline description
|
||||
@subsubsection Specifying Function Units
|
||||
@cindex old pipeline description
|
||||
@cindex function units, for scheduling
|
||||
|
||||
On most RISC machines, there are instructions whose results are not
|
||||
available for a specific number of cycles. Common cases are instructions
|
||||
that load data from memory. On many machines, a pipeline stall will result
|
||||
if the data is referenced too soon after the load instruction.
|
||||
On most @acronym{RISC} machines, there are instructions whose results
|
||||
are not available for a specific number of cycles. Common cases are
|
||||
instructions that load data from memory. On many machines, a pipeline
|
||||
stall will result if the data is referenced too soon after the load
|
||||
instruction.
|
||||
|
||||
In addition, many newer microprocessors have multiple function units, usually
|
||||
one for integer and one for floating point, and often will incur pipeline
|
||||
|
@ -5148,13 +5236,14 @@ due to function unit conflicts.
|
|||
|
||||
For the purposes of the specifications in this section, a machine is
|
||||
divided into @dfn{function units}, each of which execute a specific
|
||||
class of instructions in first-in-first-out order. Function units that
|
||||
accept one instruction each cycle and allow a result to be used in the
|
||||
succeeding instruction (usually via forwarding) need not be specified.
|
||||
Classic RISC microprocessors will normally have a single function unit,
|
||||
which we can call @samp{memory}. The newer ``superscalar'' processors
|
||||
will often have function units for floating point operations, usually at
|
||||
least a floating point adder and multiplier.
|
||||
class of instructions in first-in-first-out order. Function units
|
||||
that accept one instruction each cycle and allow a result to be used
|
||||
in the succeeding instruction (usually via forwarding) need not be
|
||||
specified. Classic @acronym{RISC} microprocessors will normally have
|
||||
a single function unit, which we can call @samp{memory}. The newer
|
||||
``superscalar'' processors will often have function units for floating
|
||||
point operations, usually at least a floating point adder and
|
||||
multiplier.
|
||||
|
||||
@findex define_function_unit
|
||||
Each usage of a function units by a class of insns is specified with a
|
||||
|
@ -5217,10 +5306,10 @@ Typical uses of this vector are where a floating point function unit can
|
|||
pipeline either single- or double-precision operations, but not both, or
|
||||
where a memory unit can pipeline loads, but not stores, etc.
|
||||
|
||||
As an example, consider a classic RISC machine where the result of a
|
||||
load instruction is not available for two cycles (a single ``delay''
|
||||
instruction is required) and where only one load instruction can be executed
|
||||
simultaneously. This would be specified as:
|
||||
As an example, consider a classic @acronym{RISC} machine where the
|
||||
result of a load instruction is not available for two cycles (a single
|
||||
``delay'' instruction is required) and where only one load instruction
|
||||
can be executed simultaneously. This would be specified as:
|
||||
|
||||
@smallexample
|
||||
(define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)
|
||||
|
@ -5246,6 +5335,395 @@ used during their execution and there is no way of representing that
|
|||
conflict. We welcome any examples of how function unit conflicts work
|
||||
in such processors and suggestions for their representation.
|
||||
|
||||
@node Automaton pipeline description
|
||||
@subsubsection Describing instruction pipeline characteristics
|
||||
@cindex automaton based pipeline description
|
||||
|
||||
This section describes constructions of the automaton based processor
|
||||
pipeline description. The order of all mentioned below constructions
|
||||
in the machine description file is not important.
|
||||
|
||||
@findex define_automaton
|
||||
@cindex pipeline hazard recognizer
|
||||
The following optional construction describes names of automata
|
||||
generated and used for the pipeline hazards recognition. Sometimes
|
||||
the generated finite state automaton used by the pipeline hazard
|
||||
recognizer is large. If we use more one automaton and bind functional
|
||||
units to the automata, the summary size of the automata usually is
|
||||
less than the size of the single automaton. If there is no one such
|
||||
construction, only one finite state automaton is generated.
|
||||
|
||||
@smallexample
|
||||
(define_automaton @var{automata-names})
|
||||
@end smallexample
|
||||
|
||||
@var{automata-names} is a string giving names of the automata. The
|
||||
names are separated by commas. All the automata should have unique names.
|
||||
The automaton name is used in construction @code{define_cpu_unit} and
|
||||
@code{define_query_cpu_unit}.
|
||||
|
||||
@findex define_cpu_unit
|
||||
@cindex processor functional units
|
||||
Each processor functional unit used in description of instruction
|
||||
reservations should be described by the following construction.
|
||||
|
||||
@smallexample
|
||||
(define_cpu_unit @var{unit-names} [@var{automaton-name}])
|
||||
@end smallexample
|
||||
|
||||
@var{unit-names} is a string giving the names of the functional units
|
||||
separated by commas. Don't use name @samp{nothing}, it is reserved
|
||||
for other goals.
|
||||
|
||||
@var{automaton-name} is a string giving the name of automaton with
|
||||
which the unit is bound. The automaton should be described in
|
||||
construction @code{define_automaton}. You should give
|
||||
@dfn{automaton-name}, if there is a defined automaton.
|
||||
|
||||
@findex define_query_cpu_unit
|
||||
@cindex querying function unit reservations
|
||||
The following construction describes CPU functional units analogously
|
||||
to @code{define_cpu_unit}. If we use automata without their
|
||||
minimization, the reservation of such units can be queried for an
|
||||
automaton state. The instruction scheduler never queries reservation
|
||||
of functional units for given automaton state. So as a rule, you
|
||||
don't need this construction. This construction could be used for
|
||||
future code generation goals (e.g. to generate @acronym{VLIW} insn
|
||||
templates).
|
||||
|
||||
@smallexample
|
||||
(define_query_cpu_unit @var{unit-names} [@var{automaton-name}])
|
||||
@end smallexample
|
||||
|
||||
@var{unit-names} is a string giving names of the functional units
|
||||
separated by commas.
|
||||
|
||||
@var{automaton-name} is a string giving name of the automaton with
|
||||
which the unit is bound.
|
||||
|
||||
@findex define_insn_reservation
|
||||
@cindex instruction latency time
|
||||
@cindex regular expressions
|
||||
@cindex data bypass
|
||||
The following construction is major one to describe pipeline
|
||||
characteristics of an instruction.
|
||||
|
||||
@smallexample
|
||||
(define_insn_reservation @var{insn-name} @var{default_latency}
|
||||
@var{condition} @var{regexp})
|
||||
@end smallexample
|
||||
|
||||
@var{default_latency} is a number giving latency time of the
|
||||
instruction. There is an important difference between the old
|
||||
description and the automaton based pipeline description. The latency
|
||||
time is used for all dependencies when we use the old description. In
|
||||
the automaton based pipeline description, given latency time is used
|
||||
only for true dependencies. The cost of anti-dependencies is always
|
||||
zero and the cost of output dependencies is the difference between
|
||||
latency times of the producing and consuming insns (if the difference
|
||||
is negative, the cost is considered to be zero). You always can
|
||||
change the default costs for any description by using target hook
|
||||
@code{TARGET_SCHED_ADJUST_COST} (@pxref{Scheduling}).
|
||||
|
||||
@var{insn-names} is a string giving internal name of the insn. The
|
||||
internal names are used in constructions @code{define_bypass} and in
|
||||
the automaton description file generated for debugging. The internal
|
||||
name has nothing common with the names in @code{define_insn}. It is a
|
||||
good practice to use insn classes described in the processor manual.
|
||||
|
||||
@var{condition} defines what RTL insns are described by this
|
||||
construction. You should remember that you will be in trouble if
|
||||
@var{condition} for two or more different
|
||||
@code{define_insn_reservation} constructions is TRUE for an insn. In
|
||||
this case what reservation will be used for the insn is not defined.
|
||||
Such cases are not checked during generation of the pipeline hazards
|
||||
recognizer because in general recognizing that two conditions may have
|
||||
the same value is quite difficult (especially if the conditions
|
||||
contain @code{symbol_ref}). It is also not checked during the
|
||||
pipeline hazard recognizer work because it would slow down the
|
||||
recognizer considerably.
|
||||
|
||||
@var{regexp} is a string describing reservation of the cpu functional
|
||||
units by the instruction. The reservations are described by a regular
|
||||
expression according to the following syntax:
|
||||
|
||||
@smallexample
|
||||
regexp = regexp "," oneof
|
||||
| oneof
|
||||
|
||||
oneof = oneof "|" allof
|
||||
| allof
|
||||
|
||||
allof = allof "+" repeat
|
||||
| repeat
|
||||
|
||||
repeat = element "*" number
|
||||
| element
|
||||
|
||||
element = cpu_function_unit_name
|
||||
| reservation_name
|
||||
| result_name
|
||||
| "nothing"
|
||||
| "(" regexp ")"
|
||||
@end smallexample
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
@samp{,} is used for describing the start of the next cycle in
|
||||
the reservation.
|
||||
|
||||
@item
|
||||
@samp{|} is used for describing a reservation described by the first
|
||||
regular expression @strong{or} a reservation described by the second
|
||||
regular expression @strong{or} etc.
|
||||
|
||||
@item
|
||||
@samp{+} is used for describing a reservation described by the first
|
||||
regular expression @strong{and} a reservation described by the
|
||||
second regular expression @strong{and} etc.
|
||||
|
||||
@item
|
||||
@samp{*} is used for convenience and simply means a sequence in which
|
||||
the regular expression are repeated @var{number} times with cycle
|
||||
advancing (see @samp{,}).
|
||||
|
||||
@item
|
||||
@samp{cpu_function_unit_name} denotes reservation of the named
|
||||
functional unit.
|
||||
|
||||
@item
|
||||
@samp{reservation_name} --- see description of construction
|
||||
@samp{define_reservation}.
|
||||
|
||||
@item
|
||||
@samp{nothing} denotes no unit reservations.
|
||||
@end itemize
|
||||
|
||||
@findex define_reservation
|
||||
Sometimes unit reservations for different insns contain common parts.
|
||||
In such case, you can simplify the pipeline description by describing
|
||||
the common part by the following construction
|
||||
|
||||
@smallexample
|
||||
(define_reservation @var{reservation-name} @var{regexp})
|
||||
@end smallexample
|
||||
|
||||
@var{reservation-name} is a string giving name of @var{regexp}.
|
||||
Functional unit names and reservation names are in the same name
|
||||
space. So the reservation names should be different from the
|
||||
functional unit names and can not be reserved name @samp{nothing}.
|
||||
|
||||
@findex define_bypass
|
||||
@cindex instruction latency time
|
||||
@cindex data bypass
|
||||
The following construction is used to describe exceptions in the
|
||||
latency time for given instruction pair. This is so called bypasses.
|
||||
|
||||
@smallexample
|
||||
(define_bypass @var{number} @var{out_insn_names} @var{in_insn_names}
|
||||
[@var{guard}])
|
||||
@end smallexample
|
||||
|
||||
@var{number} defines when the result generated by the instructions
|
||||
given in string @var{out_insn_names} will be ready for the
|
||||
instructions given in string @var{in_insn_names}. The instructions in
|
||||
the string are separated by commas.
|
||||
|
||||
@var{guard} is an optional string giving name of a C function which
|
||||
defines an additional guard for the bypass. The function will get the
|
||||
two insns as parameters. If the function returns zero the bypass will
|
||||
be ignored for this case. The additional guard is necessary to
|
||||
recognize complicated bypasses, e.g. when consumer is only an address
|
||||
of insn @samp{store} (not a stored value).
|
||||
|
||||
@findex exclusion_set
|
||||
@findex presence_set
|
||||
@findex absence_set
|
||||
@cindex VLIW
|
||||
@cindex RISC
|
||||
Usually the following three constructions are used to describe
|
||||
@acronym{VLIW} processors (more correctly to describe a placement of
|
||||
small insns into @acronym{VLIW} insn slots). Although they can be
|
||||
used for @acronym{RISC} processors too.
|
||||
|
||||
@smallexample
|
||||
(exclusion_set @var{unit-names} @var{unit-names})
|
||||
(presence_set @var{unit-names} @var{unit-names})
|
||||
(absence_set @var{unit-names} @var{unit-names})
|
||||
@end smallexample
|
||||
|
||||
@var{unit-names} is a string giving names of functional units
|
||||
separated by commas.
|
||||
|
||||
The first construction (@samp{exclusion_set}) means that each
|
||||
functional unit in the first string can not be reserved simultaneously
|
||||
with a unit whose name is in the second string and vice versa. For
|
||||
example, the construction is useful for describing processors
|
||||
(e.g. some SPARC processors) with a fully pipelined floating point
|
||||
functional unit which can execute simultaneously only single floating
|
||||
point insns or only double floating point insns.
|
||||
|
||||
The second construction (@samp{presence_set}) means that each
|
||||
functional unit in the first string can not be reserved unless at
|
||||
least one of units whose names are in the second string is reserved.
|
||||
This is an asymmetric relation. For example, it is useful for
|
||||
description that @acronym{VLIW} @samp{slot1} is reserved after
|
||||
@samp{slot0} reservation.
|
||||
|
||||
The third construction (@samp{absence_set}) means that each functional
|
||||
unit in the first string can be reserved only if each unit whose name
|
||||
is in the second string is not reserved. This is an asymmetric
|
||||
relation (actually @samp{exclusion_set} is analogous to this one but
|
||||
it is symmetric). For example, it is useful for description that
|
||||
@acronym{VLIW} @samp{slot0} can not be reserved after @samp{slot1} or
|
||||
@samp{slot2} reservation.
|
||||
|
||||
All functional units mentioned in a set should belong the same
|
||||
automaton.
|
||||
|
||||
@findex automata_option
|
||||
@cindex deterministic finite state automaton
|
||||
@cindex nondeterministic finite state automaton
|
||||
@cindex finite state automaton minimization
|
||||
You can control the generator of the pipeline hazard recognizer with
|
||||
the following construction.
|
||||
|
||||
@smallexample
|
||||
(automata_option @var{options})
|
||||
@end smallexample
|
||||
|
||||
@var{options} is a string giving options which affect the generated
|
||||
code. Currently there are the following options:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
@dfn{no-minimization} makes no minimization of the automaton. This is
|
||||
only worth to do when we are going to query CPU functional unit
|
||||
reservations in an automaton state.
|
||||
|
||||
@item
|
||||
@dfn{w} means a generation of the file describing the result
|
||||
automaton. The file can be used to verify the description.
|
||||
|
||||
@item
|
||||
@dfn{ndfa} makes nondeterministic finite state automata. This affects
|
||||
the treatment of operator @samp{|} in the regular expressions. The
|
||||
usual treatment of the operator is to try the first alternative and,
|
||||
if the reservation is not possible, the second alternative. The
|
||||
nondeterministic treatment means trying all alternatives, some of them
|
||||
may be rejected by reservations in the subsequent insns. You can not
|
||||
query functional unit reservations in nondeterministic automaton
|
||||
states.
|
||||
@end itemize
|
||||
|
||||
As an example, consider a superscalar @acronym{RISC} machine which can
|
||||
issue three insns (two integer insns and one floating point insn) on
|
||||
the cycle but can finish only two insns. To describe this, we define
|
||||
the following functional units.
|
||||
|
||||
@smallexample
|
||||
(define_cpu_unit "i0_pipeline, i1_pipeline, f_pipeline")
|
||||
(define_cpu_unit "port_0, port1")
|
||||
@end smallexample
|
||||
|
||||
All simple integer insns can be executed in any integer pipeline and
|
||||
their result is ready in two cycles. The simple integer insns are
|
||||
issued into the first pipeline unless it is reserved, otherwise they
|
||||
are issued into the second pipeline. Integer division and
|
||||
multiplication insns can be executed only in the second integer
|
||||
pipeline and their results are ready correspondingly in 8 and 4
|
||||
cycles. The integer division is not pipelined, i.e. the subsequent
|
||||
integer division insn can not be issued until the current division
|
||||
insn finished. Floating point insns are fully pipelined and their
|
||||
results are ready in 3 cycles. There is also additional one cycle
|
||||
delay in the usage by integer insns of result produced by floating
|
||||
point insns. To describe all of this we could specify
|
||||
|
||||
@smallexample
|
||||
(define_cpu_unit "div")
|
||||
|
||||
(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
|
||||
"(i0_pipeline | i1_pipeline), (port_0 | port1)")
|
||||
|
||||
(define_insn_reservation "mult" 4 (eq_attr "cpu" "mult")
|
||||
"i1_pipeline, nothing*2, (port_0 | port1)")
|
||||
|
||||
(define_insn_reservation "div" 8 (eq_attr "cpu" "div")
|
||||
"i1_pipeline, div*7, div + (port_0 | port1)")
|
||||
|
||||
(define_insn_reservation "float" 3 (eq_attr "cpu" "float")
|
||||
"f_pipeline, nothing, (port_0 | port1))
|
||||
|
||||
(define_bypass 4 "float" "simple,mut,div")
|
||||
@end smallexample
|
||||
|
||||
To simplify the description we could describe the following reservation
|
||||
|
||||
@smallexample
|
||||
(define_reservation "finish" "port0|port1")
|
||||
@end smallexample
|
||||
|
||||
and use it in all @code{define_insn_reservation} as in the following
|
||||
construction
|
||||
|
||||
@smallexample
|
||||
(define_insn_reservation "simple" 2 (eq_attr "cpu" "int")
|
||||
"(i0_pipeline | i1_pipeline), finish")
|
||||
@end smallexample
|
||||
|
||||
|
||||
@node Comparison of the two descriptions
|
||||
@subsubsection Drawbacks of the old pipeline description
|
||||
@cindex old pipeline description
|
||||
@cindex automaton based pipeline description
|
||||
@cindex processor functional units
|
||||
@cindex interlock delays
|
||||
@cindex instruction latency time
|
||||
@cindex pipeline hazard recognizer
|
||||
@cindex data bypass
|
||||
|
||||
The old instruction level parallelism description and the pipeline
|
||||
hazards recognizer based on it have the following drawbacks in
|
||||
comparison with the @acronym{DFA}-based ones:
|
||||
|
||||
@itemize @bullet
|
||||
@item
|
||||
Each functional unit is believed to be reserved at the instruction
|
||||
execution start. This is a very inaccurate model for modern
|
||||
processors.
|
||||
|
||||
@item
|
||||
An inadequate description of instruction latency times. The latency
|
||||
time is bound with a functional unit reserved by an instruction not
|
||||
with the instruction itself. In other words, the description is
|
||||
oriented to describe at most one unit reservation by each instruction.
|
||||
It also does not permit to describe special bypasses between
|
||||
instruction pairs.
|
||||
|
||||
@item
|
||||
The implementation of the pipeline hazard recognizer interface has
|
||||
constraints on number of functional units. This is a number of bits
|
||||
in integer on the host machine.
|
||||
|
||||
@item
|
||||
The interface to the pipeline hazard recognizer is more complex than
|
||||
one to the automaton based pipeline recognizer.
|
||||
|
||||
@item
|
||||
An unnatural description when you write an unit and a condition which
|
||||
selects instructions using the unit. Writing all unit reservations
|
||||
for an instruction (an instruction class) is more natural.
|
||||
|
||||
@item
|
||||
The recognition of the interlock delays has slow implementation. GCC
|
||||
scheduler supports structures which describe the unit reservations.
|
||||
The more processor has functional units, the slower pipeline hazard
|
||||
recognizer. Such implementation would become slower when we enable to
|
||||
reserve functional units not only at the instruction execution start.
|
||||
The automaton based pipeline hazard recognizer speed is not depended
|
||||
on processor complexity.
|
||||
@end itemize
|
||||
|
||||
@node Conditional Execution
|
||||
@section Conditional Execution
|
||||
@cindex conditional execution
|
||||
|
|
|
@ -654,6 +654,8 @@ Several passes use instruction attributes. A definition of the
|
|||
attributes defined for a particular machine is in file
|
||||
@file{insn-attr.h}, which is generated from the machine description by
|
||||
the program @file{genattr}. The file @file{insn-attrtab.c} contains
|
||||
subroutines to obtain the attribute values for insns. It is generated
|
||||
from the machine description by the program @file{genattrtab}.
|
||||
subroutines to obtain the attribute values for insns and information
|
||||
about processor pipeline characteristics for the instruction
|
||||
scheduler. It is generated from the machine description by the
|
||||
program @file{genattrtab}.
|
||||
@end itemize
|
||||
|
|
166
gcc/doc/tm.texi
166
gcc/doc/tm.texi
|
@ -5401,11 +5401,19 @@ hooks for this purpose. It is usually enough to define just a few of
|
|||
them: try the first ones in this list first.
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_ISSUE_RATE (void)
|
||||
This hook returns the maximum number of instructions that can ever issue
|
||||
at the same time on the target machine. The default is one. This value
|
||||
must be constant over the entire compilation. If you need it to vary
|
||||
depending on what the instructions are, you must use
|
||||
This hook returns the maximum number of instructions that can ever
|
||||
issue at the same time on the target machine. The default is one.
|
||||
Although the insn scheduler can define itself the possibility of issue
|
||||
an insn on the same cycle, the value can serve as an additional
|
||||
constraint to issue insns on the same simulated processor cycle (see
|
||||
hooks @samp{TARGET_SCHED_REORDER} and @samp{TARGET_SCHED_REORDER2}).
|
||||
This value must be constant over the entire compilation. If you need
|
||||
it to vary depending on what the instructions are, you must use
|
||||
@samp{TARGET_SCHED_VARIABLE_ISSUE}.
|
||||
|
||||
You could use the value of macro @samp{MAX_DFA_ISSUE_RATE} to return
|
||||
the value of the hook @samp{TARGET_SCHED_ISSUE_RATE} for the automaton
|
||||
based pipeline interface.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_VARIABLE_ISSUE (FILE *@var{file}, int @var{verbose}, rtx @var{insn}, int @var{more})
|
||||
|
@ -5421,12 +5429,18 @@ instruction that was scheduled.
|
|||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_ADJUST_COST (rtx @var{insn}, rtx @var{link}, rtx @var{dep_insn}, int @var{cost})
|
||||
This function corrects the value of @var{cost} based on the relationship
|
||||
between @var{insn} and @var{dep_insn} through the dependence @var{link}.
|
||||
It should return the new value. The default is to make no adjustment to
|
||||
@var{cost}. This can be used for example to specify to the scheduler
|
||||
This function corrects the value of @var{cost} based on the
|
||||
relationship between @var{insn} and @var{dep_insn} through the
|
||||
dependence @var{link}. It should return the new value. The default
|
||||
is to make no adjustment to @var{cost}. This can be used for example
|
||||
to specify to the scheduler using the traditional pipeline description
|
||||
that an output- or anti-dependence does not incur the same cost as a
|
||||
data-dependence.
|
||||
data-dependence. If the scheduler using the automaton based pipeline
|
||||
description, the cost of anti-dependence is zero and the cost of
|
||||
output-dependence is maximum of one and the difference of latency
|
||||
times of the first and the second insns. If these values are not
|
||||
acceptable, you could use the hook to modify them too. See also
|
||||
@pxref{Automaton pipeline description}.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_ADJUST_PRIORITY (rtx @var{insn}, int @var{priority})
|
||||
|
@ -5492,6 +5506,140 @@ RTL dumps and assembly output. Define this hook only if you need this
|
|||
level of detail about what the scheduler is doing.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE (void)
|
||||
This hook is called many times during insn scheduling. If the hook
|
||||
returns nonzero, the automaton based pipeline description is used for
|
||||
insn scheduling. Otherwise the traditional pipeline description is
|
||||
used. The default is usage of the traditional pipeline description.
|
||||
|
||||
You should also remember that to simplify the insn scheduler sources
|
||||
an empty traditional pipeline description interface is generated even
|
||||
if there is no a traditional pipeline description in the @file{.md}
|
||||
file. The same is true for the automaton based pipeline description.
|
||||
That means that you should be accurate in defining the hook.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_DFA_PRE_CYCLE_INSN (void)
|
||||
The hook returns an RTL insn. The automaton state used in the
|
||||
pipeline hazard recognizer is changed as if the insn were scheduled
|
||||
when the new simulated processor cycle starts. Usage of the hook may
|
||||
simplify the automaton pipeline description for some @acronym{VLIW}
|
||||
processors. If the hook is defined, it is used only for the automaton
|
||||
based pipeline description. The default is not to change the state
|
||||
when the new simulated processor cycle starts.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN (void)
|
||||
The hook can be used to initialize data used by the previous hook.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_DFA_POST_CYCLE_INSN (void)
|
||||
The hook is analogous to @samp{TARGET_SCHED_DFA_PRE_CYCLE_INSN} but used
|
||||
to changed the state as if the insn were scheduled when the new
|
||||
simulated processor cycle finishes.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN (void)
|
||||
The hook is analogous to @samp{TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN} but
|
||||
used to initialize data used by the previous hook.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD (void)
|
||||
This hook controls better choosing an insn from the ready insn queue
|
||||
for the @acronym{DFA}-based insn scheduler. Usually the scheduler
|
||||
chooses the first insn from the queue. If the hook returns a positive
|
||||
value, an additional scheduler code tries all permutations of
|
||||
@samp{TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ()}
|
||||
subsequent ready insns to choose an insn whose issue will result in
|
||||
maximal number of issued insns on the same cycle. For the
|
||||
@acronym{VLIW} processor, the code could actually solve the problem of
|
||||
packing simple insns into the @acronym{VLIW} insn. Of course, if the
|
||||
rules of @acronym{VLIW} packing are described in the automaton.
|
||||
|
||||
This code also could be used for superscalar @acronym{RISC}
|
||||
processors. Let us consider a superscalar @acronym{RISC} processor
|
||||
with 3 pipelines. Some insns can be executed in pipelines @var{A} or
|
||||
@var{B}, some insns can be executed only in pipelines @var{B} or
|
||||
@var{C}, and one insn can be executed in pipeline @var{B}. The
|
||||
processor may issue the 1st insn into @var{A} and the 2nd one into
|
||||
@var{B}. In this case, the 3rd insn will wait for freeing @var{B}
|
||||
until the next cycle. If the scheduler issues the 3rd insn the first,
|
||||
the processor could issue all 3 insns per cycle.
|
||||
|
||||
Actually this code demonstrates advantages of the automaton based
|
||||
pipeline hazard recognizer. We try quickly and easy many insn
|
||||
schedules to choose the best one.
|
||||
|
||||
The default is no multipass scheduling.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_BUBBLES (void)
|
||||
The @acronym{DFA}-based scheduler could take the insertion of nop
|
||||
operations for better insn scheduling into account. It can be done
|
||||
only if the multi-pass insn scheduling works (see hook
|
||||
@samp{TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD}).
|
||||
|
||||
Let us consider a @acronym{VLIW} processor insn with 3 slots. Each
|
||||
insn can be placed only in one of the three slots. We have 3 ready
|
||||
insns @var{A}, @var{B}, and @var{C}. @var{A} and @var{C} can be
|
||||
placed only in the 1st slot, @var{B} can be placed only in the 3rd
|
||||
slot. We described the automaton which does not permit empty slot
|
||||
gaps between insns (usually such description is simpler). Without
|
||||
this code the scheduler would place each insn in 3 separate
|
||||
@acronym{VLIW} insns. If the scheduler places a nop insn into the 2nd
|
||||
slot, it could place the 3 insns into 2 @acronym{VLIW} insns. What is
|
||||
the nop insn is returned by hook @samp{TARGET_SCHED_DFA_BUBBLE}. Hook
|
||||
@samp{TARGET_SCHED_INIT_DFA_BUBBLES} can be used to initialize or
|
||||
create the nop insns.
|
||||
|
||||
You should remember that the scheduler does not insert the nop insns.
|
||||
It is not wise because of the following optimizations. The scheduler
|
||||
only considers such possibility to improve the result schedule. The
|
||||
nop insns should be inserted lately, e.g. on the final phase.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} rtx TARGET_SCHED_DFA_BUBBLE (int @var{index})
|
||||
This hook @samp{FIRST_CYCLE_MULTIPASS_SCHEDULING} is used to insert
|
||||
nop operations for better insn scheduling when @acronym{DFA}-based
|
||||
scheduler makes multipass insn scheduling (see also description of
|
||||
hook @samp{TARGET_SCHED_INIT_DFA_BUBBLES}). This hook
|
||||
returns a nop insn with given @var{index}. The indexes start with
|
||||
zero. The hook should return @code{NULL} if there are no more nop
|
||||
insns with indexes greater than given index.
|
||||
@end deftypefn
|
||||
|
||||
Macros in the following table are generated by the program
|
||||
@file{genattr} and can be useful for writing the hooks.
|
||||
|
||||
@table @code
|
||||
@findex TRADITIONAL_PIPELINE_INTERFACE
|
||||
@item TRADITIONAL_PIPELINE_INTERFACE
|
||||
The macro definition is generated if there is a traditional pipeline
|
||||
description in @file{.md} file. You should also remember that to
|
||||
simplify the insn scheduler sources an empty traditional pipeline
|
||||
description interface is generated even if there is no a traditional
|
||||
pipeline description in the @file{.md} file. The macro can be used to
|
||||
distinguish the two types of the traditional interface.
|
||||
|
||||
@findex DFA_PIPELINE_INTERFACE
|
||||
@item DFA_PIPELINE_INTERFACE
|
||||
The macro definition is generated if there is an automaton pipeline
|
||||
description in @file{.md} file. You should also remember that to
|
||||
simplify the insn scheduler sources an empty automaton pipeline
|
||||
description interface is generated even if there is no an automaton
|
||||
pipeline description in the @file{.md} file. The macro can be used to
|
||||
distinguish the two types of the automaton interface.
|
||||
|
||||
@findex MAX_DFA_ISSUE_RATE
|
||||
@item MAX_DFA_ISSUE_RATE
|
||||
The macro definition is generated in the automaton based pipeline
|
||||
description interface. Its value is calculated from the automaton
|
||||
based pipeline description and is equal to maximal number of all insns
|
||||
described in constructions @samp{define_insn_reservation} which can be
|
||||
issued on the same processor cycle.
|
||||
|
||||
@end table
|
||||
|
||||
@node Sections
|
||||
@section Dividing the Output into Sections (Texts, Data, @dots{})
|
||||
@c the above section title is WAY too long. maybe cut the part between
|
||||
|
|
107
gcc/genattr.c
107
gcc/genattr.c
|
@ -193,6 +193,7 @@ main (argc, argv)
|
|||
int have_delay = 0;
|
||||
int have_annul_true = 0;
|
||||
int have_annul_false = 0;
|
||||
int num_insn_reservations = 0;
|
||||
int num_units = 0;
|
||||
struct range all_simultaneity, all_multiplicity;
|
||||
struct range all_ready_cost, all_issue_delay, all_blockage;
|
||||
|
@ -308,10 +309,18 @@ main (argc, argv)
|
|||
extend_range (&all_issue_delay,
|
||||
unit->issue_delay.min, unit->issue_delay.max);
|
||||
}
|
||||
else if (GET_CODE (desc) == DEFINE_INSN_RESERVATION)
|
||||
num_insn_reservations++;
|
||||
}
|
||||
|
||||
if (num_units > 0)
|
||||
if (num_units > 0 || num_insn_reservations > 0)
|
||||
{
|
||||
if (num_units > 0)
|
||||
printf ("#define TRADITIONAL_PIPELINE_INTERFACE 1\n");
|
||||
|
||||
if (num_insn_reservations > 0)
|
||||
printf ("#define DFA_PIPELINE_INTERFACE 1\n");
|
||||
|
||||
/* Compute the range of blockage cost values. See genattrtab.c
|
||||
for the derivation. BLOCKAGE (E,C) when SIMULTANEITY is zero is
|
||||
|
||||
|
@ -348,6 +357,102 @@ main (argc, argv)
|
|||
|
||||
write_units (num_units, &all_multiplicity, &all_simultaneity,
|
||||
&all_ready_cost, &all_issue_delay, &all_blockage);
|
||||
|
||||
/* Output interface for pipeline hazards recognition based on
|
||||
DFA (deterministic finite state automata. */
|
||||
printf ("\n/* DFA based pipeline interface. */");
|
||||
printf ("\n#ifndef AUTOMATON_STATE_ALTS\n");
|
||||
printf ("#define AUTOMATON_STATE_ALTS 0\n");
|
||||
printf ("#endif\n\n");
|
||||
printf ("#ifndef CPU_UNITS_QUERY\n");
|
||||
printf ("#define CPU_UNITS_QUERY 0\n");
|
||||
printf ("#endif\n\n");
|
||||
/* Interface itself: */
|
||||
printf ("extern int max_dfa_issue_rate;\n\n");
|
||||
printf ("/* The following macro value is calculated from the\n");
|
||||
printf (" automaton based pipeline description and is equal to\n");
|
||||
printf (" maximal number of all insns described in constructions\n");
|
||||
printf (" `define_insn_reservation' which can be issued on the\n");
|
||||
printf (" same processor cycle. */\n");
|
||||
printf ("#define MAX_DFA_ISSUE_RATE max_dfa_issue_rate\n\n");
|
||||
printf ("/* Insn latency time defined in define_insn_reservation. */\n");
|
||||
printf ("extern int insn_default_latency PARAMS ((rtx));\n\n");
|
||||
printf ("/* Return nonzero if there is a bypass for given insn\n");
|
||||
printf (" which is a data producer. */\n");
|
||||
printf ("extern int bypass_p PARAMS ((rtx));\n\n");
|
||||
printf ("/* Insn latency time on data consumed by the 2nd insn.\n");
|
||||
printf (" Use the function if bypass_p returns nonzero for\n");
|
||||
printf (" the 1st insn. */\n");
|
||||
printf ("extern int insn_latency PARAMS ((rtx, rtx));\n\n");
|
||||
printf ("/* The following function returns number of alternative\n");
|
||||
printf (" reservations of given insn. It may be used for better\n");
|
||||
printf (" insns scheduling heuristics. */\n");
|
||||
printf ("extern int insn_alts PARAMS ((rtx));\n\n");
|
||||
printf ("/* Maximal possible number of insns waiting results being\n");
|
||||
printf (" produced by insns whose execution is not finished. */\n");
|
||||
printf ("extern int max_insn_queue_index;\n\n");
|
||||
printf ("/* Pointer to data describing current state of DFA. */\n");
|
||||
printf ("typedef void *state_t;\n\n");
|
||||
printf ("/* Size of the data in bytes. */\n");
|
||||
printf ("extern int state_size PARAMS ((void));\n\n");
|
||||
printf ("/* Initiate given DFA state, i.e. Set up the state\n");
|
||||
printf (" as all functional units were not reserved. */\n");
|
||||
printf ("extern void state_reset PARAMS ((state_t));\n");
|
||||
printf ("/* The following function returns negative value if given\n");
|
||||
printf (" insn can be issued in processor state described by given\n");
|
||||
printf (" DFA state. In this case, the DFA state is changed to\n");
|
||||
printf (" reflect the current and future reservations by given\n");
|
||||
printf (" insn. Otherwise the function returns minimal time\n");
|
||||
printf (" delay to issue the insn. This delay may be zero\n");
|
||||
printf (" for superscalar or VLIW processors. If the second\n");
|
||||
printf (" parameter is NULL the function changes given DFA state\n");
|
||||
printf (" as new processor cycle started. */\n");
|
||||
printf ("extern int state_transition PARAMS ((state_t, rtx));\n");
|
||||
printf ("\n#if AUTOMATON_STATE_ALTS\n");
|
||||
printf ("/* The following function returns number of possible\n");
|
||||
printf (" alternative reservations of given insn in given\n");
|
||||
printf (" DFA state. It may be used for better insns scheduling\n");
|
||||
printf (" heuristics. By default the function is defined if\n");
|
||||
printf (" macro AUTOMATON_STATE_ALTS is defined because its\n");
|
||||
printf (" implementation may require much memory. */\n");
|
||||
printf ("extern int state_alts PARAMS ((state_t, rtx));\n");
|
||||
printf ("#endif\n\n");
|
||||
printf ("extern int min_issue_delay PARAMS ((state_t, rtx));\n");
|
||||
printf ("/* The following function returns nonzero if no one insn\n");
|
||||
printf (" can be issued in current DFA state. */\n");
|
||||
printf ("extern int state_dead_lock_p PARAMS ((state_t));\n");
|
||||
printf ("/* The function returns minimal delay of issue of the 2nd\n");
|
||||
printf (" insn after issuing the 1st insn in given DFA state.\n");
|
||||
printf (" The 1st insn should be issued in given state (i.e.\n");
|
||||
printf (" state_transition should return negative value for\n");
|
||||
printf (" the insn and the state). Data dependencies between\n");
|
||||
printf (" the insns are ignored by the function. */\n");
|
||||
printf
|
||||
("extern int min_insn_conflict_delay PARAMS ((state_t, rtx, rtx));\n");
|
||||
printf ("/* The following function outputs reservations for given\n");
|
||||
printf (" insn as they are described in the corresponding\n");
|
||||
printf (" define_insn_reservation. */\n");
|
||||
printf ("extern void print_reservation PARAMS ((FILE *, rtx));\n");
|
||||
printf ("\n#if CPU_UNITS_QUERY\n");
|
||||
printf ("/* The following function returns code of functional unit\n");
|
||||
printf (" with given name (see define_cpu_unit). */\n");
|
||||
printf ("extern int get_cpu_unit_code PARAMS ((const char *));\n");
|
||||
printf ("/* The following function returns nonzero if functional\n");
|
||||
printf (" unit with given code is currently reserved in given\n");
|
||||
printf (" DFA state. */\n");
|
||||
printf ("extern int cpu_unit_reservation_p PARAMS ((state_t, int));\n");
|
||||
printf ("#endif\n\n");
|
||||
printf ("/* Initiate and finish work with DFA. They should be\n");
|
||||
printf (" called as the first and the last interface\n");
|
||||
printf (" functions. */\n");
|
||||
printf ("extern void dfa_start PARAMS ((void));\n");
|
||||
printf ("extern void dfa_finish PARAMS ((void));\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Otherwise we do no scheduling, but we need these typedefs
|
||||
in order to avoid uglifying other code with more ifdefs. */
|
||||
printf ("typedef void *state_t;\n\n");
|
||||
}
|
||||
|
||||
/* Output flag masks for use by reorg.
|
||||
|
|
|
@ -115,6 +115,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "obstack.h"
|
||||
#include "errors.h"
|
||||
|
||||
#include "genattrtab.h"
|
||||
|
||||
static struct obstack obstack1, obstack2;
|
||||
struct obstack *hash_obstack = &obstack1;
|
||||
struct obstack *temp_obstack = &obstack2;
|
||||
|
@ -309,6 +311,8 @@ static int have_annul_true, have_annul_false;
|
|||
static int num_units, num_unit_opclasses;
|
||||
static int num_insn_ents;
|
||||
|
||||
int num_dfa_decls;
|
||||
|
||||
/* Used as operand to `operate_exp': */
|
||||
|
||||
enum operator {PLUS_OP, MINUS_OP, POS_MINUS_OP, EQ_OP, OR_OP, ORX_OP, MAX_OP, MIN_OP, RANGE_OP};
|
||||
|
@ -371,10 +375,7 @@ static void attr_hash_add_rtx PARAMS ((int, rtx));
|
|||
static void attr_hash_add_string PARAMS ((int, char *));
|
||||
static rtx attr_rtx PARAMS ((enum rtx_code, ...));
|
||||
static rtx attr_rtx_1 PARAMS ((enum rtx_code, va_list));
|
||||
static char *attr_printf PARAMS ((unsigned int, const char *, ...))
|
||||
ATTRIBUTE_PRINTF_2;
|
||||
static char *attr_string PARAMS ((const char *, int));
|
||||
static rtx check_attr_test PARAMS ((rtx, int, int));
|
||||
static rtx check_attr_value PARAMS ((rtx, struct attr_desc *));
|
||||
static rtx convert_set_attr_alternative PARAMS ((rtx, struct insn_def *));
|
||||
static rtx convert_set_attr PARAMS ((rtx, struct insn_def *));
|
||||
|
@ -458,10 +459,8 @@ static void write_const_num_delay_slots PARAMS ((void));
|
|||
static int n_comma_elts PARAMS ((const char *));
|
||||
static char *next_comma_elt PARAMS ((const char **));
|
||||
static struct attr_desc *find_attr PARAMS ((const char *, int));
|
||||
static void make_internal_attr PARAMS ((const char *, rtx, int));
|
||||
static struct attr_value *find_most_used PARAMS ((struct attr_desc *));
|
||||
static rtx find_single_value PARAMS ((struct attr_desc *));
|
||||
static rtx make_numeric_value PARAMS ((int));
|
||||
static void extend_range PARAMS ((struct range *, int, int));
|
||||
static rtx attr_eq PARAMS ((const char *, const char *));
|
||||
static const char *attr_numeral PARAMS ((int));
|
||||
|
@ -739,7 +738,7 @@ attr_rtx VPARAMS ((enum rtx_code code, ...))
|
|||
|
||||
rtx attr_printf (len, format, [arg1, ..., argn]) */
|
||||
|
||||
static char *
|
||||
char *
|
||||
attr_printf VPARAMS ((unsigned int len, const char *fmt, ...))
|
||||
{
|
||||
char str[256];
|
||||
|
@ -920,7 +919,7 @@ attr_copy_rtx (orig)
|
|||
|
||||
Return the new expression, if any. */
|
||||
|
||||
static rtx
|
||||
rtx
|
||||
check_attr_test (exp, is_const, lineno)
|
||||
rtx exp;
|
||||
int is_const;
|
||||
|
@ -5880,7 +5879,7 @@ find_attr (name, create)
|
|||
|
||||
/* Create internal attribute with the given default value. */
|
||||
|
||||
static void
|
||||
void
|
||||
make_internal_attr (name, value, special)
|
||||
const char *name;
|
||||
rtx value;
|
||||
|
@ -5947,7 +5946,7 @@ find_single_value (attr)
|
|||
|
||||
/* Return (attr_value "n") */
|
||||
|
||||
static rtx
|
||||
rtx
|
||||
make_numeric_value (n)
|
||||
int n;
|
||||
{
|
||||
|
@ -6097,6 +6096,7 @@ from the machine description file `md'. */\n\n");
|
|||
|
||||
/* Read the machine description. */
|
||||
|
||||
initiate_automaton_gen (argc, argv);
|
||||
while (1)
|
||||
{
|
||||
int lineno;
|
||||
|
@ -6125,6 +6125,46 @@ from the machine description file `md'. */\n\n");
|
|||
gen_unit (desc, lineno);
|
||||
break;
|
||||
|
||||
case DEFINE_CPU_UNIT:
|
||||
gen_cpu_unit (desc);
|
||||
break;
|
||||
|
||||
case DEFINE_QUERY_CPU_UNIT:
|
||||
gen_query_cpu_unit (desc);
|
||||
break;
|
||||
|
||||
case DEFINE_BYPASS:
|
||||
gen_bypass (desc);
|
||||
break;
|
||||
|
||||
case EXCLUSION_SET:
|
||||
gen_excl_set (desc);
|
||||
break;
|
||||
|
||||
case PRESENCE_SET:
|
||||
gen_presence_set (desc);
|
||||
break;
|
||||
|
||||
case ABSENCE_SET:
|
||||
gen_absence_set (desc);
|
||||
break;
|
||||
|
||||
case DEFINE_AUTOMATON:
|
||||
gen_automaton (desc);
|
||||
break;
|
||||
|
||||
case AUTOMATA_OPTION:
|
||||
gen_automata_option (desc);
|
||||
break;
|
||||
|
||||
case DEFINE_RESERVATION:
|
||||
gen_reserv (desc);
|
||||
break;
|
||||
|
||||
case DEFINE_INSN_RESERVATION:
|
||||
gen_insn_reserv (desc);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -6149,9 +6189,14 @@ from the machine description file `md'. */\n\n");
|
|||
if (num_delays)
|
||||
expand_delays ();
|
||||
|
||||
/* Expand DEFINE_FUNCTION_UNIT information into new attributes. */
|
||||
if (num_units)
|
||||
expand_units ();
|
||||
if (num_units || num_dfa_decls)
|
||||
{
|
||||
/* Expand DEFINE_FUNCTION_UNIT information into new attributes. */
|
||||
expand_units ();
|
||||
/* Build DFA, output some functions and expand DFA information
|
||||
into new attributes. */
|
||||
expand_automata ();
|
||||
}
|
||||
|
||||
printf ("#include \"config.h\"\n");
|
||||
printf ("#include \"system.h\"\n");
|
||||
|
@ -6226,9 +6271,14 @@ from the machine description file `md'. */\n\n");
|
|||
write_eligible_delay ("annul_false");
|
||||
}
|
||||
|
||||
/* Write out information about function units. */
|
||||
if (num_units)
|
||||
write_function_unit_info ();
|
||||
if (num_units || num_dfa_decls)
|
||||
{
|
||||
/* Write out information about function units. */
|
||||
write_function_unit_info ();
|
||||
/* Output code for pipeline hazards recognition based on DFA
|
||||
(deterministic finite state automata. */
|
||||
write_automata ();
|
||||
}
|
||||
|
||||
/* Write out constant delay slot info */
|
||||
write_const_num_delay_slots ();
|
||||
|
|
43
gcc/genattrtab.h
Normal file
43
gcc/genattrtab.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* External definitions of source files of genattrtab.
|
||||
Copyright (C) 2001 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU CC.
|
||||
|
||||
GNU CC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU CC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU CC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Defined in genattrtab.c: */
|
||||
extern rtx check_attr_test PARAMS ((rtx, int, int));
|
||||
extern rtx make_numeric_value PARAMS ((int));
|
||||
extern void make_internal_attr PARAMS ((const char *, rtx, int));
|
||||
extern char *attr_printf PARAMS ((unsigned int, const char *, ...))
|
||||
ATTRIBUTE_PRINTF_2;
|
||||
|
||||
extern int num_dfa_decls;
|
||||
|
||||
/* Defined in genautomata.c: */
|
||||
extern void gen_cpu_unit PARAMS ((rtx));
|
||||
extern void gen_query_cpu_unit PARAMS ((rtx));
|
||||
extern void gen_bypass PARAMS ((rtx));
|
||||
extern void gen_excl_set PARAMS ((rtx));
|
||||
extern void gen_presence_set PARAMS ((rtx));
|
||||
extern void gen_absence_set PARAMS ((rtx));
|
||||
extern void gen_automaton PARAMS ((rtx));
|
||||
extern void gen_automata_option PARAMS ((rtx));
|
||||
extern void gen_reserv PARAMS ((rtx));
|
||||
extern void gen_insn_reserv PARAMS ((rtx));
|
||||
extern void initiate_automaton_gen PARAMS ((int, char **));
|
||||
extern void expand_automata PARAMS ((void));
|
||||
extern void write_automata PARAMS ((void));
|
9162
gcc/genautomata.c
Normal file
9162
gcc/genautomata.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -158,6 +158,12 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
|
||||
static int issue_rate;
|
||||
|
||||
/* If the following variable value is non zero, the scheduler inserts
|
||||
bubbles (nop insns). The value of variable affects on scheduler
|
||||
behavior only if automaton pipeline interface with multipass
|
||||
scheduling is used and hook dfa_bubble is defined. */
|
||||
int insert_schedule_bubbles_p = 0;
|
||||
|
||||
/* sched-verbose controls the amount of debugging output the
|
||||
scheduler prints. It is controlled by -fsched-verbose=N:
|
||||
N>0 and no -DSR : the output is directed to stderr.
|
||||
|
@ -254,14 +260,39 @@ static rtx note_list;
|
|||
passes or stalls are introduced. */
|
||||
|
||||
/* Implement a circular buffer to delay instructions until sufficient
|
||||
time has passed. INSN_QUEUE_SIZE is a power of two larger than
|
||||
MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
|
||||
longest time an isnsn may be queued. */
|
||||
static rtx insn_queue[INSN_QUEUE_SIZE];
|
||||
time has passed. For the old pipeline description interface,
|
||||
INSN_QUEUE_SIZE is a power of two larger than MAX_BLOCKAGE and
|
||||
MAX_READY_COST computed by genattr.c. For the new pipeline
|
||||
description interface, MAX_INSN_QUEUE_INDEX is a power of two minus
|
||||
one which is larger than maximal time of instruction execution
|
||||
computed by genattr.c on the base maximal time of functional unit
|
||||
reservations and geting a result. This is the longest time an
|
||||
insn may be queued. */
|
||||
|
||||
#define MAX_INSN_QUEUE_INDEX max_insn_queue_index_macro_value
|
||||
|
||||
static rtx *insn_queue;
|
||||
static int q_ptr = 0;
|
||||
static int q_size = 0;
|
||||
#define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
|
||||
#define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
|
||||
#define NEXT_Q(X) (((X)+1) & MAX_INSN_QUEUE_INDEX)
|
||||
#define NEXT_Q_AFTER(X, C) (((X)+C) & MAX_INSN_QUEUE_INDEX)
|
||||
|
||||
/* The following variable defines value for macro
|
||||
MAX_INSN_QUEUE_INDEX. */
|
||||
static int max_insn_queue_index_macro_value;
|
||||
|
||||
/* The following variable value refers for all current and future
|
||||
reservations of the processor units. */
|
||||
state_t curr_state;
|
||||
|
||||
/* The following variable value is size of memory representing all
|
||||
current and future reservations of the processor units. It is used
|
||||
only by DFA based scheduler. */
|
||||
static size_t dfa_state_size;
|
||||
|
||||
/* The following array is used to find the best insn from ready when
|
||||
the automaton pipeline interface is used. */
|
||||
static char *ready_try;
|
||||
|
||||
/* Describe the ready list of the scheduler.
|
||||
VEC holds space enough for all insns in the current region. VECLEN
|
||||
|
@ -280,11 +311,15 @@ struct ready_list
|
|||
};
|
||||
|
||||
/* Forward declarations. */
|
||||
|
||||
/* The scheduler using only DFA description should never use the
|
||||
following five functions: */
|
||||
static unsigned int blockage_range PARAMS ((int, rtx));
|
||||
static void clear_units PARAMS ((void));
|
||||
static void schedule_unit PARAMS ((int, rtx, int));
|
||||
static int actual_hazard PARAMS ((int, rtx, int, int));
|
||||
static int potential_hazard PARAMS ((int, rtx, int));
|
||||
|
||||
static int priority PARAMS ((rtx));
|
||||
static int rank_for_schedule PARAMS ((const PTR, const PTR));
|
||||
static void swap_sort PARAMS ((rtx *, int));
|
||||
|
@ -292,6 +327,7 @@ static void queue_insn PARAMS ((rtx, int));
|
|||
static void schedule_insn PARAMS ((rtx, struct ready_list *, int));
|
||||
static void find_insn_reg_weight PARAMS ((int));
|
||||
static void adjust_priority PARAMS ((rtx));
|
||||
static void advance_one_cycle PARAMS ((void));
|
||||
|
||||
/* Notes handling mechanism:
|
||||
=========================
|
||||
|
@ -331,6 +367,14 @@ static void debug_ready_list PARAMS ((struct ready_list *));
|
|||
static rtx move_insn1 PARAMS ((rtx, rtx));
|
||||
static rtx move_insn PARAMS ((rtx, rtx));
|
||||
|
||||
/* The following functions are used to implement multi-pass scheduling
|
||||
on the first cycle. It is used only for DFA based scheduler. */
|
||||
static rtx ready_element PARAMS ((struct ready_list *, int));
|
||||
static rtx ready_remove PARAMS ((struct ready_list *, int));
|
||||
static int max_issue PARAMS ((struct ready_list *, state_t, int *));
|
||||
|
||||
static rtx choose_ready PARAMS ((struct ready_list *));
|
||||
|
||||
#endif /* INSN_SCHEDULING */
|
||||
|
||||
/* Point to state used for the current scheduling pass. */
|
||||
|
@ -354,7 +398,8 @@ static rtx last_scheduled_insn;
|
|||
returned by function_units_used. A function unit is encoded as the
|
||||
unit number if the value is non-negative and the compliment of a
|
||||
mask if the value is negative. A function unit index is the
|
||||
non-negative encoding. */
|
||||
non-negative encoding. The scheduler using only DFA description
|
||||
should never use the following function. */
|
||||
|
||||
HAIFA_INLINE int
|
||||
insn_unit (insn)
|
||||
|
@ -391,7 +436,9 @@ insn_unit (insn)
|
|||
/* Compute the blockage range for executing INSN on UNIT. This caches
|
||||
the value returned by the blockage_range_function for the unit.
|
||||
These values are encoded in an int where the upper half gives the
|
||||
minimum value and the lower half gives the maximum value. */
|
||||
minimum value and the lower half gives the maximum value. The
|
||||
scheduler using only DFA description should never use the following
|
||||
function. */
|
||||
|
||||
HAIFA_INLINE static unsigned int
|
||||
blockage_range (unit, insn)
|
||||
|
@ -415,20 +462,38 @@ blockage_range (unit, insn)
|
|||
return range;
|
||||
}
|
||||
|
||||
/* A vector indexed by function unit instance giving the last insn to use
|
||||
the unit. The value of the function unit instance index for unit U
|
||||
instance I is (U + I * FUNCTION_UNITS_SIZE). */
|
||||
/* A vector indexed by function unit instance giving the last insn to
|
||||
use the unit. The value of the function unit instance index for
|
||||
unit U instance I is (U + I * FUNCTION_UNITS_SIZE). The scheduler
|
||||
using only DFA description should never use the following variable. */
|
||||
#if FUNCTION_UNITS_SIZE
|
||||
static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
|
||||
#else
|
||||
static rtx unit_last_insn[1];
|
||||
#endif
|
||||
|
||||
/* A vector indexed by function unit instance giving the minimum time when
|
||||
the unit will unblock based on the maximum blockage cost. */
|
||||
/* A vector indexed by function unit instance giving the minimum time
|
||||
when the unit will unblock based on the maximum blockage cost. The
|
||||
scheduler using only DFA description should never use the following
|
||||
variable. */
|
||||
#if FUNCTION_UNITS_SIZE
|
||||
static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
|
||||
#else
|
||||
static int unit_tick[1];
|
||||
#endif
|
||||
|
||||
/* A vector indexed by function unit number giving the number of insns
|
||||
that remain to use the unit. */
|
||||
that remain to use the unit. The scheduler using only DFA
|
||||
description should never use the following variable. */
|
||||
#if FUNCTION_UNITS_SIZE
|
||||
static int unit_n_insns[FUNCTION_UNITS_SIZE];
|
||||
#else
|
||||
static int unit_n_insns[1];
|
||||
#endif
|
||||
|
||||
/* Access the unit_last_insn array. Used by the visualization code. */
|
||||
/* Access the unit_last_insn array. Used by the visualization code.
|
||||
The scheduler using only DFA description should never use the
|
||||
following function. */
|
||||
|
||||
rtx
|
||||
get_unit_last_insn (instance)
|
||||
|
@ -447,7 +512,8 @@ clear_units ()
|
|||
memset ((char *) unit_n_insns, 0, sizeof (unit_n_insns));
|
||||
}
|
||||
|
||||
/* Return the issue-delay of an insn. */
|
||||
/* Return the issue-delay of an insn. The scheduler using only DFA
|
||||
description should never use the following function. */
|
||||
|
||||
HAIFA_INLINE int
|
||||
insn_issue_delay (insn)
|
||||
|
@ -477,7 +543,8 @@ insn_issue_delay (insn)
|
|||
|
||||
/* Return the actual hazard cost of executing INSN on the unit UNIT,
|
||||
instance INSTANCE at time CLOCK if the previous actual hazard cost
|
||||
was COST. */
|
||||
was COST. The scheduler using only DFA description should never
|
||||
use the following function. */
|
||||
|
||||
HAIFA_INLINE int
|
||||
actual_hazard_this_instance (unit, instance, insn, clock, cost)
|
||||
|
@ -513,8 +580,9 @@ actual_hazard_this_instance (unit, instance, insn, clock, cost)
|
|||
return cost;
|
||||
}
|
||||
|
||||
/* Record INSN as having begun execution on the units encoded by UNIT at
|
||||
time CLOCK. */
|
||||
/* Record INSN as having begun execution on the units encoded by UNIT
|
||||
at time CLOCK. The scheduler using only DFA description should
|
||||
never use the following function. */
|
||||
|
||||
HAIFA_INLINE static void
|
||||
schedule_unit (unit, insn, clock)
|
||||
|
@ -545,8 +613,10 @@ schedule_unit (unit, insn, clock)
|
|||
schedule_unit (i, insn, clock);
|
||||
}
|
||||
|
||||
/* Return the actual hazard cost of executing INSN on the units encoded by
|
||||
UNIT at time CLOCK if the previous actual hazard cost was COST. */
|
||||
/* Return the actual hazard cost of executing INSN on the units
|
||||
encoded by UNIT at time CLOCK if the previous actual hazard cost
|
||||
was COST. The scheduler using only DFA description should never
|
||||
use the following function. */
|
||||
|
||||
HAIFA_INLINE static int
|
||||
actual_hazard (unit, insn, clock, cost)
|
||||
|
@ -591,11 +661,13 @@ actual_hazard (unit, insn, clock, cost)
|
|||
}
|
||||
|
||||
/* Return the potential hazard cost of executing an instruction on the
|
||||
units encoded by UNIT if the previous potential hazard cost was COST.
|
||||
An insn with a large blockage time is chosen in preference to one
|
||||
with a smaller time; an insn that uses a unit that is more likely
|
||||
to be used is chosen in preference to one with a unit that is less
|
||||
used. We are trying to minimize a subsequent actual hazard. */
|
||||
units encoded by UNIT if the previous potential hazard cost was
|
||||
COST. An insn with a large blockage time is chosen in preference
|
||||
to one with a smaller time; an insn that uses a unit that is more
|
||||
likely to be used is chosen in preference to one with a unit that
|
||||
is less used. We are trying to minimize a subsequent actual
|
||||
hazard. The scheduler using only DFA description should never use
|
||||
the following function. */
|
||||
|
||||
HAIFA_INLINE static int
|
||||
potential_hazard (unit, insn, cost)
|
||||
|
@ -648,62 +720,69 @@ insn_cost (insn, link, used)
|
|||
{
|
||||
int cost = INSN_COST (insn);
|
||||
|
||||
if (cost == 0)
|
||||
if (cost < 0)
|
||||
{
|
||||
recog_memoized (insn);
|
||||
|
||||
/* A USE insn, or something else we don't need to understand.
|
||||
We can't pass these directly to result_ready_cost because it will
|
||||
trigger a fatal error for unrecognizable insns. */
|
||||
if (INSN_CODE (insn) < 0)
|
||||
/* A USE insn, or something else we don't need to
|
||||
understand. We can't pass these directly to
|
||||
result_ready_cost or insn_default_latency because it will
|
||||
trigger a fatal error for unrecognizable insns. */
|
||||
if (recog_memoized (insn) < 0)
|
||||
{
|
||||
INSN_COST (insn) = 1;
|
||||
return 1;
|
||||
INSN_COST (insn) = 0;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cost = result_ready_cost (insn);
|
||||
|
||||
if (cost < 1)
|
||||
cost = 1;
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
cost = insn_default_latency (insn);
|
||||
else
|
||||
cost = result_ready_cost (insn);
|
||||
|
||||
if (cost < 0)
|
||||
cost = 0;
|
||||
|
||||
INSN_COST (insn) = cost;
|
||||
}
|
||||
}
|
||||
|
||||
/* In this case estimate cost without caring how insn is used. */
|
||||
if (link == 0 && used == 0)
|
||||
if (link == 0 || used == 0)
|
||||
return cost;
|
||||
|
||||
/* A USE insn should never require the value used to be computed. This
|
||||
allows the computation of a function's result and parameter values to
|
||||
overlap the return and call. */
|
||||
recog_memoized (used);
|
||||
if (INSN_CODE (used) < 0)
|
||||
LINK_COST_FREE (link) = 1;
|
||||
|
||||
/* If some dependencies vary the cost, compute the adjustment. Most
|
||||
commonly, the adjustment is complete: either the cost is ignored
|
||||
(in the case of an output- or anti-dependence), or the cost is
|
||||
unchanged. These values are cached in the link as LINK_COST_FREE
|
||||
and LINK_COST_ZERO. */
|
||||
|
||||
if (LINK_COST_FREE (link))
|
||||
/* A USE insn should never require the value used to be computed.
|
||||
This allows the computation of a function's result and parameter
|
||||
values to overlap the return and call. */
|
||||
if (recog_memoized (used) < 0)
|
||||
cost = 0;
|
||||
else if (!LINK_COST_ZERO (link) && targetm.sched.adjust_cost)
|
||||
else
|
||||
{
|
||||
int ncost = (*targetm.sched.adjust_cost) (used, link, insn, cost);
|
||||
|
||||
if (ncost < 1)
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
LINK_COST_FREE (link) = 1;
|
||||
ncost = 0;
|
||||
if (INSN_CODE (insn) >= 0)
|
||||
{
|
||||
if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
|
||||
cost = 0;
|
||||
else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
|
||||
{
|
||||
cost = (insn_default_latency (insn)
|
||||
- insn_default_latency (used));
|
||||
if (cost <= 0)
|
||||
cost = 1;
|
||||
}
|
||||
else if (bypass_p (insn))
|
||||
cost = insn_latency (insn, used);
|
||||
}
|
||||
}
|
||||
if (cost == ncost)
|
||||
LINK_COST_ZERO (link) = 1;
|
||||
cost = ncost;
|
||||
}
|
||||
|
||||
if (targetm.sched.adjust_cost)
|
||||
cost = (*targetm.sched.adjust_cost) (used, link, insn, cost);
|
||||
|
||||
if (cost < 0)
|
||||
cost = 0;
|
||||
}
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
|
@ -930,6 +1009,48 @@ ready_remove_first (ready)
|
|||
return t;
|
||||
}
|
||||
|
||||
/* The following code implements multi-pass scheduling for the first
|
||||
cycle. In other words, we will try to choose ready insn which
|
||||
permits to start maximum number of insns on the same cycle. */
|
||||
|
||||
/* Return a pointer to the element INDEX from the ready. INDEX for
|
||||
insn with the highest priority is 0, and the lowest priority has
|
||||
N_READY - 1. */
|
||||
|
||||
HAIFA_INLINE static rtx
|
||||
ready_element (ready, index)
|
||||
struct ready_list *ready;
|
||||
int index;
|
||||
{
|
||||
if (ready->n_ready == 0 || index >= ready->n_ready)
|
||||
abort ();
|
||||
return ready->vec[ready->first - index];
|
||||
}
|
||||
|
||||
/* Remove the element INDEX from the ready list and return it. INDEX
|
||||
for insn with the highest priority is 0, and the lowest priority
|
||||
has N_READY - 1. */
|
||||
|
||||
HAIFA_INLINE static rtx
|
||||
ready_remove (ready, index)
|
||||
struct ready_list *ready;
|
||||
int index;
|
||||
{
|
||||
rtx t;
|
||||
int i;
|
||||
|
||||
if (index == 0)
|
||||
return ready_remove_first (ready);
|
||||
if (ready->n_ready == 0 || index >= ready->n_ready)
|
||||
abort ();
|
||||
t = ready->vec[ready->first - index];
|
||||
ready->n_ready--;
|
||||
for (i = index; i < ready->n_ready; i++)
|
||||
ready->vec[ready->first - i] = ready->vec[ready->first - i - 1];
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
/* Sort the ready list READY by ascending priority, using the SCHED_SORT
|
||||
macro. */
|
||||
|
||||
|
@ -961,6 +1082,25 @@ adjust_priority (prev)
|
|||
(*targetm.sched.adjust_priority) (prev, INSN_PRIORITY (prev));
|
||||
}
|
||||
|
||||
/* Advance time on one cycle. */
|
||||
HAIFA_INLINE static void
|
||||
advance_one_cycle ()
|
||||
{
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
if (targetm.sched.dfa_pre_cycle_insn)
|
||||
state_transition (curr_state,
|
||||
(*targetm.sched.dfa_pre_cycle_insn) ());
|
||||
|
||||
state_transition (curr_state, NULL);
|
||||
|
||||
if (targetm.sched.dfa_post_cycle_insn)
|
||||
state_transition (curr_state,
|
||||
(*targetm.sched.dfa_post_cycle_insn) ());
|
||||
}
|
||||
}
|
||||
|
||||
/* Clock at which the previous instruction was issued. */
|
||||
static int last_clock_var;
|
||||
|
||||
|
@ -976,26 +1116,50 @@ schedule_insn (insn, ready, clock)
|
|||
int clock;
|
||||
{
|
||||
rtx link;
|
||||
int unit;
|
||||
int unit = 0;
|
||||
|
||||
unit = insn_unit (insn);
|
||||
if (!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
unit = insn_unit (insn);
|
||||
|
||||
if (sched_verbose >= 2)
|
||||
{
|
||||
fprintf (sched_dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
|
||||
INSN_UID (insn));
|
||||
insn_print_units (insn);
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
fprintf (sched_dump,
|
||||
";;\t\t--> scheduling insn <<<%d>>>:reservation ",
|
||||
INSN_UID (insn));
|
||||
|
||||
if (recog_memoized (insn) < 0)
|
||||
fprintf (sched_dump, "nothing");
|
||||
else
|
||||
print_reservation (sched_dump, insn);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (sched_dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
|
||||
INSN_UID (insn));
|
||||
insn_print_units (insn);
|
||||
}
|
||||
|
||||
fprintf (sched_dump, "\n");
|
||||
}
|
||||
|
||||
if (sched_verbose && unit == -1)
|
||||
visualize_no_unit (insn);
|
||||
if (!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
if (sched_verbose && unit == -1)
|
||||
visualize_no_unit (insn);
|
||||
|
||||
if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
|
||||
schedule_unit (unit, insn, clock);
|
||||
|
||||
if (INSN_DEPEND (insn) == 0)
|
||||
return;
|
||||
if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
|
||||
schedule_unit (unit, insn, clock);
|
||||
|
||||
if (INSN_DEPEND (insn) == 0)
|
||||
return;
|
||||
}
|
||||
|
||||
for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
|
||||
{
|
||||
|
@ -1037,7 +1201,9 @@ schedule_insn (insn, ready, clock)
|
|||
to issue on the same cycle as the previous insn. A machine
|
||||
may use this information to decide how the instruction should
|
||||
be aligned. */
|
||||
if (reload_completed && issue_rate > 1)
|
||||
if (reload_completed && issue_rate > 1
|
||||
&& GET_CODE (PATTERN (insn)) != USE
|
||||
&& GET_CODE (PATTERN (insn)) != CLOBBER)
|
||||
{
|
||||
PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
|
||||
last_clock_var = clock;
|
||||
|
@ -1464,7 +1630,7 @@ queue_to_ready (ready)
|
|||
{
|
||||
int stalls;
|
||||
|
||||
for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
|
||||
for (stalls = 1; stalls <= MAX_INSN_QUEUE_INDEX; stalls++)
|
||||
{
|
||||
if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
|
||||
{
|
||||
|
@ -1483,13 +1649,19 @@ queue_to_ready (ready)
|
|||
}
|
||||
insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
|
||||
|
||||
if (ready->n_ready)
|
||||
break;
|
||||
advance_one_cycle ();
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
advance_one_cycle ();
|
||||
}
|
||||
|
||||
if (sched_verbose && stalls)
|
||||
if ((!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
&& sched_verbose && stalls)
|
||||
visualize_stall_cycles (stalls);
|
||||
|
||||
q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
|
||||
clock_var += stalls;
|
||||
}
|
||||
|
@ -1505,7 +1677,10 @@ debug_ready_list (ready)
|
|||
int i;
|
||||
|
||||
if (ready->n_ready == 0)
|
||||
return;
|
||||
{
|
||||
fprintf (sched_dump, "\n");
|
||||
return;
|
||||
}
|
||||
|
||||
p = ready_lastpos (ready);
|
||||
for (i = 0; i < ready->n_ready; i++)
|
||||
|
@ -1617,6 +1792,113 @@ move_insn (insn, last)
|
|||
return retval;
|
||||
}
|
||||
|
||||
/* The following function returns maximal (or close to maximal) number
|
||||
of insns which can be issued on the same cycle and one of which
|
||||
insns is insns with the best rank (the last insn in READY). To
|
||||
make this function tries different samples of ready insns. READY
|
||||
is current queue `ready'. Global array READY_TRY reflects what
|
||||
insns are already issued in this try. STATE is current processor
|
||||
state. If the function returns nonzero, INDEX will contain index
|
||||
of the best insn in READY. The following function is used only for
|
||||
first cycle multipass scheduling. */
|
||||
|
||||
static int
|
||||
max_issue (ready, state, index)
|
||||
struct ready_list *ready;
|
||||
state_t state;
|
||||
int *index;
|
||||
{
|
||||
int i, best, n, temp_index, delay;
|
||||
state_t temp_state;
|
||||
rtx insn;
|
||||
int max_lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) ();
|
||||
|
||||
if (state_dead_lock_p (state))
|
||||
return 0;
|
||||
|
||||
temp_state = alloca (dfa_state_size);
|
||||
best = 0;
|
||||
|
||||
for (i = 0; i < ready->n_ready; i++)
|
||||
if (!ready_try [i])
|
||||
{
|
||||
insn = ready_element (ready, i);
|
||||
|
||||
if (INSN_CODE (insn) < 0)
|
||||
continue;
|
||||
|
||||
memcpy (temp_state, state, dfa_state_size);
|
||||
|
||||
delay = state_transition (temp_state, insn);
|
||||
|
||||
if (delay == 0)
|
||||
{
|
||||
if (!targetm.sched.dfa_bubble)
|
||||
continue;
|
||||
else
|
||||
{
|
||||
int j;
|
||||
rtx bubble;
|
||||
|
||||
for (j = 0;
|
||||
(bubble = (*targetm.sched.dfa_bubble) (j)) != NULL_RTX;
|
||||
j++)
|
||||
if (state_transition (temp_state, bubble) < 0
|
||||
&& state_transition (temp_state, insn) < 0)
|
||||
break;
|
||||
|
||||
if (bubble == NULL_RTX)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (delay > 0)
|
||||
continue;
|
||||
|
||||
--max_lookahead;
|
||||
|
||||
if (max_lookahead < 0)
|
||||
break;
|
||||
|
||||
ready_try [i] = 1;
|
||||
|
||||
n = max_issue (ready, temp_state, &temp_index);
|
||||
if (n > 0 || ready_try[0])
|
||||
n += 1;
|
||||
|
||||
if (best < n)
|
||||
{
|
||||
best = n;
|
||||
*index = i;
|
||||
}
|
||||
ready_try [i] = 0;
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
/* The following function chooses insn from READY and modifies
|
||||
*N_READY and READY. The following function is used only for first
|
||||
cycle multipass scheduling. */
|
||||
|
||||
static rtx
|
||||
choose_ready (ready)
|
||||
struct ready_list *ready;
|
||||
{
|
||||
if (!targetm.sched.first_cycle_multipass_dfa_lookahead
|
||||
|| (*targetm.sched.first_cycle_multipass_dfa_lookahead) () <= 0)
|
||||
return ready_remove_first (ready);
|
||||
else
|
||||
{
|
||||
/* Try to choose the better insn. */
|
||||
int index;
|
||||
|
||||
if (max_issue (ready, curr_state, &index) == 0)
|
||||
return ready_remove_first (ready);
|
||||
else
|
||||
return ready_remove (ready, index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called from backends from targetm.sched.reorder to emit stuff into
|
||||
the instruction stream. */
|
||||
|
||||
|
@ -1638,7 +1920,9 @@ schedule_block (b, rgn_n_insns)
|
|||
int rgn_n_insns;
|
||||
{
|
||||
struct ready_list ready;
|
||||
int first_cycle_insn_p;
|
||||
int can_issue_more;
|
||||
state_t temp_state = NULL; /* It is used for multipass scheduling. */
|
||||
|
||||
/* Head/tail info for this block. */
|
||||
rtx prev_head = current_sched_info->prev_head;
|
||||
|
@ -1671,7 +1955,11 @@ schedule_block (b, rgn_n_insns)
|
|||
init_block_visualization ();
|
||||
}
|
||||
|
||||
clear_units ();
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
state_reset (curr_state);
|
||||
else
|
||||
clear_units ();
|
||||
|
||||
/* Allocate the ready list. */
|
||||
ready.veclen = rgn_n_insns + 1 + issue_rate;
|
||||
|
@ -1679,6 +1967,15 @@ schedule_block (b, rgn_n_insns)
|
|||
ready.vec = (rtx *) xmalloc (ready.veclen * sizeof (rtx));
|
||||
ready.n_ready = 0;
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
/* It is used for first cycle multipass scheduling. */
|
||||
temp_state = alloca (dfa_state_size);
|
||||
ready_try = (char *) xmalloc ((rgn_n_insns + 1) * sizeof (char));
|
||||
memset (ready_try, 0, (rgn_n_insns + 1) * sizeof (char));
|
||||
}
|
||||
|
||||
(*current_sched_info->init_ready_list) (&ready);
|
||||
|
||||
if (targetm.sched.md_init)
|
||||
|
@ -1691,8 +1988,16 @@ schedule_block (b, rgn_n_insns)
|
|||
queue. */
|
||||
q_ptr = 0;
|
||||
q_size = 0;
|
||||
last_clock_var = 0;
|
||||
memset ((char *) insn_queue, 0, sizeof (insn_queue));
|
||||
|
||||
if (!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
max_insn_queue_index_macro_value = INSN_QUEUE_SIZE - 1;
|
||||
else
|
||||
max_insn_queue_index_macro_value = max_insn_queue_index;
|
||||
|
||||
insn_queue = (rtx *) alloca ((MAX_INSN_QUEUE_INDEX + 1) * sizeof (rtx));
|
||||
memset ((char *) insn_queue, 0, (MAX_INSN_QUEUE_INDEX + 1) * sizeof (rtx));
|
||||
last_clock_var = -1;
|
||||
|
||||
/* Start just before the beginning of time. */
|
||||
clock_var = -1;
|
||||
|
@ -1702,12 +2007,18 @@ schedule_block (b, rgn_n_insns)
|
|||
{
|
||||
clock_var++;
|
||||
|
||||
advance_one_cycle ();
|
||||
|
||||
/* Add to the ready list all pending insns that can be issued now.
|
||||
If there are no ready insns, increment clock until one
|
||||
is ready and add all pending insns at that point to the ready
|
||||
list. */
|
||||
queue_to_ready (&ready);
|
||||
|
||||
if (sched_verbose && targetm.sched.cycle_display)
|
||||
last_scheduled_insn
|
||||
= (*targetm.sched.cycle_display) (clock_var, last_scheduled_insn);
|
||||
|
||||
if (ready.n_ready == 0)
|
||||
abort ();
|
||||
|
||||
|
@ -1730,24 +2041,127 @@ schedule_block (b, rgn_n_insns)
|
|||
else
|
||||
can_issue_more = issue_rate;
|
||||
|
||||
if (sched_verbose && targetm.sched.cycle_display)
|
||||
last_scheduled_insn
|
||||
= (*targetm.sched.cycle_display) (clock_var, last_scheduled_insn);
|
||||
|
||||
if (sched_verbose)
|
||||
first_cycle_insn_p = 1;
|
||||
for (;;)
|
||||
{
|
||||
fprintf (sched_dump, "\n;;\tReady list (t =%3d): ", clock_var);
|
||||
debug_ready_list (&ready);
|
||||
}
|
||||
rtx insn;
|
||||
int cost;
|
||||
|
||||
if (sched_verbose)
|
||||
{
|
||||
fprintf (sched_dump, ";;\tReady list (t =%3d): ",
|
||||
clock_var);
|
||||
debug_ready_list (&ready);
|
||||
}
|
||||
|
||||
if (!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
if (ready.n_ready == 0 || !can_issue_more
|
||||
|| !(*current_sched_info->schedule_more_p) ())
|
||||
break;
|
||||
insn = choose_ready (&ready);
|
||||
cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ready.n_ready == 0 || !can_issue_more
|
||||
|| state_dead_lock_p (curr_state)
|
||||
|| !(*current_sched_info->schedule_more_p) ())
|
||||
break;
|
||||
|
||||
/* Select and remove the insn from the ready list. */
|
||||
insn = choose_ready (&ready);
|
||||
|
||||
memcpy (temp_state, curr_state, dfa_state_size);
|
||||
if (recog_memoized (insn) < 0)
|
||||
{
|
||||
if (!first_cycle_insn_p
|
||||
&& (GET_CODE (PATTERN (insn)) == ASM_INPUT
|
||||
|| asm_noperands (PATTERN (insn)) >= 0))
|
||||
/* This is asm insn which is tryed to be issued on the
|
||||
cycle not first. Issue it on the next cycle. */
|
||||
cost = 1;
|
||||
else
|
||||
/* A USE insn, or something else we don't need to
|
||||
understand. We can't pass these directly to
|
||||
state_transition because it will trigger a
|
||||
fatal error for unrecognizable insns. */
|
||||
cost = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cost = state_transition (temp_state, insn);
|
||||
|
||||
if (targetm.sched.first_cycle_multipass_dfa_lookahead
|
||||
&& targetm.sched.dfa_bubble)
|
||||
{
|
||||
if (cost == 0)
|
||||
{
|
||||
int j;
|
||||
rtx bubble;
|
||||
|
||||
for (j = 0;
|
||||
(bubble = (*targetm.sched.dfa_bubble) (j))
|
||||
!= NULL_RTX;
|
||||
j++)
|
||||
{
|
||||
memcpy (temp_state, curr_state, dfa_state_size);
|
||||
|
||||
if (state_transition (temp_state, bubble) < 0
|
||||
&& state_transition (temp_state, insn) < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (bubble != NULL_RTX)
|
||||
{
|
||||
if (insert_schedule_bubbles_p)
|
||||
{
|
||||
rtx copy;
|
||||
|
||||
copy = copy_rtx (PATTERN (bubble));
|
||||
emit_insn_after (copy, last_scheduled_insn);
|
||||
last_scheduled_insn
|
||||
= NEXT_INSN (last_scheduled_insn);
|
||||
INSN_CODE (last_scheduled_insn)
|
||||
= INSN_CODE (bubble);
|
||||
|
||||
/* Annotate the same for the first insns
|
||||
scheduling by using mode. */
|
||||
PUT_MODE (last_scheduled_insn,
|
||||
(clock_var > last_clock_var
|
||||
? clock_var - last_clock_var
|
||||
: VOIDmode));
|
||||
last_clock_var = clock_var;
|
||||
|
||||
if (sched_verbose >= 2)
|
||||
{
|
||||
fprintf (sched_dump,
|
||||
";;\t\t--> scheduling bubble insn <<<%d>>>:reservation ",
|
||||
INSN_UID (last_scheduled_insn));
|
||||
|
||||
if (recog_memoized (last_scheduled_insn)
|
||||
< 0)
|
||||
fprintf (sched_dump, "nothing");
|
||||
else
|
||||
print_reservation
|
||||
(sched_dump, last_scheduled_insn);
|
||||
|
||||
fprintf (sched_dump, "\n");
|
||||
}
|
||||
}
|
||||
cost = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cost < 0)
|
||||
cost = 0;
|
||||
else if (cost == 0)
|
||||
cost = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Issue insns from ready list. */
|
||||
while (ready.n_ready != 0
|
||||
&& can_issue_more
|
||||
&& (*current_sched_info->schedule_more_p) ())
|
||||
{
|
||||
/* Select and remove the insn from the ready list. */
|
||||
rtx insn = ready_remove_first (&ready);
|
||||
int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
|
||||
|
||||
if (cost >= 1)
|
||||
{
|
||||
|
@ -1760,6 +2174,10 @@ schedule_block (b, rgn_n_insns)
|
|||
|
||||
last_scheduled_insn = move_insn (insn, last_scheduled_insn);
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
memcpy (curr_state, temp_state, dfa_state_size);
|
||||
|
||||
if (targetm.sched.variable_issue)
|
||||
can_issue_more =
|
||||
(*targetm.sched.variable_issue) (sched_dump, sched_verbose,
|
||||
|
@ -1770,6 +2188,8 @@ schedule_block (b, rgn_n_insns)
|
|||
schedule_insn (insn, &ready, clock_var);
|
||||
|
||||
next:
|
||||
first_cycle_insn_p = 0;
|
||||
|
||||
if (targetm.sched.reorder2)
|
||||
{
|
||||
/* Sort the ready list based on priority. */
|
||||
|
@ -1783,8 +2203,10 @@ schedule_block (b, rgn_n_insns)
|
|||
}
|
||||
}
|
||||
|
||||
/* Debug info. */
|
||||
if (sched_verbose)
|
||||
if ((!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
&& sched_verbose)
|
||||
/* Debug info. */
|
||||
visualize_scheduled_insns (clock_var);
|
||||
}
|
||||
|
||||
|
@ -1796,7 +2218,9 @@ schedule_block (b, rgn_n_insns)
|
|||
{
|
||||
fprintf (sched_dump, ";;\tReady list (final): ");
|
||||
debug_ready_list (&ready);
|
||||
print_block_visualization ("");
|
||||
if (!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
print_block_visualization ("");
|
||||
}
|
||||
|
||||
/* Sanity check -- queue must be empty now. Meaningless if region has
|
||||
|
@ -1841,6 +2265,10 @@ schedule_block (b, rgn_n_insns)
|
|||
current_sched_info->tail = tail;
|
||||
|
||||
free (ready.vec);
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
free (ready_try);
|
||||
}
|
||||
|
||||
/* Set_priorities: compute priority of each insn in the block. */
|
||||
|
@ -1882,6 +2310,7 @@ sched_init (dump_file)
|
|||
{
|
||||
int luid, b;
|
||||
rtx insn;
|
||||
int i;
|
||||
|
||||
/* Disable speculative loads in their presence if cc0 defined. */
|
||||
#ifdef HAVE_cc0
|
||||
|
@ -1909,6 +2338,27 @@ sched_init (dump_file)
|
|||
|
||||
h_i_d = (struct haifa_insn_data *) xcalloc (old_max_uid, sizeof (*h_i_d));
|
||||
|
||||
for (i = 0; i < old_max_uid; i++)
|
||||
h_i_d [i].cost = -1;
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
if (targetm.sched.init_dfa_pre_cycle_insn)
|
||||
(*targetm.sched.init_dfa_pre_cycle_insn) ();
|
||||
|
||||
if (targetm.sched.init_dfa_post_cycle_insn)
|
||||
(*targetm.sched.init_dfa_post_cycle_insn) ();
|
||||
|
||||
if (targetm.sched.first_cycle_multipass_dfa_lookahead
|
||||
&& targetm.sched.init_dfa_bubbles)
|
||||
(*targetm.sched.init_dfa_bubbles) ();
|
||||
|
||||
dfa_start ();
|
||||
dfa_state_size = state_size ();
|
||||
curr_state = xmalloc (dfa_state_size);
|
||||
}
|
||||
|
||||
h_i_d[0].luid = 0;
|
||||
luid = 1;
|
||||
for (b = 0; b < n_basic_blocks; b++)
|
||||
|
@ -1966,8 +2416,10 @@ sched_init (dump_file)
|
|||
}
|
||||
}
|
||||
|
||||
/* Find units used in this function, for visualization. */
|
||||
if (sched_verbose)
|
||||
if ((!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
&& sched_verbose)
|
||||
/* Find units used in this function, for visualization. */
|
||||
init_target_units ();
|
||||
|
||||
/* ??? Add a NOTE after the last insn of the last basic block. It is not
|
||||
|
@ -1997,6 +2449,13 @@ void
|
|||
sched_finish ()
|
||||
{
|
||||
free (h_i_d);
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
free (curr_state);
|
||||
dfa_finish ();
|
||||
}
|
||||
free_dependency_caches ();
|
||||
end_alias_analysis ();
|
||||
if (write_symbols != NO_DEBUG)
|
||||
|
|
141
gcc/rtl.def
141
gcc/rtl.def
|
@ -341,6 +341,147 @@ DEF_RTL_EXPR(SEQUENCE, "sequence", "E", 'x')
|
|||
/* Refers to the address of its argument. This is only used in alias.c. */
|
||||
DEF_RTL_EXPR(ADDRESS, "address", "e", 'm')
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Constructions for CPU pipeline description described by NDFAs.
|
||||
These do not appear in actual rtl code in the compiler.
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
/* (define_cpu_unit string [string]) describes cpu functional
|
||||
units (separated by comma).
|
||||
|
||||
1st operand: Names of cpu functional units.
|
||||
2nd operand: Name of automaton (see comments for DEFINE_AUTOMATON).
|
||||
|
||||
All define_reservations, define_cpu_units, and
|
||||
define_query_cpu_units should have unique names which may not be
|
||||
"nothing". */
|
||||
DEF_RTL_EXPR(DEFINE_CPU_UNIT, "define_cpu_unit", "sS", 'x')
|
||||
|
||||
/* (define_query_cpu_unit string [string]) describes cpu functional
|
||||
units analogously to define_cpu_unit. If we use automaton without
|
||||
minimization, the reservation of such units can be queried for
|
||||
automaton state. */
|
||||
DEF_RTL_EXPR(DEFINE_QUERY_CPU_UNIT, "define_query_cpu_unit", "sS", 'x')
|
||||
|
||||
/* (exclusion_set string string) means that each CPU functional unit
|
||||
in the first string can not be reserved simultaneously with any
|
||||
unit whose name is in the second string and vise versa. CPU units
|
||||
in the string are separated by commas. For example, it is useful
|
||||
for description CPU with fully pipelined floating point functional
|
||||
unit which can execute simultaneously only single floating point
|
||||
insns or only double floating point insns. All CPU functional
|
||||
units in a set should belong the same automaton. */
|
||||
DEF_RTL_EXPR(EXCLUSION_SET, "exclusion_set", "ss", 'x')
|
||||
|
||||
/* (presence_set string string) means that each CPU functional unit in
|
||||
the first string can not be reserved unless at least one of units
|
||||
whose names are in the second string is reserved. This is an
|
||||
asymmetric relation. CPU units in the string are separated by
|
||||
commas. For example, it is useful for description that slot1 is
|
||||
reserved after slot0 reservation for VLIW processor. All CPU
|
||||
functional units in a set should belong the same automaton. */
|
||||
DEF_RTL_EXPR(PRESENCE_SET, "presence_set", "ss", 'x')
|
||||
|
||||
/* (absence_set string string) means that each CPU functional unit in
|
||||
the first string can not be reserved only if each unit whose name
|
||||
is in the second string is not reserved. This is an asymmetric
|
||||
relation (actually exclusion set is analogous to this one but it is
|
||||
symmetric). CPU units in the string are separated by commas. For
|
||||
example, it is useful for description that slot0 can not be
|
||||
reserved after slot1 or slot2 reservation for VLIW processor. All
|
||||
CPU functional units in a set should belong the same automaton. */
|
||||
DEF_RTL_EXPR(ABSENCE_SET, "absence_set", "ss", 'x')
|
||||
|
||||
/* (define_bypass number out_insn_names in_insn_names) names bypass
|
||||
with given latency (the first number) from insns given by the first
|
||||
string (see define_insn_reservation) into insns given by the second
|
||||
string. Insn names in the strings are separated by commas. The
|
||||
third operand is optional name of function which is additional
|
||||
guard for the bypass. The function will get the two insns as
|
||||
parameters. If the function returns zero the bypass will be
|
||||
ignored for this case. Additional guard is necessary to recognize
|
||||
complicated bypasses, e.g. when consumer is load address. */
|
||||
DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", 'x')
|
||||
|
||||
/* (define_automaton string) describes names of automata generated and
|
||||
used for pipeline hazards recognition. The names are separated by
|
||||
comma. Actually it is possibly to generate the single automaton
|
||||
but unfortunately it can be very large. If we use more one
|
||||
automata, the summary size of the automata usually is less than the
|
||||
single one. The automaton name is used in define_cpu_unit and
|
||||
define_query_cpu_unit. All automata should have unique names. */
|
||||
DEF_RTL_EXPR(DEFINE_AUTOMATON, "define_automaton", "s", 'x')
|
||||
|
||||
/* (automata_option string) describes option for generation of
|
||||
automata. Currently there are the following options:
|
||||
|
||||
o "no-minimization" which makes no minimization of automata. This
|
||||
is only worth to do when we are going to query CPU functional
|
||||
unit reservations in an automaton state.
|
||||
|
||||
o "w" which means generation of file describing the result
|
||||
automaton. The file can be used for the description verification.
|
||||
|
||||
o "ndfa" which makes nondeterministic finite state automata. */
|
||||
DEF_RTL_EXPR(AUTOMATA_OPTION, "automata_option", "s", 'x')
|
||||
|
||||
/* (define_reservation string string) names reservation (the first
|
||||
string) of cpu functional units (the 2nd string). Sometimes unit
|
||||
reservations for different insns contain common parts. In such
|
||||
case, you can describe common part and use its name (the 1st
|
||||
parameter) in regular expression in define_insn_reservation. All
|
||||
define_reservations, define_cpu_units, and define_query_cpu_units
|
||||
should have unique names which may not be "nothing". */
|
||||
DEF_RTL_EXPR(DEFINE_RESERVATION, "define_reservation", "ss", 'x')
|
||||
|
||||
/* (define_insn_reservation name default_latency condition regexpr)
|
||||
describes reservation of cpu functional units (the 3nd operand) for
|
||||
instruction which is selected by the condition (the 2nd parameter).
|
||||
The first parameter is used for output of debugging information.
|
||||
The reservations are described by a regular expression according
|
||||
the following syntax:
|
||||
|
||||
regexp = regexp "," oneof
|
||||
| oneof
|
||||
|
||||
oneof = oneof "|" allof
|
||||
| allof
|
||||
|
||||
allof = allof "+" repeat
|
||||
| repeat
|
||||
|
||||
repeat = element "*" number
|
||||
| element
|
||||
|
||||
element = cpu_function_unit_name
|
||||
| reservation_name
|
||||
| result_name
|
||||
| "nothing"
|
||||
| "(" regexp ")"
|
||||
|
||||
1. "," is used for describing start of the next cycle in
|
||||
reservation.
|
||||
|
||||
2. "|" is used for describing the reservation described by the
|
||||
first regular expression *or* the reservation described by the
|
||||
second regular expression *or* etc.
|
||||
|
||||
3. "+" is used for describing the reservation described by the
|
||||
first regular expression *and* the reservation described by the
|
||||
second regular expression *and* etc.
|
||||
|
||||
4. "*" is used for convinience and simply means sequence in
|
||||
which the regular expression are repeated NUMBER times with
|
||||
cycle advancing (see ",").
|
||||
|
||||
5. cpu functional unit name which means its reservation.
|
||||
|
||||
6. reservation name -- see define_reservation.
|
||||
|
||||
7. string "nothing" means no units reservation. */
|
||||
|
||||
DEF_RTL_EXPR(DEFINE_INSN_RESERVATION, "define_insn_reservation", "sies", 'x')
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Expressions used for insn attributes. These also do not appear in
|
||||
actual rtl code in the compiler.
|
||||
|
|
14
gcc/rtl.h
14
gcc/rtl.h
|
@ -130,11 +130,9 @@ struct rtx_def
|
|||
/* 1 in an INSN if it can alter flow of control
|
||||
within this function.
|
||||
MEM_KEEP_ALIAS_SET_P in a MEM.
|
||||
LINK_COST_ZERO in an INSN_LIST.
|
||||
SET_IS_RETURN_P in a SET. */
|
||||
unsigned int jump : 1;
|
||||
/* 1 in an INSN if it can call another function.
|
||||
LINK_COST_FREE in an INSN_LIST. */
|
||||
/* 1 in an INSN if it can call another function. */
|
||||
unsigned int call : 1;
|
||||
/* 1 in a REG if value of this expression will never change during
|
||||
the current function, even though it is not manifestly constant.
|
||||
|
@ -983,16 +981,6 @@ do { \
|
|||
with the preceding insn. */
|
||||
#define SCHED_GROUP_P(INSN) ((INSN)->in_struct)
|
||||
|
||||
/* During sched, for the LOG_LINKS of an insn, these cache the adjusted
|
||||
cost of the dependence link. The cost of executing an instruction
|
||||
may vary based on how the results are used. LINK_COST_ZERO is 1 when
|
||||
the cost through the link varies and is unchanged (i.e., the link has
|
||||
zero additional cost). LINK_COST_FREE is 1 when the cost through the
|
||||
link is zero (i.e., the link makes the cost free). In other cases,
|
||||
the adjustment to the cost is recomputed each time it is needed. */
|
||||
#define LINK_COST_ZERO(X) ((X)->jump)
|
||||
#define LINK_COST_FREE(X) ((X)->call)
|
||||
|
||||
/* For a SET rtx, SET_DEST is the place that is set
|
||||
and SET_SRC is the value it is set to. */
|
||||
#define SET_DEST(RTX) XC2EXP(RTX, 0, SET, CLOBBER)
|
||||
|
|
|
@ -20,6 +20,9 @@ along with GCC; see the file COPYING. If not, write to the Free
|
|||
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
||||
02111-1307, USA. */
|
||||
|
||||
/* Pointer to data describing the current DFA state. */
|
||||
extern state_t curr_state;
|
||||
|
||||
/* Forward declaration. */
|
||||
struct ready_list;
|
||||
|
||||
|
@ -184,7 +187,7 @@ struct haifa_insn_data
|
|||
int dep_count;
|
||||
|
||||
/* An encoding of the blockage range function. Both unit and range
|
||||
are coded. */
|
||||
are coded. This member is used only for old pipeline interface. */
|
||||
unsigned int blockage;
|
||||
|
||||
/* Number of instructions referring to this insn. */
|
||||
|
@ -196,7 +199,8 @@ struct haifa_insn_data
|
|||
|
||||
short cost;
|
||||
|
||||
/* An encoding of the function units used. */
|
||||
/* An encoding of the function units used. This member is used only
|
||||
for old pipeline interface. */
|
||||
short units;
|
||||
|
||||
/* This weight is an estimation of the insn's contribution to
|
||||
|
|
|
@ -62,6 +62,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "recog.h"
|
||||
#include "cfglayout.h"
|
||||
#include "sched-int.h"
|
||||
#include "target.h"
|
||||
|
||||
/* Define when we want to do count REG_DEAD notes before and after scheduling
|
||||
for sanity checking. We can't do that when conditional execution is used,
|
||||
|
@ -2057,7 +2058,14 @@ init_ready_list (ready)
|
|||
|
||||
if (!CANT_MOVE (insn)
|
||||
&& (!IS_SPECULATIVE_INSN (insn)
|
||||
|| (insn_issue_delay (insn) <= 3
|
||||
|| ((((!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
&& insn_issue_delay (insn) <= 3)
|
||||
|| (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ()
|
||||
&& (recog_memoized (insn) < 0
|
||||
|| min_insn_conflict_delay (curr_state,
|
||||
insn, insn) <= 3)))
|
||||
&& check_live (insn, bb_src)
|
||||
&& is_exception_free (insn, bb_src, target_bb))))
|
||||
{
|
||||
|
@ -2165,7 +2173,15 @@ new_ready (next)
|
|||
&& (!IS_VALID (INSN_BB (next))
|
||||
|| CANT_MOVE (next)
|
||||
|| (IS_SPECULATIVE_INSN (next)
|
||||
&& (insn_issue_delay (next) > 3
|
||||
&& (0
|
||||
|| (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ()
|
||||
&& recog_memoized (next) >= 0
|
||||
&& min_insn_conflict_delay (curr_state, next,
|
||||
next) > 3)
|
||||
|| ((!targetm.sched.use_dfa_pipeline_interface
|
||||
|| !(*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
&& insn_issue_delay (next) > 3)
|
||||
|| !check_live (next, INSN_BB (next))
|
||||
|| !is_exception_free (next, INSN_BB (next), target_bb)))))
|
||||
return 0;
|
||||
|
@ -2589,14 +2605,27 @@ debug_dependencies ()
|
|||
fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
|
||||
BB_TO_BLOCK (bb), bb);
|
||||
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
|
||||
"insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
|
||||
"----", "----", "--", "---", "----", "----", "--------", "-----");
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
|
||||
"insn", "code", "bb", "dep", "prio", "cost",
|
||||
"reservation");
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
|
||||
"----", "----", "--", "---", "----", "----",
|
||||
"-----------");
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
|
||||
"insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
|
||||
fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
|
||||
"----", "----", "--", "---", "----", "----", "--------", "-----");
|
||||
}
|
||||
|
||||
for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
|
||||
{
|
||||
rtx link;
|
||||
int unit, range;
|
||||
|
||||
if (! INSN_P (insn))
|
||||
{
|
||||
|
@ -2616,22 +2645,46 @@ debug_dependencies ()
|
|||
continue;
|
||||
}
|
||||
|
||||
unit = insn_unit (insn);
|
||||
range = (unit < 0
|
||||
|| function_units[unit].blockage_range_function == 0) ? 0 :
|
||||
function_units[unit].blockage_range_function (insn);
|
||||
fprintf (sched_dump,
|
||||
";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
|
||||
(SCHED_GROUP_P (insn) ? "+" : " "),
|
||||
INSN_UID (insn),
|
||||
INSN_CODE (insn),
|
||||
INSN_BB (insn),
|
||||
INSN_DEP_COUNT (insn),
|
||||
INSN_PRIORITY (insn),
|
||||
insn_cost (insn, 0, 0),
|
||||
(int) MIN_BLOCKAGE_COST (range),
|
||||
(int) MAX_BLOCKAGE_COST (range));
|
||||
insn_print_units (insn);
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
fprintf (sched_dump,
|
||||
";; %s%5d%6d%6d%6d%6d%6d ",
|
||||
(SCHED_GROUP_P (insn) ? "+" : " "),
|
||||
INSN_UID (insn),
|
||||
INSN_CODE (insn),
|
||||
INSN_BB (insn),
|
||||
INSN_DEP_COUNT (insn),
|
||||
INSN_PRIORITY (insn),
|
||||
insn_cost (insn, 0, 0));
|
||||
|
||||
if (recog_memoized (insn) < 0)
|
||||
fprintf (sched_dump, "nothing");
|
||||
else
|
||||
print_reservation (sched_dump, insn);
|
||||
}
|
||||
else
|
||||
{
|
||||
int unit = insn_unit (insn);
|
||||
int range
|
||||
= (unit < 0
|
||||
|| function_units[unit].blockage_range_function == 0
|
||||
? 0
|
||||
: function_units[unit].blockage_range_function (insn));
|
||||
fprintf (sched_dump,
|
||||
";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
|
||||
(SCHED_GROUP_P (insn) ? "+" : " "),
|
||||
INSN_UID (insn),
|
||||
INSN_CODE (insn),
|
||||
INSN_BB (insn),
|
||||
INSN_DEP_COUNT (insn),
|
||||
INSN_PRIORITY (insn),
|
||||
insn_cost (insn, 0, 0),
|
||||
(int) MIN_BLOCKAGE_COST (range),
|
||||
(int) MAX_BLOCKAGE_COST (range));
|
||||
insn_print_units (insn);
|
||||
}
|
||||
|
||||
fprintf (sched_dump, "\t: ");
|
||||
for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
|
||||
fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
|
||||
|
|
|
@ -31,6 +31,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
#include "basic-block.h"
|
||||
#include "insn-attr.h"
|
||||
#include "sched-int.h"
|
||||
#include "target.h"
|
||||
|
||||
#ifdef INSN_SCHEDULING
|
||||
/* target_units bitmask has 1 for each unit in the cpu. It should be
|
||||
|
@ -38,7 +39,8 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
|
|||
But currently it is computed by examining the insn list. Since
|
||||
this is only needed for visualization, it seems an acceptable
|
||||
solution. (For understanding the mapping of bits to units, see
|
||||
definition of function_units[] in "insn-attrtab.c".) */
|
||||
definition of function_units[] in "insn-attrtab.c".) The scheduler
|
||||
using only DFA description should never use the following variable. */
|
||||
|
||||
static int target_units = 0;
|
||||
|
||||
|
@ -122,6 +124,14 @@ get_visual_tbl_length ()
|
|||
int n, n1;
|
||||
char *s;
|
||||
|
||||
if (targetm.sched.use_dfa_pipeline_interface
|
||||
&& (*targetm.sched.use_dfa_pipeline_interface) ())
|
||||
{
|
||||
visual_tbl_line_length = 1;
|
||||
return 1; /* Can't return 0 because that will cause problems
|
||||
with alloca. */
|
||||
}
|
||||
|
||||
/* Compute length of one field in line. */
|
||||
s = (char *) alloca (INSN_LEN + 6);
|
||||
sprintf (s, " %33s", "uname");
|
||||
|
@ -815,7 +825,8 @@ print_insn (buf, x, verbose)
|
|||
}
|
||||
} /* print_insn */
|
||||
|
||||
/* Print visualization debugging info. */
|
||||
/* Print visualization debugging info. The scheduler using only DFA
|
||||
description should never use the following function. */
|
||||
|
||||
void
|
||||
print_block_visualization (s)
|
||||
|
|
|
@ -145,16 +145,33 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
#define TARGET_SCHED_REORDER 0
|
||||
#define TARGET_SCHED_REORDER2 0
|
||||
#define TARGET_SCHED_CYCLE_DISPLAY 0
|
||||
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 0
|
||||
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN 0
|
||||
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN 0
|
||||
#define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN 0
|
||||
#define TARGET_SCHED_DFA_POST_CYCLE_INSN 0
|
||||
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 0
|
||||
#define TARGET_SCHED_INIT_DFA_BUBBLES 0
|
||||
#define TARGET_SCHED_DFA_BUBBLE 0
|
||||
|
||||
#define TARGET_SCHED {TARGET_SCHED_ADJUST_COST, \
|
||||
TARGET_SCHED_ADJUST_PRIORITY, \
|
||||
TARGET_SCHED_ISSUE_RATE, \
|
||||
TARGET_SCHED_VARIABLE_ISSUE, \
|
||||
TARGET_SCHED_INIT, \
|
||||
TARGET_SCHED_FINISH, \
|
||||
TARGET_SCHED_REORDER, \
|
||||
TARGET_SCHED_REORDER2, \
|
||||
TARGET_SCHED_CYCLE_DISPLAY}
|
||||
#define TARGET_SCHED \
|
||||
{TARGET_SCHED_ADJUST_COST, \
|
||||
TARGET_SCHED_ADJUST_PRIORITY, \
|
||||
TARGET_SCHED_ISSUE_RATE, \
|
||||
TARGET_SCHED_VARIABLE_ISSUE, \
|
||||
TARGET_SCHED_INIT, \
|
||||
TARGET_SCHED_FINISH, \
|
||||
TARGET_SCHED_REORDER, \
|
||||
TARGET_SCHED_REORDER2, \
|
||||
TARGET_SCHED_CYCLE_DISPLAY, \
|
||||
TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE, \
|
||||
TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN, \
|
||||
TARGET_SCHED_DFA_PRE_CYCLE_INSN, \
|
||||
TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN, \
|
||||
TARGET_SCHED_DFA_POST_CYCLE_INSN, \
|
||||
TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD, \
|
||||
TARGET_SCHED_INIT_DFA_BUBBLES, \
|
||||
TARGET_SCHED_DFA_BUBBLE}
|
||||
|
||||
/* All in tree.c. */
|
||||
#define TARGET_MERGE_DECL_ATTRIBUTES merge_decl_attributes
|
||||
|
|
41
gcc/target.h
41
gcc/target.h
|
@ -136,6 +136,47 @@ struct gcc_target
|
|||
insn in the new chain we're building. Returns a new LAST.
|
||||
The default is to do nothing. */
|
||||
rtx (* cycle_display) PARAMS ((int clock, rtx last));
|
||||
/* The following member value is a pointer to a function returning
|
||||
nonzero if we should use DFA based scheduling. The default is
|
||||
to use the old pipeline scheduler. */
|
||||
int (* use_dfa_pipeline_interface) PARAMS ((void));
|
||||
/* The values of all the following members are used only for the
|
||||
DFA based scheduler: */
|
||||
/* The values of the following four members are pointers to
|
||||
functions used to simplify the automaton descriptions.
|
||||
dfa_pre_cycle_insn and dfa_post_cycle_insn give functions
|
||||
returning insns which are used to change the pipeline hazard
|
||||
recognizer state when the new simulated processor cycle
|
||||
correspondingly starts and finishes. The function defined by
|
||||
init_dfa_pre_cycle_insn and init_dfa_post_cycle_insn are used
|
||||
to initialize the corresponding insns. The default values of
|
||||
the memebers result in not changing the automaton state when
|
||||
the new simulated processor cycle correspondingly starts and
|
||||
finishes. */
|
||||
void (* init_dfa_pre_cycle_insn) PARAMS ((void));
|
||||
rtx (* dfa_pre_cycle_insn) PARAMS ((void));
|
||||
void (* init_dfa_post_cycle_insn) PARAMS ((void));
|
||||
rtx (* dfa_post_cycle_insn) PARAMS ((void));
|
||||
/* The following member value is a pointer to a function returning value
|
||||
which defines how many insns in queue `ready' will we try for
|
||||
multi-pass scheduling. if the member value is nonzero and the
|
||||
function returns positive value, the DFA based scheduler will make
|
||||
multi-pass scheduling for the first cycle. In other words, we will
|
||||
try to choose ready insn which permits to start maximum number of
|
||||
insns on the same cycle. */
|
||||
int (* first_cycle_multipass_dfa_lookahead) PARAMS ((void));
|
||||
/* The values of the following members are pointers to functions
|
||||
used to improve the first cycle multipass scheduling by
|
||||
inserting nop insns. dfa_scheduler_bubble gives a function
|
||||
returning a nop insn with given index. The indexes start with
|
||||
zero. The function should return NULL if there are no more nop
|
||||
insns with indexes greater than given index. To initialize the
|
||||
nop insn the function given by member
|
||||
init_dfa_scheduler_bubbles is used. The default values of the
|
||||
members result in not inserting nop insns during the multipass
|
||||
scheduling. */
|
||||
void (* init_dfa_bubbles) PARAMS ((void));
|
||||
rtx (* dfa_bubble) PARAMS ((int));
|
||||
} sched;
|
||||
|
||||
/* Given two decls, merge their attributes and return the result. */
|
||||
|
|
Loading…
Add table
Reference in a new issue