arm.c (arm_use_dfa_pipeline_interface): Declare.

2003-06-02  Ben Elliston  <bje@wasabisystems.com>

	* config/arm/arm.c (arm_use_dfa_pipeline_interface): Declare.
	(TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): Define if not already defined.
	(arm_use_dfa_pipeline_interface): Implement.
	* config/arm/arm.md (arm): New automaton.
	(write_buf): Remove function units; new cpu unit.
	(write_blockage): Remove function units; new cpu unit.
	(core): Remove function units; new cpu unit.
	(r_mem_f_wbuf): New instruction reservation.
	(store1_wbuf, store2_wbuf, store3_wbuf, store4_wbuf): Likewise.
	(store1_ldsched, store2, store3, store4): Likewise.
	(load_ldsched, load_ldsched_xscale, load_or_store): Likewise.
	(mult, mult_ldsched, mult_ldsched_strongarm): Likewise.
	(multi_cycle, single_cycle): Likewise.
	* config/arm/fpa.md (armfp): New automaton.
	(fpa): Remove function units; new cpu unit.
	(fpa_mem): Remove function unit; new cpu unit.
	(fdivx, fdivd, fdivs, fmul, ffmul, farith, ffarith): New reservations.
	(r_2_f, f_2_r, f_load, f_store, r_mem_f, f_mem_r): Likewise.

From-SVN: r67322
This commit is contained in:
Ben Elliston 2003-06-02 03:53:54 +00:00 committed by Ben Elliston
parent 3ae1d4c240
commit 103fc15d39
4 changed files with 170 additions and 105 deletions

View file

@ -1,3 +1,24 @@
2003-06-02 Ben Elliston <bje@wasabisystems.com>
* config/arm/arm.c (arm_use_dfa_pipeline_interface): Declare.
(TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE): Define if not already defined.
(arm_use_dfa_pipeline_interface): Implement.
* config/arm/arm.md (arm): New automaton.
(write_buf): Remove function units; new cpu unit.
(write_blockage): Remove function units; new cpu unit.
(core): Remove function units; new cpu unit.
(r_mem_f_wbuf): New instruction reservation.
(store1_wbuf, store2_wbuf, store3_wbuf, store4_wbuf): Likewise.
(store1_ldsched, store2, store3, store4): Likewise.
(load_ldsched, load_ldsched_xscale, load_or_store): Likewise.
(mult, mult_ldsched, mult_ldsched_strongarm): Likewise.
(multi_cycle, single_cycle): Likewise.
* config/arm/fpa.md (armfp): New automaton.
(fpa): Remove function units; new cpu unit.
(fpa_mem): Remove function unit; new cpu unit.
(fdivx, fdivd, fdivs, fmul, ffmul, farith, ffarith): New reservations.
(r_2_f, f_2_r, f_load, f_store, r_mem_f, f_mem_r): Likewise.
2003-06-01 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* builtin-attrs.def (ATTR_ASM_FPRINTF): New.

View file

@ -127,6 +127,7 @@ static void thumb_output_function_prologue PARAMS ((FILE *, Hint));
static int arm_comp_type_attributes PARAMS ((tree, tree));
static void arm_set_default_type_attributes PARAMS ((tree));
static int arm_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int arm_use_dfa_pipeline_interface PARAMS ((void));
static int count_insns_for_constant PARAMS ((Hint, int));
static int arm_get_strip_length PARAMS ((int));
static bool arm_function_ok_for_sibcall PARAMS ((tree, tree));
@ -193,6 +194,9 @@ static void aof_globalize_label PARAMS ((FILE *, Ccstar));
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE arm_use_dfa_pipeline_interface
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
@ -3468,6 +3472,12 @@ arm_address_cost (X)
return (TARGET_ARM ? ARM_ADDRESS_COST (X) : THUMB_ADDRESS_COST (X));
}
/* Implement TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE.  This takes no
   arguments and unconditionally answers true, i.e. the hook always
   reports the DFA pipeline interface as in use.  */
static int
arm_use_dfa_pipeline_interface ()
{
  return true;
}
static int
arm_adjust_cost (insn, link, dep, cost)
rtx insn;

View file

@ -249,12 +249,10 @@
;; distant label. Only applicable to Thumb code.
(define_attr "far_jump" "yes,no" (const_string "no"))
;; (define_function_unit {name} {num-units} {n-users} {test}
;; {ready-delay} {issue-delay} [{conflict-list}])
(define_automaton "arm")
;;--------------------------------------------------------------------
;; Write buffer
;;--------------------------------------------------------------------
;
; Strictly, we should model a 4-deep write buffer for ARM7xx based chips
;
; The write buffer on some of the arm6 processors is hard to model exactly.
@ -266,102 +264,101 @@
; writes will take 2 FCLK cycles per word.  If FCLK and MCLK are asynchronous
; (they aren't allowed to be at present), then there is a startup cost of
; 1 MCLK cycle to add as well.
(define_cpu_unit "write_buf" "arm")
(define_function_unit "write_buf" 1 2
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store1,r_mem_f")) 5 3)
(define_function_unit "write_buf" 1 2
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store2")) 7 4)
(define_function_unit "write_buf" 1 2
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store3")) 9 5)
(define_function_unit "write_buf" 1 2
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store4")) 11 6)
;;--------------------------------------------------------------------
;; Write blockage unit
;;--------------------------------------------------------------------
;
; The write_blockage unit models (partially) the fact that reads will stall
; until the write buffer empties.
; The f_mem_r and r_mem_f could also block, but they are to the stack,
; so we don't model them here.
(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store1")) 5 5
[(eq_attr "write_conflict" "yes")])
(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store2")) 7 7
[(eq_attr "write_conflict" "yes")])
(define_function_unit "write_blockage" 1 0 (and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store3")) 9 9
[(eq_attr "write_conflict" "yes")])
(define_function_unit "write_blockage" 1 0
(and (eq_attr "model_wbuf" "yes") (eq_attr "type" "store4")) 11 11
[(eq_attr "write_conflict" "yes")])
(define_function_unit "write_blockage" 1 0
(and (eq_attr "model_wbuf" "yes")
(eq_attr "write_conflict" "yes")) 1 1)
(define_cpu_unit "write_blockage" "arm")
;;--------------------------------------------------------------------
;; Core unit
;;--------------------------------------------------------------------
; Everything must spend at least one cycle in the core unit
(define_function_unit "core" 1 0 (eq_attr "core_cycles" "single") 1 1)
;; Core
;
(define_cpu_unit "core" "arm")
(define_function_unit "core" 1 0
(and (eq_attr "ldsched" "yes") (eq_attr "type" "store1")) 1 1)
(define_insn_reservation "r_mem_f_wbuf" 5
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "r_mem_f"))
"core+write_buf*3")
(define_function_unit "core" 1 0
(and (eq_attr "ldsched" "yes") (eq_attr "type" "load")) 2 1)
(define_insn_reservation "store1_wbuf" 5
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store1"))
"core+write_buf*3+write_blockage*5")
;; We do not need to conditionalize the define_function_unit immediately
;; above. This one will be ignored for anything other than xscale
;; compiles and for xscale compiles it provides a larger delay
;; and the scheduler will DTRT.
;; FIXME: this test needs to be revamped to not depend on this feature
;; of the scheduler.
(define_insn_reservation "store2_wbuf" 7
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store2"))
"core+write_buf*4+write_blockage*7")
(define_function_unit "core" 1 0
(define_insn_reservation "store3_wbuf" 9
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store3"))
"core+write_buf*5+write_blockage*9")
(define_insn_reservation "store4_wbuf" 11
(and (eq_attr "model_wbuf" "yes")
(eq_attr "type" "store4"))
"core+write_buf*6+write_blockage*11")
(define_insn_reservation "store2" 3
(and (eq_attr "model_wbuf" "no")
(eq_attr "type" "store2"))
"core*3")
(define_insn_reservation "store3" 4
(and (eq_attr "model_wbuf" "no")
(eq_attr "type" "store3"))
"core*4")
(define_insn_reservation "store4" 5
(and (eq_attr "model_wbuf" "no")
(eq_attr "type" "store4"))
"core*5")
(define_insn_reservation "store1_ldsched" 1
(and (eq_attr "ldsched" "yes") (eq_attr "type" "store1"))
"core")
(define_insn_reservation "load_ldsched_xscale" 3
(and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
(eq_attr "is_xscale" "yes"))
3 1)
"core")
(define_function_unit "core" 1 0
(and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1")) 2 2)
(define_insn_reservation "load_ldsched" 2
(and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
(eq_attr "is_xscale" "no"))
"core")
(define_function_unit "core" 1 0
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_load")) 3 3)
(define_insn_reservation "load_or_store" 2
(and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1"))
"core*2")
(define_function_unit "core" 1 0
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_store")) 4 4)
(define_insn_reservation "mult" 16
(and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))
"core*16")
(define_function_unit "core" 1 0
(and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f")) 6 6)
(define_function_unit "core" 1 0
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r")) 7 7)
(define_function_unit "core" 1 0
(and (eq_attr "ldsched" "no") (eq_attr "type" "mult")) 16 16)
(define_function_unit "core" 1 0
(and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "no"))
(eq_attr "type" "mult")) 4 4)
(define_function_unit "core" 1 0
(define_insn_reservation "mult_ldsched_strongarm" 3
(and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "yes"))
(eq_attr "type" "mult")) 3 2)
(eq_attr "type" "mult"))
"core*2")
(define_function_unit "core" 1 0 (eq_attr "type" "store2") 3 3)
(define_insn_reservation "mult_ldsched" 4
(and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "no"))
(eq_attr "type" "mult"))
"core*4")
(define_function_unit "core" 1 0 (eq_attr "type" "store3") 4 4)
(define_function_unit "core" 1 0 (eq_attr "type" "store4") 5 5)
(define_function_unit "core" 1 0
(define_insn_reservation "multi_cycle" 32
(and (eq_attr "core_cycles" "multi")
(eq_attr "type" "!mult,load,store1,store2,store3,store4")) 32 32)
(eq_attr "type" "!mult,load,store1,store2,store3,store4"))
"core*32")
(define_insn_reservation "single_cycle" 1
(eq_attr "core_cycles" "single")
"core")
;;---------------------------------------------------------------------------
;; Insn patterns

View file

@ -22,43 +22,80 @@
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;--------------------------------------------------------------------
;; FPA automaton.
(define_automaton "armfp")
;; Floating point unit (FPA)
;;--------------------------------------------------------------------
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivx")) 71 69)
(define_cpu_unit "fpa" "armfp")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivd")) 59 57)
; The fpa10 doesn't really have a memory read unit, but it can start
; to speculatively execute the instruction in the pipeline, provided
; the data is already loaded, so pretend reads have a delay of 2 (and
; that the pipeline is infinite).
(define_cpu_unit "fpa_mem" "arm")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivs")) 31 29)
(define_insn_reservation "fdivx" 71
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivx"))
"core+fpa*69")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "fmul")) 9 7)
(define_insn_reservation "fdivd" 59
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivd"))
"core+fpa*57")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "ffmul")) 6 4)
(define_insn_reservation "fdivs" 31
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "fdivs"))
"core+fpa*29")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "farith")) 4 2)
(define_insn_reservation "fmul" 9
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "fmul"))
"core+fpa*7")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "ffarith")) 2 2)
(define_insn_reservation "ffmul" 6
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "ffmul"))
"core+fpa*4")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "r_2_f")) 5 3)
(define_insn_reservation "farith" 4
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "farith"))
"core+fpa*2")
(define_function_unit "fpa" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "f_2_r")) 1 2)
(define_insn_reservation "ffarith" 2
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "ffarith"))
"core+fpa*2")
; The fpa10 doesn't really have a memory read unit, but it can start to
; speculatively execute the instruction in the pipeline, provided the data
; is already loaded, so pretend reads have a delay of 2 (and that the
; pipeline is infinite).
(define_insn_reservation "r_2_f" 5
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "r_2_f"))
"core+fpa*3")
(define_insn_reservation "f_2_r" 1
(and (eq_attr "fpu" "fpa")
(eq_attr "type" "f_2_r"))
"core+fpa*2")
(define_insn_reservation "f_load" 3
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_load"))
"fpa_mem+core*3")
(define_insn_reservation "f_store" 4
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_store"))
"core*4")
(define_insn_reservation "r_mem_f" 6
(and (eq_attr "model_wbuf" "no")
(and (eq_attr "fpu" "fpa") (eq_attr "type" "r_mem_f")))
"core*6")
(define_insn_reservation "f_mem_r" 7
(and (eq_attr "fpu" "fpa") (eq_attr "type" "f_mem_r"))
"core*7")
(define_function_unit "fpa_mem" 1 0 (and (eq_attr "fpu" "fpa")
(eq_attr "type" "f_load")) 3 1)
(define_insn "*addsf3_fpa"
[(set (match_operand:SF 0 "s_register_operand" "=f,f")