athlon.md, [...]: New files.
* athlon.md, k6.md, pentium.md, ppro.md: New files. * i386.md: Move scheduling information into new files. From-SVN: r53350
This commit is contained in:
parent
3df892916c
commit
af2728a4b4
6 changed files with 805 additions and 729 deletions
|
@ -62,6 +62,9 @@ doc:
|
|||
|
||||
Thu May 9 11:50:09 2002 Jeffrey A Law (law@cygnus.com)
|
||||
|
||||
* athlon.md, k6.md, pentium.md, ppro.md: New files.
|
||||
* i386.md: Move scheduling information into new files.
|
||||
|
||||
* i386.md (type attribute): Add "rotate" for rotate insns.
|
||||
(rotate insns): Set type to "rotate".
|
||||
(various attributes and function units): Treat rotate like shift.
|
||||
|
|
206
gcc/config/i386/athlon.md
Normal file
206
gcc/config/i386/athlon.md
Normal file
|
@ -0,0 +1,206 @@
|
|||
;; AMD Athlon Scheduling
|
||||
;; Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GNU CC.
|
||||
;;
|
||||
;; GNU CC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GNU CC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU CC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
;; Boston, MA 02111-1307, USA.
|
||||
;; Decode class of an insn on the Athlon.  "vector" insns need the single
;; VectorPath decoder; everything else is "direct" (DirectPath).  The
;; attribute is tested by the athlon_vectordec/athlon_directdec function
;; units and by the fcmp entries of athlon_fp.
(define_attr "athlon_decode" "direct,vector"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
	   (const_string "vector")
	 ;; A push whose operand 1 is in memory.
	 (and (eq_attr "type" "push")
	      (match_operand 1 "memory_operand" ""))
	   (const_string "vector")
	 ;; XFmode fmov loads and stores.
	 (and (eq_attr "type" "fmov")
	      (and (eq_attr "memory" "load,store")
		   (eq_attr "mode" "XF")))
	   (const_string "vector")]
	(const_string "direct")))
|
||||
|
||||
;; The Athlon does contain three pipelined FP units, three integer units and
|
||||
;; three address generation units.
|
||||
;;
|
||||
;; The predecode logic is determining boundaries of instructions in the 64
|
||||
;; byte cache line. So the cache line straddling problem of K6 might be an issue
|
||||
;; here as well, but it is not noted in the documentation.
|
||||
;;
|
||||
;; Three DirectPath instructions decoders and only one VectorPath decoder
|
||||
;; is available. They can decode three DirectPath instructions or one VectorPath
|
||||
;; instruction per cycle.
|
||||
;; Decoded macro instructions are then passed to 72 entry instruction control
|
||||
;; unit, that passes
|
||||
;; it to the specialized integer (18 entry) and fp (36 entry) schedulers.
|
||||
;;
|
||||
;; The load/store queue unit is not attached to the schedulers but
|
||||
;; communicates with all the execution units separately instead.
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "vector"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_directdec" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1 [(eq_attr "athlon_decode" "vector")])
|
||||
|
||||
;; Simple integer insns: three "athlon_ieu" units, ready delay 1 and
;; issue delay 1.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,rotate,ibr,call,callv,icmov,cld,pop,setcc,push"))
  1 1)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "str"))
|
||||
15 15)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
42 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
42 42)
|
||||
|
||||
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
|
||||
(cond [(eq_attr "type" "fop,fcmp,fistp")
|
||||
(const_string "add")
|
||||
(eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
|
||||
(const_string "mul")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
|
||||
(const_string "store")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
|
||||
(const_string "any")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand:SI 1 "register_operand" "")
|
||||
(match_operand 1 "immediate_operand" "")))
|
||||
(const_string "store")
|
||||
(eq_attr "type" "fmov")
|
||||
(const_string "muladd")]
|
||||
(const_string "none")))
|
||||
|
||||
;; We use latencies 1 for definitions. This is OK to model collisions
|
||||
;; in execution units. The real latencies are modeled in the "fp" pipeline.
|
||||
|
||||
;; fsin, fcos: 96-192
|
||||
;; fsincos: 107-211
|
||||
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fpspc"))
|
||||
100 1)
|
||||
|
||||
;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fdiv"))
|
||||
24 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fop,fmul,fistp"))
|
||||
4 1)
|
||||
|
||||
;; XFmode loads are slow.
|
||||
;; XFmode store is slow too (8 cycles), but we don't need to model it, because
|
||||
;; there are no dependent instructions.
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "mode" "XF"))))
|
||||
10 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fmov,fsgn"))
|
||||
2 1)
|
||||
|
||||
;; fcmp and ftst instructions
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "direct")))
|
||||
3 1)
|
||||
|
||||
;; fcmpi instructions.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "vector")))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fcmov"))
|
||||
7 1)
|
||||
|
||||
(define_function_unit "athlon_fp_mul" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "mul"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_add" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_store" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "store"))
|
||||
1 1)
|
||||
|
||||
;; We don't need to model the Address Generation Unit, since we don't model
|
||||
;; the re-order buffer yet and thus we never schedule more than three operations
|
||||
;; at time. Later we may want to experiment with MD_SCHED macros modeling the
|
||||
;; decoders independently on the functional units.
|
||||
|
||||
;(define_function_unit "athlon_agu" 3 0
|
||||
; (and (eq_attr "cpu" "athlon")
|
||||
; (and (eq_attr "memory" "!none")
|
||||
; (eq_attr "athlon_fpunits" "none")))
|
||||
; 1 1)
|
||||
|
||||
;; Model load unit to avoid too long sequences of loads. We don't need to
|
||||
;; model store queue, since it is hardly going to be bottleneck.
|
||||
|
||||
(define_function_unit "athlon_load" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "memory" "load,both"))
|
||||
1 1)
|
||||
|
|
@ -322,735 +322,10 @@
|
|||
[(set_attr "length" "128")
|
||||
(set_attr "type" "multi")])
|
||||
|
||||
;; Pentium Scheduling
|
||||
;;
|
||||
;; The Pentium is an in-order core with two integer pipelines.
|
||||
|
||||
;; True for insns that behave like prefixed insns on the Pentium.
|
||||
(define_attr "pent_prefix" "false,true"
|
||||
(if_then_else (ior (eq_attr "prefix_0f" "1")
|
||||
(ior (eq_attr "prefix_data16" "1")
|
||||
(eq_attr "prefix_rep" "1")))
|
||||
(const_string "true")
|
||||
(const_string "false")))
|
||||
|
||||
;; Categorize how an instruction slots.
|
||||
|
||||
;; The non-MMX Pentium slots an instruction with prefixes on U pipe only,
|
||||
;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium
|
||||
;; rules, because it results in noticeably better code on non-MMX Pentium
|
||||
;; and doesn't hurt much on MMX. (Prefixed instructions are not very
|
||||
;; common, so the scheduler usually has a non-prefixed insn to pair).
|
||||
|
||||
(define_attr "pent_pair" "uv,pu,pv,np"
|
||||
(cond [(eq_attr "imm_disp" "true")
|
||||
(const_string "np")
|
||||
(ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
|
||||
(and (eq_attr "type" "pop,push")
|
||||
(eq_attr "memory" "!both")))
|
||||
(if_then_else (eq_attr "pent_prefix" "true")
|
||||
(const_string "pu")
|
||||
(const_string "uv"))
|
||||
(eq_attr "type" "ibr")
|
||||
(const_string "pv")
|
||||
(and (eq_attr "type" "ishift")
|
||||
(match_operand 2 "const_int_operand" ""))
|
||||
(const_string "pu")
|
||||
(and (eq_attr "type" "rotate")
|
||||
(match_operand 2 "const_int_1_operand" ""))
|
||||
(const_string "pu")
|
||||
(and (eq_attr "type" "call")
|
||||
(match_operand 0 "constant_call_address_operand" ""))
|
||||
(const_string "pv")
|
||||
(and (eq_attr "type" "callv")
|
||||
(match_operand 1 "constant_call_address_operand" ""))
|
||||
(const_string "pv")
|
||||
]
|
||||
(const_string "np")))
|
||||
|
||||
(define_automaton "pentium,pentium_fpu")
|
||||
|
||||
;; The Pentium has U and V pipes. Instructions to both pipes
|
||||
;; are always issued together, much like on VLIW.
|
||||
;;
|
||||
;; predecode
|
||||
;; / \
|
||||
;; decodeu decodev
|
||||
;; / | |
|
||||
;; fpu executeu executev
|
||||
;; | | |
|
||||
;; fpu retire retire
|
||||
;; |
|
||||
;; fpu
|
||||
;; We add dummy "port" pipes allocated only first cycle of
|
||||
;; instruction to specify this behaviour.
|
||||
|
||||
(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
|
||||
(define_cpu_unit "pentium-u,pentium-v" "pentium")
|
||||
(absence_set "pentium-portu" "pentium-u,pentium-v")
|
||||
(presence_set "pentium-portv" "pentium-portu")
|
||||
|
||||
;; Floating point instructions can overlap with new issue of integer
|
||||
;; instructions. We model only first cycle of FP pipeline, as it is
|
||||
;; fully pipelined.
|
||||
(define_cpu_unit "pentium-fp" "pentium_fpu")
|
||||
|
||||
;; There is non-pipelined multiplier unit used for complex operations.
|
||||
(define_cpu_unit "pentium-fmul" "pentium_fpu")
|
||||
|
||||
;; Pentium preserves memory ordering, so when load-execute-store
|
||||
;; instruction is executed together with other instruction loading
|
||||
;; data, the execution of the other instruction is delayed to very
|
||||
;; last cycle of first instruction, when data are bypassed.
|
||||
;; We model this by allocating "memory" unit when store is pending
|
||||
;; and using conflicting load units together.
|
||||
|
||||
(define_cpu_unit "pentium-memory" "pentium")
|
||||
(define_cpu_unit "pentium-load0" "pentium")
|
||||
(define_cpu_unit "pentium-load1" "pentium")
|
||||
(absence_set "pentium-load0,pentium-load1" "pentium-memory")
|
||||
|
||||
(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
|
||||
(define_reservation "pentium-np" "(pentium-u + pentium-v)")
|
||||
(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
|
||||
(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
|
||||
(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
|
||||
(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
|
||||
(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
|
||||
(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
|
||||
(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
|
||||
(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
|
||||
| (pentium-firstv,pentium-v,
|
||||
(pentium-load+pentium-firstv))")
|
||||
(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
|
||||
+ pentium-memory)")
|
||||
;; First cycle of a V-pipe insn that both loads and stores: a load unit,
;; the V-pipe first-cycle reservation and the "memory" unit together.
;; This must reserve pentium-firstv (not pentium-firstu), matching
;; pentium-firstvload and its use in pent_v_both.
(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
					  + pentium-memory)")
|
||||
(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
|
||||
+ pentium-memory)
|
||||
| (pentium-firstv,pentium-v,
|
||||
(pentium-load+pentium-firstv))")
|
||||
|
||||
;; Few common long latency instructions
|
||||
(define_insn_reservation "pent_mul" 11
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "imul"))
|
||||
"pentium-np*11")
|
||||
|
||||
(define_insn_reservation "pent_str" 12
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "str"))
|
||||
"pentium-np*12")
|
||||
|
||||
;; Integer division and some other long latency instruction block all
|
||||
;; units, including the FP pipe. There is no value in modeling the
|
||||
;; latency of these instructions and not modeling the latency
|
||||
;; decreases the size of the DFA.
|
||||
(define_insn_reservation "pent_block" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "idiv"))
|
||||
"pentium-np+pentium-fp")
|
||||
|
||||
(define_insn_reservation "pent_cld" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "cld"))
|
||||
"pentium-np*2")
|
||||
|
||||
;; Moves usually have one cycle penalty, but there are exceptions.
|
||||
(define_insn_reservation "pent_fmov" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "none,load")))
|
||||
"(pentium-fp+pentium-np)")
|
||||
|
||||
(define_insn_reservation "pent_fpmovxf" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load,store")
|
||||
(eq_attr "mode" "XF"))))
|
||||
"(pentium-fp+pentium-np)*3")
|
||||
|
||||
(define_insn_reservation "pent_fpstore" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand 1 "immediate_operand" "")
|
||||
(eq_attr "memory" "store"))))
|
||||
"(pentium-fp+pentium-np)*2")
|
||||
|
||||
(define_insn_reservation "pent_imov" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "imov"))
|
||||
"pentium-firstuv")
|
||||
|
||||
;; Push and pop instructions have 1 cycle latency and special
|
||||
;; hardware bypass allows them to be paired with other push,pop
|
||||
;; and call instructions.
|
||||
(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
|
||||
(define_insn_reservation "pent_push" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store")))
|
||||
"pentium-firstuv")
|
||||
|
||||
(define_insn_reservation "pent_pop" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "pop"))
|
||||
"pentium-firstuv")
|
||||
|
||||
;; Call and branch instruction can execute in either pipe, but
|
||||
;; they are only pairable when in the v pipe.
|
||||
(define_insn_reservation "pent_call" 10
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"pentium-firstv,pentium-v*9")
|
||||
|
||||
(define_insn_reservation "pent_branch" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "ibr"))
|
||||
"pentium-firstv")
|
||||
|
||||
;; Floating point instruction dispatch in U pipe, but continue
|
||||
;; in FP pipeline allowing other instructions to be executed.
|
||||
(define_insn_reservation "pent_fp" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fop,fistp"))
|
||||
"(pentium-firstu+pentium-fp),nothing,nothing")
|
||||
|
||||
;; First two cycles of fmul are not pipelined.
|
||||
(define_insn_reservation "pent_fmul" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fmul"))
|
||||
"(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
|
||||
|
||||
;; Long latency FP instructions overlap with integer instructions,
|
||||
;; but only last 2 cycles with FP ones.
|
||||
(define_insn_reservation "pent_fdiv" 39
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"(pentium-np+pentium-fp+pentium-fmul),
|
||||
(pentium-fp+pentium-fmul)*36,pentium-fmul*2")
|
||||
|
||||
(define_insn_reservation "pent_fpspc" 70
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fpspc"))
|
||||
"(pentium-np+pentium-fp+pentium-fmul),
|
||||
(pentium-fp+pentium-fmul)*67,pentium-fmul*2")
|
||||
|
||||
;; Integer instructions. Load/execute/store takes 3 cycles,
|
||||
;; load/execute 2 cycles and execute only one cycle.
|
||||
(define_insn_reservation "pent_uv_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
|
||||
|
||||
(define_insn_reservation "pent_u_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
|
||||
|
||||
(define_insn_reservation "pent_v_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
|
||||
|
||||
(define_insn_reservation "pent_np_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-np,pentium-np,pentium-np")
|
||||
|
||||
(define_insn_reservation "pent_uv_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstuvload,pentium-uv")
|
||||
|
||||
(define_insn_reservation "pent_u_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstuload,pentium-u")
|
||||
|
||||
(define_insn_reservation "pent_v_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstvload,pentium-v")
|
||||
|
||||
(define_insn_reservation "pent_np_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-np,pentium-np")
|
||||
|
||||
(define_insn_reservation "pent_uv" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstuv")
|
||||
|
||||
(define_insn_reservation "pent_u" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstu")
|
||||
|
||||
(define_insn_reservation "pent_v" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstv")
|
||||
|
||||
(define_insn_reservation "pent_np" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-np")
|
||||
|
||||
|
||||
;; Pentium Pro/PII Scheduling
|
||||
;;
|
||||
;; The PPro has an out-of-order core, but the instruction decoders are
|
||||
;; naturally in-order and asymmetric. We get best performance by scheduling
|
||||
;; for the decoders, for in doing so we give the oo execution unit the
|
||||
;; most choices.
|
||||
|
||||
;; Categorize how many uops an ia32 instruction evaluates to:
|
||||
;; one -- an instruction with 1 uop can be decoded by any of the
|
||||
;; three decoders.
|
||||
;; few -- an instruction with 1 to 4 uops can be decoded only by
|
||||
;; decoder 0.
|
||||
;; many -- a complex instruction may take an unspecified number of
|
||||
;; cycles to decode in decoder 0.
|
||||
|
||||
;; Decoder class of an insn ("one", "few" or "many" uops).  The tests are
;; tried in order and the first match wins, so "str" appears only in the
;; "many" list: repeating it in the "few" list could never match.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
	   (const_string "many")
	 (eq_attr "type" "icmov,fcmov,cld")
	   (const_string "few")
	 ;; Integer moves are "few" only when they store to memory.
	 (eq_attr "type" "imov")
	   (if_then_else (eq_attr "memory" "store,both")
			 (const_string "few")
			 (const_string "one"))
	 ;; Any other insn that touches memory.
	 (eq_attr "memory" "!none")
	   (const_string "few")
	]
	(const_string "one")))
|
||||
|
||||
;; Rough readiness numbers. Fine tuning happens in i386.c.
|
||||
;;
|
||||
;; p0 describes port 0.
|
||||
;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either.
|
||||
;; p2 describes port 2 for loads.
|
||||
;; p34 describes ports 3 and 4 for stores.
|
||||
;; fpu describes the fpu accessed via port 0.
|
||||
;; ??? It is less than clear if there are separate fadd and fmul units
|
||||
;; that could operate in parallel.
|
||||
;;
|
||||
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "ishift,rotate,lea,ibr,cld"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imul"))
|
||||
4 1)
|
||||
|
||||
;; ??? Does the divider lock out the pipe while it works,
|
||||
;; or is there a disconnected unit?
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fop,fsgn,fistp"))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fcmov"))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fcmp"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmov"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmul"))
|
||||
5 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 1)
|
||||
|
||||
(define_function_unit "ppro_p01" 2 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "!imov,fmov"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p01" 2 0
|
||||
(and (and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imov,fmov"))
|
||||
(eq_attr "memory" "none"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p2" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(ior (eq_attr "type" "pop")
|
||||
(eq_attr "memory" "load,both")))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "ppro_p34" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(ior (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmul"))
|
||||
5 2)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 56)
|
||||
|
||||
;; imul uses the fpu. ??? does it have the same throughput as fmul?
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imul"))
|
||||
4 1)
|
||||
|
||||
;; AMD K6/K6-2 Scheduling
|
||||
;;
|
||||
;; The K6 has similar architecture to PPro. Important difference is, that
|
||||
;; there are only two decoders and they seem to be much slower than execution
|
||||
;; units. So we have to pay much more attention to proper decoding for
|
||||
;; schedulers. We share most of scheduler code for PPro in i386.c
|
||||
;;
|
||||
;; The fp unit is not pipelined and does one operation per two cycles including
|
||||
;; the FXCH.
|
||||
;;
|
||||
;; alu describes both ALU units (ALU-X and ALU-Y).
|
||||
;; alux describes X alu unit
|
||||
;; fpu describes FPU unit
|
||||
;; load describes load unit.
|
||||
;; branch describes branch unit.
|
||||
;; store describes store unit. This unit is not modelled completely and only
|
||||
;; used to model lea operation. Otherwise it lies outside of the critical
|
||||
;; path.
|
||||
;;
|
||||
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
|
||||
|
||||
;; The decoder specification is in the PPro section above!
|
||||
|
||||
;; Shift instructions and certain arithmetic are issued only to X pipe.
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "ishift,rotate,alu1,negnot,cld"))
|
||||
1 1)
|
||||
|
||||
;; The QI mode arithmetic is issued to X pipe only.
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
|
||||
(match_operand:QI 0 "general_operand" "")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "ishift,rotate,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "none")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_branch" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "call,callv,ibr"))
|
||||
1 1)
|
||||
|
||||
;; Load unit have two cycle latency, but we take care for it in adjust_cost
|
||||
(define_function_unit "k6_load" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(ior (eq_attr "type" "pop")
|
||||
(eq_attr "memory" "load,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_load" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both")))
|
||||
10 10)
|
||||
|
||||
;; Lea have two instructions, so latency is probably 2
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "lea"))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "str"))
|
||||
10 10)
|
||||
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(ior (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fop,fmov,fcmp,fistp"))
|
||||
2 2)
|
||||
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fmul"))
|
||||
2 2)
|
||||
|
||||
;; ??? Guess
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 56)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "imul"))
|
||||
2 2)
|
||||
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "imul"))
|
||||
2 2)
|
||||
|
||||
;; ??? Guess
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
;; AMD Athlon Scheduling
|
||||
;;
|
||||
;; The Athlon does contain three pipelined FP units, three integer units and
|
||||
;; three address generation units.
|
||||
;;
|
||||
;; The predecode logic is determining boundaries of instructions in the 64
|
||||
;; byte cache line. So the cache line straddling problem of K6 might be issue
|
||||
;; here as well, but it is not noted in the documentation.
|
||||
;;
|
||||
;; Three DirectPath instructions decoders and only one VectorPath decoder
|
||||
;; is available. They can decode three DirectPath instructions or one VectorPath
|
||||
;; instruction per cycle.
|
||||
;; Decoded macro instructions are then passed to 72 entry instruction control
|
||||
;; unit, that passes
|
||||
;; it to the specialized integer (18 entry) and fp (36 entry) schedulers.
|
||||
;;
|
||||
;; The load/store queue unit is not attached to the schedulers but
|
||||
;; communicates with all the execution units separately instead.
|
||||
|
||||
;; Decode class of an insn on the Athlon.  "vector" insns need the single
;; VectorPath decoder; everything else is "direct" (DirectPath).  The
;; attribute is tested by the athlon_vectordec/athlon_directdec function
;; units and by the fcmp entries of athlon_fp.
(define_attr "athlon_decode" "direct,vector"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
	   (const_string "vector")
	 ;; A push whose operand 1 is in memory.
	 (and (eq_attr "type" "push")
	      (match_operand 1 "memory_operand" ""))
	   (const_string "vector")
	 ;; XFmode fmov loads and stores.
	 (and (eq_attr "type" "fmov")
	      (and (eq_attr "memory" "load,store")
		   (eq_attr "mode" "XF")))
	   (const_string "vector")]
	(const_string "direct")))
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "vector"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_directdec" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_vectordec" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_decode" "direct"))
|
||||
1 1 [(eq_attr "athlon_decode" "vector")])
|
||||
|
||||
;; Simple integer insns: three "athlon_ieu" units, ready delay 1 and
;; issue delay 1.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,rotate,ibr,call,callv,icmov,cld,pop,setcc,push"))
  1 1)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "str"))
|
||||
15 15)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_ieu" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
42 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "imul"))
|
||||
5 0)
|
||||
|
||||
(define_function_unit "athlon_muldiv" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "idiv"))
|
||||
42 42)
|
||||
|
||||
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
|
||||
(cond [(eq_attr "type" "fop,fcmp,fistp")
|
||||
(const_string "add")
|
||||
(eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
|
||||
(const_string "mul")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
|
||||
(const_string "store")
|
||||
(and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
|
||||
(const_string "any")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand:SI 1 "register_operand" "")
|
||||
(match_operand 1 "immediate_operand" "")))
|
||||
(const_string "store")
|
||||
(eq_attr "type" "fmov")
|
||||
(const_string "muladd")]
|
||||
(const_string "none")))
|
||||
|
||||
;; We use latencies 1 for definitions. This is OK to model collisions
|
||||
;; in execution units. The real latencies are modeled in the "fp" pipeline.
|
||||
|
||||
;; fsin, fcos: 96-192
|
||||
;; fsincos: 107-211
|
||||
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fpspc"))
|
||||
100 1)
|
||||
|
||||
;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fdiv"))
|
||||
24 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fop,fmul,fistp"))
|
||||
4 1)
|
||||
|
||||
;; XFmode loads are slow.
|
||||
;; XFmode store is slow too (8 cycles), but we don't need to model it, because
|
||||
;; there are no dependent instructions.
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "mode" "XF"))))
|
||||
10 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fmov,fsgn"))
|
||||
2 1)
|
||||
|
||||
;; fcmp and ftst instructions
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "direct")))
|
||||
3 1)
|
||||
|
||||
;; fcmpi instructions.
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(and (eq_attr "type" "fcmp")
|
||||
(eq_attr "athlon_decode" "vector")))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "athlon_fp" 3 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "type" "fcmov"))
|
||||
7 1)
|
||||
|
||||
(define_function_unit "athlon_fp_mul" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "mul"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_add" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_muladd" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "muladd,mul,add"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "athlon_fp_store" 1 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "athlon_fpunits" "store"))
|
||||
1 1)
|
||||
|
||||
;; We don't need to model the Address Generation Unit, since we don't model
|
||||
;; the re-order buffer yet and thus we never schedule more than three operations
|
||||
;; at time. Later we may want to experiment with MD_SCHED macros modeling the
|
||||
;; decoders independently on the functional units.
|
||||
|
||||
;(define_function_unit "athlon_agu" 3 0
|
||||
; (and (eq_attr "cpu" "athlon")
|
||||
; (and (eq_attr "memory" "!none")
|
||||
; (eq_attr "athlon_fpunits" "none")))
|
||||
; 1 1)
|
||||
|
||||
;; Model load unit to avoid too long sequences of loads. We don't need to
|
||||
;; model store queue, since it is hardly going to be bottleneck.
|
||||
|
||||
(define_function_unit "athlon_load" 2 0
|
||||
(and (eq_attr "cpu" "athlon")
|
||||
(eq_attr "memory" "load,both"))
|
||||
1 1)
|
||||
|
||||
(include "pentium.md")
|
||||
(include "ppro.md")
|
||||
(include "k6.md")
|
||||
(include "athlon.md")
|
||||
|
||||
;; Compare instructions.
|
||||
|
||||
|
|
136
gcc/config/i386/k6.md
Normal file
136
gcc/config/i386/k6.md
Normal file
|
@ -0,0 +1,136 @@
|
|||
;; AMD K6/K6-2 Scheduling
|
||||
;; Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GNU CC.
|
||||
;;
|
||||
;; GNU CC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GNU CC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU CC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
;; Boston, MA 02111-1307, USA.
|
||||
;;
|
||||
;; The K6 has similar architecture to PPro. Important difference is, that
|
||||
;; there are only two decoders and they seem to be much slower than execution
|
||||
;; units. So we have to pay much more attention to proper decoding for
|
||||
;; schedulers. We share most of scheduler code for PPro in i386.c
|
||||
;;
|
||||
;; The fp unit is not pipelined and does one operation per two cycles including
|
||||
;; the FXCH.
|
||||
;;
|
||||
;; alu describes both ALU units (ALU-X and ALU-Y).
|
||||
;; alux describes X alu unit
|
||||
;; fpu describes FPU unit
|
||||
;; load describes load unit.
|
||||
;; branch describes branch unit.
|
||||
;; store describes store unit. This unit is not modelled completely and only
|
||||
;; used to model lea operation. Otherwise it lies outside of the critical
|
||||
;; path.
|
||||
;;
|
||||
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
|
||||
|
||||
;; The decoder specification is shared with the PPro; see ppro.md.
|
||||
|
||||
;; Shift instructions and certain arithmetic are issued only to X pipe.
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "ishift,rotate,alu1,negnot,cld"))
|
||||
1 1)
|
||||
|
||||
;; The QI mode arithmetic is issued to X pipe only.
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "alu,alu1,negnot,icmp,test,imovx,incdec")
|
||||
(match_operand:QI 0 "general_operand" "")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "ishift,rotate,alu1,negnot,alu,icmp,test,imovx,incdec,setcc,lea"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "imov")
|
||||
(eq_attr "memory" "none")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_branch" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "call,callv,ibr"))
|
||||
1 1)
|
||||
|
||||
;; The load unit has two-cycle latency, but we take care of it in adjust_cost.
|
||||
(define_function_unit "k6_load" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(ior (eq_attr "type" "pop")
|
||||
(eq_attr "memory" "load,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_load" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(and (eq_attr "type" "str")
|
||||
(eq_attr "memory" "load,both")))
|
||||
10 10)
|
||||
|
||||
;; Lea have two instructions, so latency is probably 2
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "lea"))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "str"))
|
||||
10 10)
|
||||
|
||||
(define_function_unit "k6_store" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(ior (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fop,fmov,fcmp,fistp"))
|
||||
2 2)
|
||||
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fmul"))
|
||||
2 2)
|
||||
|
||||
;; ??? Guess
|
||||
(define_function_unit "k6_fpu" 1 1
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 56)
|
||||
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "imul"))
|
||||
2 2)
|
||||
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "imul"))
|
||||
2 2)
|
||||
|
||||
;; ??? Guess
|
||||
(define_function_unit "k6_alu" 2 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
(define_function_unit "k6_alux" 1 0
|
||||
(and (eq_attr "cpu" "k6")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
306
gcc/config/i386/pentium.md
Normal file
306
gcc/config/i386/pentium.md
Normal file
|
@ -0,0 +1,306 @@
|
|||
;; Pentium Scheduling
|
||||
;; Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GNU CC.
|
||||
;;
|
||||
;; GNU CC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GNU CC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU CC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
;; Boston, MA 02111-1307, USA.
|
||||
;;
|
||||
;; The Pentium is an in-order core with two integer pipelines.
|
||||
|
||||
;; True for insns that behave like prefixed insns on the Pentium.
|
||||
(define_attr "pent_prefix" "false,true"
|
||||
(if_then_else (ior (eq_attr "prefix_0f" "1")
|
||||
(ior (eq_attr "prefix_data16" "1")
|
||||
(eq_attr "prefix_rep" "1")))
|
||||
(const_string "true")
|
||||
(const_string "false")))
|
||||
|
||||
;; Categorize how an instruction slots.
|
||||
|
||||
;; The non-MMX Pentium slots an instruction with prefixes on U pipe only,
|
||||
;; while MMX Pentium can slot it on either U or V. Model non-MMX Pentium
|
||||
;; rules, because it results in noticeably better code on non-MMX Pentium
|
||||
;; and doesn't hurt much on MMX. (Prefixed instructions are not very
|
||||
;; common, so the scheduler usually has a non-prefixed insn to pair).
|
||||
|
||||
(define_attr "pent_pair" "uv,pu,pv,np"
|
||||
(cond [(eq_attr "imm_disp" "true")
|
||||
(const_string "np")
|
||||
(ior (eq_attr "type" "alu1,alu,imov,icmp,test,lea,incdec")
|
||||
(and (eq_attr "type" "pop,push")
|
||||
(eq_attr "memory" "!both")))
|
||||
(if_then_else (eq_attr "pent_prefix" "true")
|
||||
(const_string "pu")
|
||||
(const_string "uv"))
|
||||
(eq_attr "type" "ibr")
|
||||
(const_string "pv")
|
||||
(and (eq_attr "type" "ishift")
|
||||
(match_operand 2 "const_int_operand" ""))
|
||||
(const_string "pu")
|
||||
(and (eq_attr "type" "rotate")
|
||||
(match_operand 2 "const_int_1_operand" ""))
|
||||
(const_string "pu")
|
||||
(and (eq_attr "type" "call")
|
||||
(match_operand 0 "constant_call_address_operand" ""))
|
||||
(const_string "pv")
|
||||
(and (eq_attr "type" "callv")
|
||||
(match_operand 1 "constant_call_address_operand" ""))
|
||||
(const_string "pv")
|
||||
]
|
||||
(const_string "np")))
|
||||
|
||||
(define_automaton "pentium,pentium_fpu")
|
||||
|
||||
;; The Pentium has U and V pipes. Instructions for both pipes
|
||||
;; are always issued together, much like on VLIW.
|
||||
;;
|
||||
;; predecode
|
||||
;; / \
|
||||
;; decodeu decodev
|
||||
;; / | |
|
||||
;; fpu executeu executev
|
||||
;; | | |
|
||||
;; fpu retire retire
|
||||
;; |
|
||||
;; fpu
|
||||
;; We add dummy "port" pipes allocated only first cycle of
|
||||
;; instruction to specify this behaviour.
|
||||
|
||||
(define_cpu_unit "pentium-portu,pentium-portv" "pentium")
|
||||
(define_cpu_unit "pentium-u,pentium-v" "pentium")
|
||||
(absence_set "pentium-portu" "pentium-u,pentium-v")
|
||||
(presence_set "pentium-portv" "pentium-portu")
|
||||
|
||||
;; Floating point instructions can overlap with new issue of integer
|
||||
;; instructions. We model only first cycle of FP pipeline, as it is
|
||||
;; fully pipelined.
|
||||
(define_cpu_unit "pentium-fp" "pentium_fpu")
|
||||
|
||||
;; There is non-pipelined multiplier unit used for complex operations.
|
||||
(define_cpu_unit "pentium-fmul" "pentium_fpu")
|
||||
|
||||
;; Pentium preserves memory ordering, so when load-execute-store
|
||||
;; instruction is executed together with other instruction loading
|
||||
;; data, the execution of the other instruction is delayed to very
|
||||
;; last cycle of first instruction, when data are bypassed.
|
||||
;; We model this by allocating "memory" unit when store is pending
|
||||
;; and using conflicting load units together.
|
||||
|
||||
(define_cpu_unit "pentium-memory" "pentium")
|
||||
(define_cpu_unit "pentium-load0" "pentium")
|
||||
(define_cpu_unit "pentium-load1" "pentium")
|
||||
(absence_set "pentium-load0,pentium-load1" "pentium-memory")
|
||||
|
||||
(define_reservation "pentium-load" "(pentium-load0 | pentium-load1)")
|
||||
(define_reservation "pentium-np" "(pentium-u + pentium-v)")
|
||||
(define_reservation "pentium-uv" "(pentium-u | pentium-v)")
|
||||
(define_reservation "pentium-portuv" "(pentium-portu | pentium-portv)")
|
||||
(define_reservation "pentium-firstu" "(pentium-u + pentium-portu)")
|
||||
(define_reservation "pentium-firstv" "(pentium-v + pentium-portuv)")
|
||||
(define_reservation "pentium-firstuv" "(pentium-uv + pentium-portuv)")
|
||||
(define_reservation "pentium-firstuload" "(pentium-load + pentium-firstu)")
|
||||
(define_reservation "pentium-firstvload" "(pentium-load + pentium-firstv)")
|
||||
(define_reservation "pentium-firstuvload" "(pentium-load + pentium-firstuv)
|
||||
| (pentium-firstv,pentium-v,
|
||||
(pentium-load+pentium-firstv))")
|
||||
(define_reservation "pentium-firstuboth" "(pentium-load + pentium-firstu
|
||||
+ pentium-memory)")
|
||||
;; First cycle of a load-execute-store insn that may only issue in the
;; V pipe: reserve one load unit, the V-pipe first-cycle resources
;; (pentium-firstv) and the pending-store "memory" unit.  This must use
;; pentium-firstv, not pentium-firstu: the pent_v_both reservation that
;; uses it continues on pentium-v in the following cycles.
(define_reservation "pentium-firstvboth" "(pentium-load + pentium-firstv
                                          + pentium-memory)")
|
||||
;; First cycle(s) of a load-execute-store insn that may issue in either pipe.
;; The first alternative reserves a load unit, the first-cycle resources of
;; either pipe and the pending-store "memory" unit.  The second alternative
;; takes the V pipe for two cycles and only then reserves the load unit.
;; NOTE(review): the second alternative is the same text as the one in
;; pentium-firstuvload and does not reserve pentium-memory -- confirm that
;; this is intended for the store-pending case.
(define_reservation "pentium-firstuvboth" "(pentium-load + pentium-firstuv
|
||||
+ pentium-memory)
|
||||
| (pentium-firstv,pentium-v,
|
||||
(pentium-load+pentium-firstv))")
|
||||
|
||||
;; Few common long latency instructions
|
||||
(define_insn_reservation "pent_mul" 11
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "imul"))
|
||||
"pentium-np*11")
|
||||
|
||||
(define_insn_reservation "pent_str" 12
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "str"))
|
||||
"pentium-np*12")
|
||||
|
||||
;; Integer division and some other long latency instruction block all
|
||||
;; units, including the FP pipe. There is no value in modeling the
|
||||
;; latency of these instructions and not modeling the latency
|
||||
;; decreases the size of the DFA.
|
||||
(define_insn_reservation "pent_block" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "idiv"))
|
||||
"pentium-np+pentium-fp")
|
||||
|
||||
(define_insn_reservation "pent_cld" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "cld"))
|
||||
"pentium-np*2")
|
||||
|
||||
;; Moves usually have one cycle penalty, but there are exceptions.
|
||||
;; FP moves with no memory operand or with a load: latency 1, occupying the
;; FP unit and both integer pipes for one cycle.
;; NOTE(review): this condition does not test "mode", so it also matches
;; XF-mode loads, which pent_fpmovxf (latency 3) is written to cover as
;; well.  Which reservation applies to such insns is not visible from this
;; file -- confirm, and consider excluding XF-mode loads here.
(define_insn_reservation "pent_fmov" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(eq_attr "memory" "none,load")))
|
||||
"(pentium-fp+pentium-np)")
|
||||
|
||||
(define_insn_reservation "pent_fpmovxf" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(and (eq_attr "memory" "load,store")
|
||||
(eq_attr "mode" "XF"))))
|
||||
"(pentium-fp+pentium-np)*3")
|
||||
|
||||
(define_insn_reservation "pent_fpstore" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "fmov")
|
||||
(ior (match_operand 1 "immediate_operand" "")
|
||||
(eq_attr "memory" "store"))))
|
||||
"(pentium-fp+pentium-np)*2")
|
||||
|
||||
(define_insn_reservation "pent_imov" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "imov"))
|
||||
"pentium-firstuv")
|
||||
|
||||
;; Push and pop instructions have 1 cycle latency and special
|
||||
;; hardware bypass allows them to be paired with other push,pop
|
||||
;; and call instructions.
|
||||
(define_bypass 0 "pent_push,pent_pop" "pent_push,pent_pop,pent_call")
|
||||
(define_insn_reservation "pent_push" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store")))
|
||||
"pentium-firstuv")
|
||||
|
||||
(define_insn_reservation "pent_pop" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "pop"))
|
||||
"pentium-firstuv")
|
||||
|
||||
;; Call and branch instruction can execute in either pipe, but
|
||||
;; they are only pairable when in the v pipe.
|
||||
(define_insn_reservation "pent_call" 10
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"pentium-firstv,pentium-v*9")
|
||||
|
||||
(define_insn_reservation "pent_branch" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "ibr"))
|
||||
"pentium-firstv")
|
||||
|
||||
;; Floating point instruction dispatch in U pipe, but continue
|
||||
;; in FP pipeline allowing other instructions to be executed.
|
||||
(define_insn_reservation "pent_fp" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fop,fistp"))
|
||||
"(pentium-firstu+pentium-fp),nothing,nothing")
|
||||
|
||||
;; First two cycles of fmul are not pipelined.
|
||||
(define_insn_reservation "pent_fmul" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fmul"))
|
||||
"(pentium-firstuv+pentium-fp+pentium-fmul),pentium-fmul,nothing")
|
||||
|
||||
;; Long latency FP instructions overlap with integer instructions,
|
||||
;; but only last 2 cycles with FP ones.
|
||||
(define_insn_reservation "pent_fdiv" 39
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fdiv"))
|
||||
"(pentium-np+pentium-fp+pentium-fmul),
|
||||
(pentium-fp+pentium-fmul)*36,pentium-fmul*2")
|
||||
|
||||
(define_insn_reservation "pent_fpspc" 70
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(eq_attr "type" "fpspc"))
|
||||
"(pentium-np+pentium-fp+pentium-fmul),
|
||||
(pentium-fp+pentium-fmul)*67,pentium-fmul*2")
|
||||
|
||||
;; Integer instructions. Load/execute/store takes 3 cycles,
|
||||
;; load/execute 2 cycles and execute only one cycle.
|
||||
(define_insn_reservation "pent_uv_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstuvboth,pentium-uv+pentium-memory,pentium-uv")
|
||||
|
||||
(define_insn_reservation "pent_u_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstuboth,pentium-u+pentium-memory,pentium-u")
|
||||
|
||||
(define_insn_reservation "pent_v_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-firstvboth,pentium-v+pentium-memory,pentium-v")
|
||||
|
||||
(define_insn_reservation "pent_np_both" 3
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "both")))
|
||||
"pentium-np,pentium-np,pentium-np")
|
||||
|
||||
(define_insn_reservation "pent_uv_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstuvload,pentium-uv")
|
||||
|
||||
(define_insn_reservation "pent_u_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstuload,pentium-u")
|
||||
|
||||
(define_insn_reservation "pent_v_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-firstvload,pentium-v")
|
||||
|
||||
(define_insn_reservation "pent_np_load" 2
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "load")))
|
||||
"pentium-np,pentium-np")
|
||||
|
||||
(define_insn_reservation "pent_uv" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "uv")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstuv")
|
||||
|
||||
(define_insn_reservation "pent_u" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pu")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstu")
|
||||
|
||||
(define_insn_reservation "pent_v" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "pv")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-firstv")
|
||||
|
||||
(define_insn_reservation "pent_np" 1
|
||||
(and (eq_attr "cpu" "pentium")
|
||||
(and (eq_attr "pent_pair" "np")
|
||||
(eq_attr "memory" "none")))
|
||||
"pentium-np")
|
||||
|
150
gcc/config/i386/ppro.md
Normal file
150
gcc/config/i386/ppro.md
Normal file
|
@ -0,0 +1,150 @@
|
|||
;; Pentium Pro/PII Scheduling
|
||||
;; Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GNU CC.
|
||||
;;
|
||||
;; GNU CC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GNU CC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GNU CC; see the file COPYING. If not, write to
|
||||
;; the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
;; Boston, MA 02111-1307, USA.
|
||||
|
||||
;; Categorize how many uops an ia32 instruction evaluates to:
|
||||
;; one -- an instruction with 1 uop can be decoded by any of the
|
||||
;; three decoders.
|
||||
;; few -- an instruction with 1 to 4 uops can be decoded only by
|
||||
;; decoder 0.
|
||||
;; many -- a complex instruction may take an unspecified number of
|
||||
;; cycles to decode in decoder 0.
|
||||
|
||||
;; Decoder class of an insn ("one", "few" or "many" uops).  The cond arms
;; are tried in order and the first matching test decides the value:
;;   - complex insn types are "many";
;;   - conditional moves and cld are "few";
;;   - integer moves are "few" when they store to memory, otherwise "one";
;;   - any other insn with a memory operand is "few";
;;   - everything else is "one".
;; "str" is listed only in the "many" arm; a second mention in the "few"
;; arm could never match because the first arm already catches it.
(define_attr "ppro_uops" "one,few,many"
  (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
           (const_string "many")
         (eq_attr "type" "icmov,fcmov,cld")
           (const_string "few")
         (eq_attr "type" "imov")
           (if_then_else (eq_attr "memory" "store,both")
             (const_string "few")
             (const_string "one"))
         (eq_attr "memory" "!none")
           (const_string "few")
        ]
        (const_string "one")))
|
||||
|
||||
;;
|
||||
;; The PPro has an out-of-order core, but the instruction decoders are
|
||||
;; naturally in-order and asymmetric. We get best performance by scheduling
|
||||
;; for the decoders, for in doing so we give the oo execution unit the
|
||||
;; most choices.
|
||||
;;
|
||||
;; Rough readiness numbers. Fine tuning happens in i386.c.
|
||||
;;
|
||||
;; p0 describes port 0.
|
||||
;; p01 describes ports 0 and 1 as a pair; alu insns can issue to either.
|
||||
;; p2 describes port 2 for loads.
|
||||
;; p34 describes ports 3 and 4 for stores.
|
||||
;; fpu describes the fpu accessed via port 0.
|
||||
;; ??? It is less than clear if there are separate fadd and fmul units
|
||||
;; that could operate in parallel.
|
||||
;;
|
||||
;; ??? fxch isn't handled; not an issue until sched3 after reg-stack is real.
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "ishift,rotate,lea,ibr,cld"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imul"))
|
||||
4 1)
|
||||
|
||||
;; ??? Does the divider lock out the pipe while it works,
|
||||
;; or is there a disconnected unit?
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "idiv"))
|
||||
17 17)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fop,fsgn,fistp"))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fcmov"))
|
||||
2 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fcmp"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmov"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmul"))
|
||||
5 1)
|
||||
|
||||
(define_function_unit "ppro_p0" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 1)
|
||||
|
||||
;; Ports 0 and 1 treated as a pair: two identical units (multiplicity 2,
;; no simultaneity limit), ready delay 1 and issue delay 1.
;; The leading "!" negates the whole comma-separated list, so this entry
;; applies to every PentiumPro insn whose type is neither imov nor fmov;
;; the moves are handled by the separate ppro_p01 entry that also tests
;; the "memory" attribute.
(define_function_unit "ppro_p01" 2 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "!imov,fmov"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p01" 2 0
|
||||
(and (and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imov,fmov"))
|
||||
(eq_attr "memory" "none"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "ppro_p2" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(ior (eq_attr "type" "pop")
|
||||
(eq_attr "memory" "load,both")))
|
||||
3 1)
|
||||
|
||||
(define_function_unit "ppro_p34" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(ior (eq_attr "type" "push")
|
||||
(eq_attr "memory" "store,both")))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fop,fsgn,fmov,fcmp,fcmov,fistp"))
|
||||
1 1)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fmul"))
|
||||
5 2)
|
||||
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "fdiv,fpspc"))
|
||||
56 56)
|
||||
|
||||
;; imul uses the fpu. ??? does it have the same throughput as fmul?
|
||||
(define_function_unit "fpu" 1 0
|
||||
(and (eq_attr "cpu" "pentiumpro")
|
||||
(eq_attr "type" "imul"))
|
||||
4 1)
|
Loading…
Add table
Reference in a new issue