invoke.texi: List r1x000 family under the -march MIPS option.
gcc/ 2008-10-06 Joshua Kinard <kumba@gentoo.org> * doc/invoke.texi: List r1x000 family under the -march MIPS option. * config/mips/mips.h (PROCESSOR_R10000): New processor_type. * config/mips/mips.c (mips_cpu_info_table): Add r10000, r12000, r14000 and r16000. (mips_rtx_cost_data): Add a PROCESSOR_R10000 entry. (mips_issue_rate): Handle PROCESSOR_R10000. * config/mips/mips.md (cpu): Add r10000. Include r10000.md. * config/mips/10000.md: New file. From-SVN: r140913
This commit is contained in:
parent
944258ebc6
commit
7a3446ec7c
6 changed files with 290 additions and 2 deletions
|
@ -1,3 +1,15 @@
|
|||
2008-10-06 Joshua Kinard <kumba@gentoo.org>
|
||||
|
||||
* doc/invoke.texi: List r1x000 family under the -march MIPS option.
|
||||
* config/mips/mips.h (PROCESSOR_R10000): New processor_type.
|
||||
* config/mips/mips.c (mips_cpu_info_table): Add r10000, r12000,
|
||||
r14000 and r16000.
|
||||
(mips_rtx_cost_data): Add a PROCESSOR_R10000 entry.
|
||||
(mips_issue_rate): Handle PROCESSOR_R10000.
|
||||
* config/mips/mips.md (cpu): Add r10000.
|
||||
Include r10000.md.
|
||||
* config/mips/10000.md: New file.
|
||||
|
||||
2008-10-06 Richard Sandiford <rdsandiford@googlemail.com>
|
||||
|
||||
* config/rs6000/rs6000-protos.h (rs6000_find_base_term): Declare.
|
||||
|
|
253
gcc/config/mips/10000.md
Normal file
253
gcc/config/mips/10000.md
Normal file
|
@ -0,0 +1,253 @@
|
|||
;; DFA-based pipeline description for the VR1x000.
|
||||
;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
|
||||
;; GCC is free software; you can redistribute it and/or modify it
|
||||
;; under the terms of the GNU General Public License as published
|
||||
;; by the Free Software Foundation; either version 3, or (at your
|
||||
;; option) any later version.
|
||||
|
||||
;; GCC is distributed in the hope that it will be useful, but WITHOUT
|
||||
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
||||
;; License for more details.
|
||||
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
|
||||
;; until specific tuning for each is added.
|
||||
|
||||
;; R10000 has an int queue, fp queue, address queue.
|
||||
;; The int queue feeds ALU1 and ALU2.
|
||||
;; The fp queue feeds the fp-adder and fp-multiplier.
|
||||
;; The addr queue feeds the Load/Store unit.
|
||||
;;
|
||||
;; However, we define the fp-adder and fp-multiplier as
|
||||
;; separate automatons, because the fp-multiplier is
|
||||
;; divided into fp-multiplier, fp-division, and
|
||||
;; fp-squareroot units, all of which share the same
|
||||
;; issue and completion logic, yet can operate in
|
||||
;; parallel.
|
||||
;;
|
||||
;; This is based on the model described in the R10K Manual
|
||||
;; and it helps to reduce the size of the automata.
|
||||
(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
|
||||
r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")
|
||||
|
||||
(define_cpu_unit "r10k_alu1" "r10k_a_int")
|
||||
(define_cpu_unit "r10k_alu2" "r10k_a_int")
|
||||
(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
|
||||
(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
|
||||
(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
|
||||
(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")
|
||||
(define_cpu_unit "r10k_loadstore" "r10k_a_addr")
|
||||
|
||||
|
||||
;; R10k Loads and Stores.
|
||||
(define_insn_reservation "r10k_load" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "load,prefetch,prefetchx"))
|
||||
"r10k_loadstore")
|
||||
|
||||
(define_insn_reservation "r10k_store" 0
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "store,fpstore,fpidxstore"))
|
||||
"r10k_loadstore")
|
||||
|
||||
(define_insn_reservation "r10k_fpload" 3
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "fpload,fpidxload"))
|
||||
"r10k_loadstore")
|
||||
|
||||
|
||||
;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
|
||||
;; Miscellaneous arith goes here too (this is a guess).
|
||||
(define_insn_reservation "r10k_arith" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
|
||||
"r10k_alu1 | r10k_alu2")
|
||||
|
||||
;; We treat mfhilo differently, because we need to know when
|
||||
;; it's HI and when it's LO.
|
||||
(define_insn_reservation "r10k_mfhi" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "mfhilo")
|
||||
(not (match_operand 1 "lo_operand"))))
|
||||
"r10k_alu1 | r10k_alu2")
|
||||
|
||||
(define_insn_reservation "r10k_mflo" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "mfhilo")
|
||||
(match_operand 1 "lo_operand")))
|
||||
"r10k_alu1 | r10k_alu2")
|
||||
|
||||
|
||||
;; ALU1 handles shifts, branch eval, and condmove.
|
||||
;;
|
||||
;; Brancher is separate, but part of ALU1, but can only
|
||||
;; do one branch per cycle (is this even implementable?).
|
||||
;;
|
||||
;; Unsure if the brancher handles jumps and calls as well, but since
|
||||
;; they're related, we'll add them here for now.
|
||||
(define_insn_reservation "r10k_brancher" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "shift,branch,jump,call"))
|
||||
"r10k_alu1")
|
||||
|
||||
(define_insn_reservation "r10k_int_cmove" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "condmove")
|
||||
(eq_attr "mode" "SI,DI")))
|
||||
"r10k_alu1")
|
||||
|
||||
|
||||
;; Coprocessor Moves.
|
||||
;; mtc1/dmtc1 are handled by ALU1.
|
||||
;; mfc1/dmfc1 are handled by the fp-multiplier.
|
||||
(define_insn_reservation "r10k_mt_xfer" 3
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "mtc"))
|
||||
"r10k_alu1")
|
||||
|
||||
(define_insn_reservation "r10k_mf_xfer" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "mfc"))
|
||||
"r10k_fpmpy")
|
||||
|
||||
|
||||
;; Only ALU2 does int multiplications and divisions.
|
||||
;;
|
||||
;; According to the Vr10000 series user manual,
|
||||
;; integer mult and div insns can be issued one
|
||||
;; cycle earlier if using register Lo. We model
|
||||
;; this by using the Lo value by default, as it
|
||||
;; is the more common value, and use a bypass
|
||||
;; for the Hi value when needed.
|
||||
;;
|
||||
;; Also of note, There are different latencies
|
||||
;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
|
||||
;; However, gcc does not have separate types
|
||||
;; for these insns. Thus to strike a balance,
|
||||
;; we use the Hi latency value for imul
|
||||
;; operations until the imul type can be split.
|
||||
(define_insn_reservation "r10k_imul_single" 6
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "imul,imul3")
|
||||
(eq_attr "mode" "SI")))
|
||||
"r10k_alu2 * 6")
|
||||
|
||||
(define_insn_reservation "r10k_imul_double" 10
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "imul,imul3")
|
||||
(eq_attr "mode" "DI")))
|
||||
"r10k_alu2 * 10")
|
||||
|
||||
;; Divides keep ALU2 busy.
|
||||
(define_insn_reservation "r10k_idiv_single" 34
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "mode" "SI")))
|
||||
"r10k_alu2 * 35")
|
||||
|
||||
(define_insn_reservation "r10k_idiv_double" 66
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "idiv")
|
||||
(eq_attr "mode" "DI")))
|
||||
"r10k_alu2 * 67")
|
||||
|
||||
(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
|
||||
(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")
|
||||
|
||||
|
||||
;; Floating point add/sub, mul, abs value, neg, comp, & moves.
|
||||
(define_insn_reservation "r10k_fp_miscadd" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "fadd,fabs,fneg,fcmp"))
|
||||
"r10k_fpadd")
|
||||
|
||||
(define_insn_reservation "r10k_fp_miscmul" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "fmul,fmove"))
|
||||
"r10k_fpmpy")
|
||||
|
||||
(define_insn_reservation "r10k_fp_cmove" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "condmove")
|
||||
(eq_attr "mode" "SF,DF")))
|
||||
"r10k_fpmpy")
|
||||
|
||||
|
||||
;; The fcvt.s.[wl] insn has latency 4, repeat 2.
|
||||
;; All other fcvt insns have latency 2, repeat 1.
|
||||
(define_insn_reservation "r10k_fcvt_single" 4
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fcvt")
|
||||
(eq_attr "cnv_mode" "I2S")))
|
||||
"r10k_fpadd * 2")
|
||||
|
||||
(define_insn_reservation "r10k_fcvt_other" 2
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fcvt")
|
||||
(eq_attr "cnv_mode" "!I2S")))
|
||||
"r10k_fpadd")
|
||||
|
||||
|
||||
;; Run the fmadd insn through fp-adder first, then fp-multiplier.
|
||||
;;
|
||||
;; The latency for fmadd is 2 cycles if the result is used
|
||||
;; by another fmadd instruction.
|
||||
(define_insn_reservation "r10k_fmadd" 4
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "fmadd"))
|
||||
"r10k_fpadd, r10k_fpmpy")
|
||||
|
||||
(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
|
||||
|
||||
|
||||
;; Floating point Divisions & square roots.
|
||||
(define_insn_reservation "r10k_fdiv_single" 12
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fdiv,frdiv")
|
||||
(eq_attr "mode" "SF")))
|
||||
"r10k_fpdiv * 14")
|
||||
|
||||
(define_insn_reservation "r10k_fdiv_double" 19
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fdiv,frdiv")
|
||||
(eq_attr "mode" "DF")))
|
||||
"r10k_fpdiv * 21")
|
||||
|
||||
(define_insn_reservation "r10k_fsqrt_single" 18
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fsqrt")
|
||||
(eq_attr "mode" "SF")))
|
||||
"r10k_fpsqrt * 20")
|
||||
|
||||
(define_insn_reservation "r10k_fsqrt_double" 33
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "fsqrt")
|
||||
(eq_attr "mode" "DF")))
|
||||
"r10k_fpsqrt * 35")
|
||||
|
||||
(define_insn_reservation "r10k_frsqrt_single" 30
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "frsqrt")
|
||||
(eq_attr "mode" "SF")))
|
||||
"r10k_fpsqrt * 20")
|
||||
|
||||
(define_insn_reservation "r10k_frsqrt_double" 52
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(and (eq_attr "type" "frsqrt")
|
||||
(eq_attr "mode" "DF")))
|
||||
"r10k_fpsqrt * 35")
|
||||
|
||||
|
||||
;; Handle unknown/multi insns here (this is a guess).
|
||||
(define_insn_reservation "r10k_unknown" 1
|
||||
(and (eq_attr "cpu" "r10000")
|
||||
(eq_attr "type" "unknown,multi"))
|
||||
"r10k_alu1 + r10k_alu2")
|
|
@ -607,6 +607,10 @@ static const struct mips_cpu_info mips_cpu_info_table[] = {
|
|||
|
||||
/* MIPS IV processors. */
|
||||
{ "r8000", PROCESSOR_R8000, 4, 0 },
|
||||
{ "r10000", PROCESSOR_R10000, 4, 0 },
|
||||
{ "r12000", PROCESSOR_R10000, 4, 0 },
|
||||
{ "r14000", PROCESSOR_R10000, 4, 0 },
|
||||
{ "r16000", PROCESSOR_R10000, 4, 0 },
|
||||
{ "vr5000", PROCESSOR_R5000, 4, 0 },
|
||||
{ "vr5400", PROCESSOR_R5400, 4, 0 },
|
||||
{ "vr5500", PROCESSOR_R5500, 4, PTF_AVOID_BRANCHLIKELY },
|
||||
|
@ -1015,6 +1019,19 @@ static const struct mips_rtx_cost_data mips_rtx_cost_data[PROCESSOR_MAX] = {
|
|||
1, /* branch_cost */
|
||||
4 /* memory_latency */
|
||||
},
|
||||
{ /* R1x000 */
|
||||
COSTS_N_INSNS (2), /* fp_add */
|
||||
COSTS_N_INSNS (2), /* fp_mult_sf */
|
||||
COSTS_N_INSNS (2), /* fp_mult_df */
|
||||
COSTS_N_INSNS (12), /* fp_div_sf */
|
||||
COSTS_N_INSNS (19), /* fp_div_df */
|
||||
COSTS_N_INSNS (5), /* int_mult_si */
|
||||
COSTS_N_INSNS (9), /* int_mult_di */
|
||||
COSTS_N_INSNS (34), /* int_div_si */
|
||||
COSTS_N_INSNS (66), /* int_div_di */
|
||||
1, /* branch_cost */
|
||||
4 /* memory_latency */
|
||||
},
|
||||
{ /* SB1 */
|
||||
/* These costs are the same as the SB-1A below. */
|
||||
COSTS_N_INSNS (4), /* fp_add */
|
||||
|
@ -10369,7 +10386,10 @@ mips_issue_rate (void)
|
|||
but in reality only a maximum of 3 insns can be issued as
|
||||
floating-point loads and stores also require a slot in the
|
||||
AGEN pipe. */
|
||||
return 4;
|
||||
case PROCESSOR_R10000:
|
||||
/* All R10K Processors are quad-issue (being the first MIPS
|
||||
processors to support this feature). */
|
||||
return 4;
|
||||
|
||||
case PROCESSOR_20KC:
|
||||
case PROCESSOR_R4130:
|
||||
|
|
|
@ -67,6 +67,7 @@ enum processor_type {
|
|||
PROCESSOR_R7000,
|
||||
PROCESSOR_R8000,
|
||||
PROCESSOR_R9000,
|
||||
PROCESSOR_R10000,
|
||||
PROCESSOR_SB1,
|
||||
PROCESSOR_SB1A,
|
||||
PROCESSOR_SR71000,
|
||||
|
|
|
@ -560,7 +560,7 @@
|
|||
;; Attribute describing the processor. This attribute must match exactly
|
||||
;; with the processor_type enumeration in mips.h.
|
||||
(define_attr "cpu"
|
||||
"r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,octeon,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,sb1,sb1a,sr71000,xlr"
|
||||
"r3000,4kc,4kp,5kc,5kf,20kc,24kc,24kf2_1,24kf1_1,74kc,74kf2_1,74kf1_1,74kf3_2,loongson_2e,loongson_2f,m4k,octeon,r3900,r6000,r4000,r4100,r4111,r4120,r4130,r4300,r4600,r4650,r5000,r5400,r5500,r7000,r8000,r9000,r10000,sb1,sb1a,sr71000,xlr"
|
||||
(const (symbol_ref "mips_tune")))
|
||||
|
||||
;; The type of hardware hazard associated with this instruction.
|
||||
|
@ -935,6 +935,7 @@
|
|||
(include "6000.md")
|
||||
(include "7000.md")
|
||||
(include "9000.md")
|
||||
(include "10000.md")
|
||||
(include "loongson2ef.md")
|
||||
(include "octeon.md")
|
||||
(include "sb1.md")
|
||||
|
|
|
@ -12231,6 +12231,7 @@ The processor names are:
|
|||
@samp{r2000}, @samp{r3000}, @samp{r3900}, @samp{r4000}, @samp{r4400},
|
||||
@samp{r4600}, @samp{r4650}, @samp{r6000}, @samp{r8000},
|
||||
@samp{rm7000}, @samp{rm9000},
|
||||
@samp{r10000}, @samp{r12000}, @samp{r14000}, @samp{r16000},
|
||||
@samp{sb1},
|
||||
@samp{sr71000},
|
||||
@samp{vr4100}, @samp{vr4111}, @samp{vr4120}, @samp{vr4130}, @samp{vr4300},
|
||||
|
|
Loading…
Add table
Reference in a new issue