cortex-a7.md: New file.
gcc/ 2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com> * config/arm/cortex-a7.md: New file. * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md. * config/arm/arm.md: Include cortex-a7.md. (generic_sched): Don't use generic scheduler for Cortex-A7. (generic_vfp): Likewise. * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder. (arm_sched_reorder,cortexa7_sched_reorder): New function. (cortexa7_older_only,cortexa7_younger): Likewise. (arm_issue_rate): Add Cortex-A7. From-SVN: r194656
This commit is contained in:
parent
2e612eb2e2
commit
ffeffdcb91
5 changed files with 532 additions and 2 deletions
|
@ -1,3 +1,15 @@
|
|||
2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com>
|
||||
|
||||
* config/arm/cortex-a7.md: New file.
|
||||
* config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
|
||||
* config/arm/arm.md: Include cortex-a7.md.
|
||||
(generic_sched): Don't use generic scheduler for Cortex-A7.
|
||||
(generic_vfp): Likewise.
|
||||
* config/arm/arm.c (TARGET_SCHED_REORDER): Use arm_sched_reorder.
|
||||
(arm_sched_reorder, cortexa7_sched_reorder): New functions.
|
||||
(cortexa7_older_only, cortexa7_younger): Likewise.
|
||||
(arm_issue_rate): Add Cortex-A7.
|
||||
|
||||
2012-12-20 Ian Bolton <ian.bolton@arm.com>
|
||||
|
||||
* gcc/config/aarch64/aarch64.md
|
||||
|
|
|
@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
|
|||
static int arm_comp_type_attributes (const_tree, const_tree);
|
||||
static void arm_set_default_type_attributes (tree);
|
||||
static int arm_adjust_cost (rtx, rtx, rtx, int);
|
||||
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
|
||||
static int optimal_immediate_sequence (enum rtx_code code,
|
||||
unsigned HOST_WIDE_INT val,
|
||||
struct four_ints *return_sequence);
|
||||
|
@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] =
|
|||
#undef TARGET_SCHED_ADJUST_COST
|
||||
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
|
||||
|
||||
#undef TARGET_SCHED_REORDER
|
||||
#define TARGET_SCHED_REORDER arm_sched_reorder
|
||||
|
||||
#undef TARGET_REGISTER_MOVE_COST
|
||||
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost
|
||||
|
||||
|
@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/* Return true if and only if this insn can dual-issue only as older. */
|
||||
static bool
|
||||
cortexa7_older_only (rtx insn)
|
||||
{
|
||||
if (recog_memoized (insn) < 0)
|
||||
return false;
|
||||
|
||||
if (get_attr_insn (insn) == INSN_MOV)
|
||||
return false;
|
||||
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_ALU_REG:
|
||||
case TYPE_LOAD_BYTE:
|
||||
case TYPE_LOAD1:
|
||||
case TYPE_STORE1:
|
||||
case TYPE_FFARITHS:
|
||||
case TYPE_FADDS:
|
||||
case TYPE_FFARITHD:
|
||||
case TYPE_FADDD:
|
||||
case TYPE_FCPYS:
|
||||
case TYPE_F_CVT:
|
||||
case TYPE_FCMPS:
|
||||
case TYPE_FCMPD:
|
||||
case TYPE_FCONSTS:
|
||||
case TYPE_FCONSTD:
|
||||
case TYPE_FMULS:
|
||||
case TYPE_FMACS:
|
||||
case TYPE_FMULD:
|
||||
case TYPE_FMACD:
|
||||
case TYPE_FDIVS:
|
||||
case TYPE_FDIVD:
|
||||
case TYPE_F_2_R:
|
||||
case TYPE_F_FLAG:
|
||||
case TYPE_F_LOADS:
|
||||
case TYPE_F_STORES:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if and only if this insn can dual-issue as younger. */
|
||||
static bool
|
||||
cortexa7_younger (FILE *file, int verbose, rtx insn)
|
||||
{
|
||||
if (recog_memoized (insn) < 0)
|
||||
{
|
||||
if (verbose > 5)
|
||||
fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (get_attr_insn (insn) == INSN_MOV)
|
||||
return true;
|
||||
|
||||
switch (get_attr_type (insn))
|
||||
{
|
||||
case TYPE_SIMPLE_ALU_IMM:
|
||||
case TYPE_SIMPLE_ALU_SHIFT:
|
||||
case TYPE_BRANCH:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Look for an instruction that can dual issue only as an older
|
||||
instruction, and move it in front of any instructions that can
|
||||
dual-issue as younger, while preserving the relative order of all
|
||||
other instructions in the ready list. This is a hueuristic to help
|
||||
dual-issue in later cycles, by postponing issue of more flexible
|
||||
instructions. This heuristic may affect dual issue opportunities
|
||||
in the current cycle. */
|
||||
static void
|
||||
cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
|
||||
int clock)
|
||||
{
|
||||
int i;
|
||||
int first_older_only = -1, first_younger = -1;
|
||||
|
||||
if (verbose > 5)
|
||||
fprintf (file,
|
||||
";; sched_reorder for cycle %d with %d insns in ready list\n",
|
||||
clock,
|
||||
*n_readyp);
|
||||
|
||||
/* Traverse the ready list from the head (the instruction to issue
|
||||
first), and looking for the first instruction that can issue as
|
||||
younger and the first instruction that can dual-issue only as
|
||||
older. */
|
||||
for (i = *n_readyp - 1; i >= 0; i--)
|
||||
{
|
||||
rtx insn = ready[i];
|
||||
if (cortexa7_older_only (insn))
|
||||
{
|
||||
first_older_only = i;
|
||||
if (verbose > 5)
|
||||
fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
|
||||
break;
|
||||
}
|
||||
else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
|
||||
first_younger = i;
|
||||
}
|
||||
|
||||
/* Nothing to reorder because either no younger insn found or insn
|
||||
that can dual-issue only as older appears before any insn that
|
||||
can dual-issue as younger. */
|
||||
if (first_younger == -1)
|
||||
{
|
||||
if (verbose > 5)
|
||||
fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Nothing to reorder because no older-only insn in the ready list. */
|
||||
if (first_older_only == -1)
|
||||
{
|
||||
if (verbose > 5)
|
||||
fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Move first_older_only insn before first_younger. */
|
||||
if (verbose > 5)
|
||||
fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
|
||||
INSN_UID(ready [first_older_only]),
|
||||
INSN_UID(ready [first_younger]));
|
||||
rtx first_older_only_insn = ready [first_older_only];
|
||||
for (i = first_older_only; i < first_younger; i++)
|
||||
{
|
||||
ready[i] = ready[i+1];
|
||||
}
|
||||
|
||||
ready[i] = first_older_only_insn;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Implement TARGET_SCHED_REORDER. */
|
||||
static int
|
||||
arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
|
||||
int clock)
|
||||
{
|
||||
switch (arm_tune)
|
||||
{
|
||||
case cortexa7:
|
||||
cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
|
||||
break;
|
||||
default:
|
||||
/* Do nothing for other cores. */
|
||||
break;
|
||||
}
|
||||
|
||||
return arm_issue_rate ();
|
||||
}
|
||||
|
||||
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
|
||||
It corrects the value of COST based on the relationship between
|
||||
INSN and DEP through the dependence LINK. It returns the new
|
||||
|
@ -25480,6 +25642,7 @@ arm_issue_rate (void)
|
|||
case cortexr5:
|
||||
case genericv7a:
|
||||
case cortexa5:
|
||||
case cortexa7:
|
||||
case cortexa8:
|
||||
case cortexa9:
|
||||
case fa726te:
|
||||
|
|
|
@ -502,7 +502,7 @@
|
|||
|
||||
(define_attr "generic_sched" "yes,no"
|
||||
(const (if_then_else
|
||||
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
|
||||
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4")
|
||||
(eq_attr "tune_cortexr4" "yes"))
|
||||
(const_string "no")
|
||||
(const_string "yes"))))
|
||||
|
@ -510,7 +510,7 @@
|
|||
(define_attr "generic_vfp" "yes,no"
|
||||
(const (if_then_else
|
||||
(and (eq_attr "fpu" "vfp")
|
||||
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
|
||||
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4")
|
||||
(eq_attr "tune_cortexr4" "no"))
|
||||
(const_string "yes")
|
||||
(const_string "no"))))
|
||||
|
@ -527,6 +527,7 @@
|
|||
(include "fmp626.md")
|
||||
(include "fa726te.md")
|
||||
(include "cortex-a5.md")
|
||||
(include "cortex-a7.md")
|
||||
(include "cortex-a8.md")
|
||||
(include "cortex-a9.md")
|
||||
(include "cortex-a15.md")
|
||||
|
|
353
gcc/config/arm/cortex-a7.md
Normal file
353
gcc/config/arm/cortex-a7.md
Normal file
|
@ -0,0 +1,353 @@
|
|||
;; ARM Cortex-A7 pipeline description
|
||||
;; Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; Contributed by ARM Ltd.
|
||||
;; Based on cortex-a5.md which was originally contributed by CodeSourcery.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
;; GCC is free software; you can redistribute it and/or modify it
|
||||
;; under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 3, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GCC is distributed in the hope that it will be useful, but
|
||||
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;; General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
|
||||
(define_automaton "cortex_a7")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Functional units.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The Cortex-A7 integer and VFP pipelines.
|
||||
;; The decode is the same for all instructions, so do not model it.
|
||||
;; We only model the first execution stage because
|
||||
;; instructions always advance one stage per cycle in order.
|
||||
;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together.
|
||||
|
||||
(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7")
|
||||
|
||||
(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2")
|
||||
|
||||
(define_cpu_unit "cortex_a7_branch" "cortex_a7")
|
||||
|
||||
;; Cortex-A7 is in order and can dual-issue under limited circumstances.
|
||||
;; ex2 can be reserved only after ex1 is reserved.
|
||||
|
||||
(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1")
|
||||
|
||||
;; Pseudo-unit for blocking the multiply pipeline when a double-precision
|
||||
;; multiply is in progress.
|
||||
|
||||
(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7")
|
||||
|
||||
;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
|
||||
;; of the add pipeline by fmac instructions, etc.
|
||||
|
||||
(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7")
|
||||
|
||||
;; Floating-point div/sqrt (long latency, out-of-order completion).
|
||||
|
||||
(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7")
|
||||
|
||||
;; Neon pipeline
|
||||
(define_cpu_unit "cortex_a7_neon" "cortex_a7")
|
||||
|
||||
(define_reservation "cortex_a7_all" "cortex_a7_both+\
|
||||
cortex_a7_fpmul_pipe+\
|
||||
cortex_a7_fpadd_pipe+\
|
||||
cortex_a7_fp_div_sqrt+\
|
||||
cortex_a7_neon")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Branches.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; A direct branch can dual issue either as younger or older instruction,
|
||||
;; but branches cannot dual issue with branches.
|
||||
;; No latency as there is no result.
|
||||
|
||||
(define_insn_reservation "cortex_a7_branch" 0
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "branch")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch")
|
||||
|
||||
;; A call reserves all issue slots. The result is available the next cycle.
|
||||
(define_insn_reservation "cortex_a7_call" 1
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "call")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_all")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; ALU instructions.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; ALU instruction with an immediate operand can dual-issue.
|
||||
(define_insn_reservation "cortex_a7_alu_imm" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (ior (eq_attr "type" "simple_alu_imm")
|
||||
(ior (eq_attr "type" "simple_alu_shift")
|
||||
(and (eq_attr "insn" "mov")
|
||||
(not (eq_attr "length" "8")))))
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex2|cortex_a7_ex1")
|
||||
|
||||
;; ALU instruction with register operands can dual-issue
|
||||
;; with a younger immediate-based instruction.
|
||||
(define_insn_reservation "cortex_a7_alu_reg" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "alu_reg")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
(define_insn_reservation "cortex_a7_alu_shift" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "alu_shift,alu_shift_reg")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
;; Forwarding path for unshifted operands.
|
||||
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
|
||||
"cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul")
|
||||
|
||||
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
|
||||
"cortex_a7_store*"
|
||||
"arm_no_early_store_addr_dep")
|
||||
|
||||
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
|
||||
"cortex_a7_alu_shift"
|
||||
"arm_no_early_alu_shift_dep")
|
||||
|
||||
;; The multiplier pipeline can forward results from wr stage only so
|
||||
;; there's no need to specify bypasses.
|
||||
;; Multiply instructions cannot dual-issue.
|
||||
|
||||
(define_insn_reservation "cortex_a7_mul" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "mult")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
;; The latency depends on the operands, so we use an estimate here.
|
||||
(define_insn_reservation "cortex_a7_idiv" 5
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(eq_attr "insn" "udiv,sdiv"))
|
||||
"cortex_a7_all*5")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Load/store instructions.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Address-generation happens in the issue stage.
|
||||
;; Double-word accesses can be issued in a single cycle,
|
||||
;; and occupy only one pipeline stage.
|
||||
|
||||
(define_insn_reservation "cortex_a7_load1" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "load_byte,load1")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
(define_insn_reservation "cortex_a7_store1" 0
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "store1")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
(define_insn_reservation "cortex_a7_load2" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "load2")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
(define_insn_reservation "cortex_a7_store2" 0
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "store2")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
(define_insn_reservation "cortex_a7_load3" 3
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "load3")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both, cortex_a7_ex1")
|
||||
|
||||
;; Three- and four-word stores use the same unit reservations as the
;; load3 and load4 reservations: a three-word access holds both issue
;; slots for one cycle and then ex1 for one more, while a four-word
;; access holds both issue slots for two cycles.  Each reservation must
;; select the "type" that matches its name; the selectors for store3 and
;; store4 were previously swapped.

(define_insn_reservation "cortex_a7_store3" 0
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "store3")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_ex1")

(define_insn_reservation "cortex_a7_load4" 3
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "load4")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_both")

(define_insn_reservation "cortex_a7_store4" 0
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "store4")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_both")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Floating-point arithmetic.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_insn_reservation "cortex_a7_fpalu" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\
|
||||
f_cvt, fcmps, fcmpd")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
|
||||
|
||||
;; For fconsts and fconstd, 8-bit immediate data is passed directly from
|
||||
;; f1 to f3 (which I think reduces the latency by one cycle).
|
||||
|
||||
(define_insn_reservation "cortex_a7_fconst" 3
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fconsts,fconstd")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
|
||||
|
||||
;; We should try not to attempt to issue a single-precision multiplication in
|
||||
;; the middle of a double-precision multiplication operation (the usage of
|
||||
;; cortex_a7_fpmul_pipe).
|
||||
|
||||
(define_insn_reservation "cortex_a7_fpmuls" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fmuls")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpmul_pipe")
|
||||
|
||||
;; For single-precision multiply-accumulate, the add (accumulate) is issued
|
||||
;; whilst the multiply is in F4. The multiply result can then be forwarded
|
||||
;; from F5 to F1. The issue unit is only used once (when we first start
|
||||
;; processing the instruction), but the usage of the FP add pipeline could
|
||||
;; block other instructions attempting to use it simultaneously. We try to
|
||||
;; avoid that using cortex_a7_fpadd_pipe.
|
||||
|
||||
(define_insn_reservation "cortex_a7_fpmacs" 8
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fmacs")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
|
||||
|
||||
;; Non-multiply instructions can issue between two cycles of a
|
||||
;; double-precision multiply.
|
||||
|
||||
(define_insn_reservation "cortex_a7_fpmuld" 7
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fmuld")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
|
||||
cortex_a7_ex1+cortex_a7_fpmul_pipe")
|
||||
|
||||
(define_insn_reservation "cortex_a7_fpmacd" 11
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fmacd")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
|
||||
cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Floating-point divide/square root instructions.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_insn_reservation "cortex_a7_fdivs" 16
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fdivs")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
|
||||
|
||||
(define_insn_reservation "cortex_a7_fdivd" 29
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "fdivd")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; VFP to/from core transfers.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Core-to-VFP transfers.
|
||||
|
||||
(define_insn_reservation "cortex_a7_r2f" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "r_2_f")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
(define_insn_reservation "cortex_a7_f2r" 2
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_2_r")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; VFP flag transfer.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; FIXME: The flag forwarding from fmstat to the second instruction is
|
||||
;; not modeled at present.
|
||||
|
||||
(define_insn_reservation "cortex_a7_f_flags" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_flag")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; VFP load/store.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_insn_reservation "cortex_a7_f_loads" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_loads")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
(define_insn_reservation "cortex_a7_f_loadd" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_loadd")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
(define_insn_reservation "cortex_a7_f_stores" 0
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_stores")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_ex1")
|
||||
|
||||
(define_insn_reservation "cortex_a7_f_stored" 0
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(and (eq_attr "type" "f_stored")
|
||||
(eq_attr "neon_type" "none")))
|
||||
"cortex_a7_both")
|
||||
|
||||
;; Load-to-use for floating-point values has a penalty of one cycle,
|
||||
;; i.e. a latency of two.
|
||||
|
||||
(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
|
||||
"cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
|
||||
cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
|
||||
cortex_a7_f2r")
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; NEON instructions (all NEON types share a single reservation).
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
(define_insn_reservation "cortex_a7_neon" 4
|
||||
(and (eq_attr "tune" "cortexa7")
|
||||
(eq_attr "neon_type" "!none"))
|
||||
"cortex_a7_both*2")
|
|
@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
|
|||
$(srcdir)/config/arm/constraints.md \
|
||||
$(srcdir)/config/arm/cortex-a15.md \
|
||||
$(srcdir)/config/arm/cortex-a5.md \
|
||||
$(srcdir)/config/arm/cortex-a7.md \
|
||||
$(srcdir)/config/arm/cortex-a8.md \
|
||||
$(srcdir)/config/arm/cortex-a8-neon.md \
|
||||
$(srcdir)/config/arm/cortex-a9.md \
|
||||
|
|
Loading…
Add table
Reference in a new issue