cortex-a7.md: New file.

gcc/

2012-12-21  Greta Yorsh  <Greta.Yorsh@arm.com>

        * config/arm/cortex-a7.md: New file.
        * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
        * config/arm/arm.md: Include cortex-a7.md.
        (generic_sched): Don't use generic scheduler for Cortex-A7.
        (generic_vfp): Likewise.
        * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
        (arm_sched_reorder,cortexa7_sched_reorder): New function.
        (cortexa7_older_only,cortexa7_younger): Likewise.
        (arm_issue_rate): Add Cortex-A7.

From-SVN: r194656
This commit is contained in:
Greta Yorsh 2012-12-21 09:49:58 +00:00 committed by Greta Yorsh
parent 2e612eb2e2
commit ffeffdcb91
5 changed files with 532 additions and 2 deletions

View file

@ -1,3 +1,15 @@
2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/cortex-a7.md: New file.
* config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
* config/arm/arm.md: Include cortex-a7.md.
(generic_sched): Don't use generic scheduler for Cortex-A7.
(generic_vfp): Likewise.
* config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
(arm_sched_reorder,cortexa7_sched_reorder): New function.
(cortexa7_older_only,cortexa7_younger): Likewise.
(arm_issue_rate): Add Cortex-A7.
2012-12-20 Ian Bolton <ian.bolton@arm.com>
* gcc/config/aarch64/aarch64.md

View file

@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence);
@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost
@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
}
}
/* Return true if and only if INSN can dual-issue only as the older
   instruction of a pair.  Insns that are not recognized, and moves,
   are never reported as older-only.  */
static bool
cortexa7_older_only (rtx insn)
{
  bool older_only = false;

  if (recog_memoized (insn) >= 0 && get_attr_insn (insn) != INSN_MOV)
    {
      switch (get_attr_type (insn))
        {
        /* Integer ALU and single-register memory accesses.  */
        case TYPE_ALU_REG:
        case TYPE_LOAD_BYTE:
        case TYPE_LOAD1:
        case TYPE_STORE1:
        /* Floating-point arithmetic, compares, conversions and constants.  */
        case TYPE_FFARITHS:
        case TYPE_FADDS:
        case TYPE_FFARITHD:
        case TYPE_FADDD:
        case TYPE_FCPYS:
        case TYPE_F_CVT:
        case TYPE_FCMPS:
        case TYPE_FCMPD:
        case TYPE_FCONSTS:
        case TYPE_FCONSTD:
        /* Floating-point multiply, multiply-accumulate and divide.  */
        case TYPE_FMULS:
        case TYPE_FMACS:
        case TYPE_FMULD:
        case TYPE_FMACD:
        case TYPE_FDIVS:
        case TYPE_FDIVD:
        /* VFP transfers, flag moves and single-precision load/store.  */
        case TYPE_F_2_R:
        case TYPE_F_FLAG:
        case TYPE_F_LOADS:
        case TYPE_F_STORES:
          older_only = true;
          break;

        default:
          break;
        }
    }

  return older_only;
}
/* Return true if and only if INSN can dual-issue as the younger
   instruction of a pair.  FILE and VERBOSE are the scheduler dump
   stream and verbosity; when VERBOSE is above 5, an insn that is not
   recognized is reported to FILE.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx insn)
{
  bool younger;

  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  /* A move always qualifies; otherwise decide on the type attribute.  */
  younger = (get_attr_insn (insn) == INSN_MOV);
  if (!younger)
    {
      switch (get_attr_type (insn))
        {
        case TYPE_SIMPLE_ALU_IMM:
        case TYPE_SIMPLE_ALU_SHIFT:
        case TYPE_BRANCH:
          younger = true;
          break;

        default:
          break;
        }
    }

  return younger;
}
/* Look for an instruction that can dual-issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a heuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.

   READY holds *N_READYP insns, with the insn to issue first stored at
   the highest index.  FILE and VERBOSE control dump output (emitted
   when VERBOSE is above 5) and CLOCK is only used in that output.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
                        int clock)
{
  const bool dump = verbose > 5;
  int older_idx = -1;
  int younger_idx = -1;
  int pos;

  if (dump)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock, *n_readyp);

  /* Walk from the head of the ready list towards its tail.  Remember
     the first insn that can issue as younger, and stop at the first
     insn that can dual-issue only as older.  */
  pos = *n_readyp;
  while (--pos >= 0)
    {
      rtx candidate = ready[pos];

      if (cortexa7_older_only (candidate))
        {
          older_idx = pos;
          if (dump)
            fprintf (file, ";; reorder older found %d\n",
                     INSN_UID (candidate));
          break;
        }

      /* Keep the call first: it may write to the dump file.  */
      if (cortexa7_younger (file, verbose, candidate) && younger_idx == -1)
        younger_idx = pos;
    }

  /* No younger insn was seen ahead of the older-only insn (or at all),
     so the list is already in the desired order.  */
  if (younger_idx == -1)
    {
      if (dump)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* There is no older-only insn in the ready list to promote.  */
  if (older_idx == -1)
    {
      if (dump)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  if (dump)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (ready[older_idx]),
             INSN_UID (ready[younger_idx]));

  /* Shift the insns between the two positions one slot towards the
     tail and drop the older-only insn into the slot that the first
     younger insn occupied.  */
  rtx promoted = ready[older_idx];
  for (pos = older_idx; pos < younger_idx; pos++)
    ready[pos] = ready[pos + 1];
  ready[pos] = promoted;
}
/* Implement TARGET_SCHED_REORDER.  Only Cortex-A7 tuning reorders the
   ready list; for every other core the list is left untouched.  The
   value returned is always arm_issue_rate ().  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
                   int clock)
{
  if (arm_tune == cortexa7)
    cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new
@ -25480,6 +25642,7 @@ arm_issue_rate (void)
case cortexr5:
case genericv7a:
case cortexa5:
case cortexa7:
case cortexa8:
case cortexa9:
case fa726te:

View file

@ -502,7 +502,7 @@
(define_attr "generic_sched" "yes,no"
(const (if_then_else
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4")
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4")
(eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
@ -510,7 +510,7 @@
(define_attr "generic_vfp" "yes,no"
(const (if_then_else
(and (eq_attr "fpu" "vfp")
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@ -527,6 +527,7 @@
(include "fmp626.md")
(include "fa726te.md")
(include "cortex-a5.md")
(include "cortex-a7.md")
(include "cortex-a8.md")
(include "cortex-a9.md")
(include "cortex-a15.md")

353
gcc/config/arm/cortex-a7.md Normal file
View file

@ -0,0 +1,353 @@
;; ARM Cortex-A7 pipeline description
;; Copyright (C) 2012 Free Software Foundation, Inc.
;;
;; Contributed by ARM Ltd.
;; Based on cortex-a5.md which was originally contributed by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; Automaton to which every unit declared in this file is assigned.
(define_automaton "cortex_a7")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Functional units.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The Cortex-A7 integer and VFP pipelines.
;; The decode is the same for all instructions, so do not model it.
;; We only model the first execution stage because
;; instructions always advance one stage per cycle in order.
;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together.
;; cortex_a7_ex1 and cortex_a7_ex2 stand for the two issue slots.
(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7")
;; Both issue slots taken in the same cycle; used by reservations of
;; instructions that must not dual-issue.
(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2")
;; Unit taken by the branch reservation in addition to an issue slot,
;; so that two branches cannot be issued in the same cycle.
(define_cpu_unit "cortex_a7_branch" "cortex_a7")
;; Cortex-A7 is in order and can dual-issue under limited circumstances.
;; ex2 can be reserved only after ex1 is reserved.
(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1")
;; Pseudo-unit for blocking the multiply pipeline when a double-precision
;; multiply is in progress.
(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7")
;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
;; of the add pipeline by fmac instructions, etc.
(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7")
;; Floating-point div/sqrt (long latency, out-of-order completion).
(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7")
;; Neon pipeline
(define_cpu_unit "cortex_a7_neon" "cortex_a7")
;; Every unit declared above except cortex_a7_branch, all taken in the
;; same cycle.
(define_reservation "cortex_a7_all" "cortex_a7_both+\
cortex_a7_fpmul_pipe+\
cortex_a7_fpadd_pipe+\
cortex_a7_fp_div_sqrt+\
cortex_a7_neon")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A direct branch can dual issue either as younger or older instruction,
;; but branches cannot dual issue with branches.
;; No latency as there is no result.
;; The branch takes whichever issue slot is available (ex2 is tried
;; first) plus the cortex_a7_branch unit; the latter is what stops a
;; second branch from issuing in the same cycle.
(define_insn_reservation "cortex_a7_branch" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "branch")
(eq_attr "neon_type" "none")))
"(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch")
;; A call reserves all issue slots. The result is available the next cycle.
;; cortex_a7_all also takes the FP multiply, FP add, FP div/sqrt and Neon
;; units for that cycle.
(define_insn_reservation "cortex_a7_call" 1
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "call")
(eq_attr "neon_type" "none")))
"cortex_a7_all")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instruction with an immediate operand can dual-issue.
;; This reservation also covers type simple_alu_shift and any "mov" insn
;; whose length is not 8.  It takes either issue slot, so it can be the
;; older or the younger instruction of a pair.
(define_insn_reservation "cortex_a7_alu_imm" 2
(and (eq_attr "tune" "cortexa7")
(and (ior (eq_attr "type" "simple_alu_imm")
(ior (eq_attr "type" "simple_alu_shift")
(and (eq_attr "insn" "mov")
(not (eq_attr "length" "8")))))
(eq_attr "neon_type" "none")))
"cortex_a7_ex2|cortex_a7_ex1")
;; ALU instruction with register operands can dual-issue
;; with a younger immediate-based instruction.
;; It only ever takes ex1, i.e. it can only be the older of a pair.
(define_insn_reservation "cortex_a7_alu_reg" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "alu_reg")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; ALU instruction with a shifted operand (shift by immediate or by
;; register).  Like cortex_a7_alu_reg it only ever takes ex1.
(define_insn_reservation "cortex_a7_alu_shift" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "alu_shift,alu_shift_reg")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Forwarding path for unshifted operands.
;; Each bypass below lowers the ALU result latency from 2 to 1 for the
;; listed consumers.
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul")
;; Stores: the bypass applies only when arm_no_early_store_addr_dep
;; (defined outside this file) accepts the producer/consumer pair.
;; NOTE(review): presumably that means the result is the stored value
;; rather than part of the address -- confirm against arm.c.
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_store*"
"arm_no_early_store_addr_dep")
;; Shifted-operand ALU consumers: the bypass applies only when
;; arm_no_early_alu_shift_dep (defined outside this file) accepts the
;; pair.  NOTE(review): presumably that means the result is not the
;; operand being shifted -- confirm against arm.c.
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_alu_shift"
"arm_no_early_alu_shift_dep")
;; The multiplier pipeline can forward results from wr stage only so
;; there's no need to specify bypasses.
;; Multiply instructions cannot dual-issue.
;; Hence the reservation takes both issue slots for one cycle.
(define_insn_reservation "cortex_a7_mul" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "mult")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Integer divide (udiv/sdiv).
;; The latency depends on the operands, so we use an estimate here.
;; Every unit in cortex_a7_all is held for the whole 5-cycle estimate, so
;; nothing that needs one of those units can issue while the divide is
;; in flight.  Unlike most reservations in this file, this one has no
;; neon_type test.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
(eq_attr "insn" "udiv,sdiv"))
"cortex_a7_all*5")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/store instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Address-generation happens in the issue stage.
;; Double-word accesses can be issued in a single cycle,
;; and occupy only one pipeline stage.
;; All store reservations below have latency 0.
;; Byte and single-register loads take ex1 only, so they can pair with a
;; younger instruction in ex2.
(define_insn_reservation "cortex_a7_load1" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load_byte,load1")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Single-register store: ex1 only.
(define_insn_reservation "cortex_a7_store1" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store1")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Two-register load: both issue slots for one cycle (no dual issue).
(define_insn_reservation "cortex_a7_load2" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load2")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Two-register store: both issue slots for one cycle.
(define_insn_reservation "cortex_a7_store2" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store2")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Three-register load: both issue slots in the first cycle, then ex1
;; alone in the second, leaving ex2 free in that second cycle.
(define_insn_reservation "cortex_a7_load3" 3
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load3")
(eq_attr "neon_type" "none")))
"cortex_a7_both, cortex_a7_ex1")
;; Three-register store.  Mirrors cortex_a7_load3: both issue slots in
;; the first cycle, then ex1 alone in the second.  Latency 0.
;; The type tested here must agree with the reservation name; it was
;; previously swapped with the one in cortex_a7_store4.
(define_insn_reservation "cortex_a7_store3" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store3")
(eq_attr "neon_type" "none")))
"cortex_a7_both, cortex_a7_ex1")
;; Four-register load: both issue slots for two consecutive cycles.
(define_insn_reservation "cortex_a7_load4" 3
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load4")
(eq_attr "neon_type" "none")))
"cortex_a7_both, cortex_a7_both")
;; Four-register store.  Mirrors cortex_a7_load4: both issue slots for
;; two consecutive cycles.  Latency 0.
(define_insn_reservation "cortex_a7_store4" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store4")
(eq_attr "neon_type" "none")))
"cortex_a7_both, cortex_a7_both")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point arithmetic.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Simple VFP operations (arithmetic, add, copy, convert, compare):
;; issue in ex1 and take the FP add pipeline for one cycle.
(define_insn_reservation "cortex_a7_fpalu" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\
f_cvt, fcmps, fcmpd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
;; For fconsts and fconstd, 8-bit immediate data is passed directly from
;; f1 to f3 (which I think reduces the latency by one cycle).
;; Same unit usage as cortex_a7_fpalu, with latency 3 instead of 4.
(define_insn_reservation "cortex_a7_fconst" 3
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fconsts,fconstd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
;; We should try not to attempt to issue a single-precision multiplication in
;; the middle of a double-precision multiplication operation (the usage of
;; cortex_a7_fpmul_pipe).
(define_insn_reservation "cortex_a7_fpmuls" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmuls")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe")
;; For single-precision multiply-accumulate, the add (accumulate) is issued
;; whilst the multiply is in F4. The multiply result can then be forwarded
;; from F5 to F1. The issue unit is only used once (when we first start
;; processing the instruction), but the usage of the FP add pipeline could
;; block other instructions attempting to use it simultaneously. We try to
;; avoid that using cortex_a7_fpadd_pipe.
;; Reservation: issue and multiply pipe in cycle 1, three idle cycles,
;; then the add pipe for one cycle.
(define_insn_reservation "cortex_a7_fpmacs" 8
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmacs")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply.
;; The multiply pipe is held for four cycles; ex1 is taken only in the
;; first and the fourth of them.
(define_insn_reservation "cortex_a7_fpmuld" 7
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmuld")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
cortex_a7_ex1+cortex_a7_fpmul_pipe")
;; Double-precision multiply-accumulate: the same four cycles as
;; cortex_a7_fpmuld, followed by three idle cycles and then one cycle
;; on the add pipe, as for cortex_a7_fpmacs.
(define_insn_reservation "cortex_a7_fpmacd" 11
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmacd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Single-precision divide/square root: issues in ex1, then holds the
;; div/sqrt unit for 14 cycles.  Only the div/sqrt unit stays busy, so
;; instructions that do not need it can issue in the meantime.
(define_insn_reservation "cortex_a7_fdivs" 16
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fdivs")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
;; Double-precision divide/square root: issues in ex1, then holds the
;; div/sqrt unit for 28 cycles.
(define_insn_reservation "cortex_a7_fdivd" 29
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fdivd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP to/from core transfers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Core-to-VFP transfers.
;; Takes both issue slots, so it never dual-issues.
(define_insn_reservation "cortex_a7_r2f" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "r_2_f")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; VFP-to-core transfers.
;; Takes ex1 only, leaving ex2 free for a younger instruction.
(define_insn_reservation "cortex_a7_f2r" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_2_r")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP flag transfer.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fixme: The flag forwarding from fmstat to the second instruction is
;; not modeled at present.
;; The transfer takes ex1 only and is given a flat latency of 4.
(define_insn_reservation "cortex_a7_f_flags" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_flag")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP load/store.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Single-precision VFP load: ex1 only.
(define_insn_reservation "cortex_a7_f_loads" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_loads")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Double-precision VFP load: both issue slots (no dual issue).
(define_insn_reservation "cortex_a7_f_loadd" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_loadd")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Single-precision VFP store: ex1 only, latency 0.
(define_insn_reservation "cortex_a7_f_stores" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_stores")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Double-precision VFP store: both issue slots, latency 0.
(define_insn_reservation "cortex_a7_f_stored" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_stored")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Load-to-use for floating-point values has a penalty of one cycle,
;; i.e. a latency of two.
;; This overrides the default load latency of 4 for the consumers listed.
;; NOTE(review): cortex_a7_fpmuls and cortex_a7_fconst are not in the
;; consumer list, so they keep latency 4 -- confirm this is intended.
(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
"cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
cortex_a7_f2r")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NEON load/store.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Catch-all for every instruction whose neon_type is not "none" (not
;; only loads and stores): both issue slots are held for two cycles.
;; Note that the reservation does not use the cortex_a7_neon unit; that
;; unit is only taken through cortex_a7_all.
(define_insn_reservation "cortex_a7_neon" 4
(and (eq_attr "tune" "cortexa7")
(eq_attr "neon_type" "!none"))
"cortex_a7_both*2")

View file

@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/constraints.md \
$(srcdir)/config/arm/cortex-a15.md \
$(srcdir)/config/arm/cortex-a5.md \
$(srcdir)/config/arm/cortex-a7.md \
$(srcdir)/config/arm/cortex-a8.md \
$(srcdir)/config/arm/cortex-a8-neon.md \
$(srcdir)/config/arm/cortex-a9.md \