Commit ffeffdcb by Greta Yorsh Committed by Greta Yorsh

cortex-a7.md: New file.

gcc/

2012-12-21  Greta Yorsh  <Greta.Yorsh@arm.com>

        * config/arm/cortex-a7.md: New file.
        * config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
        * config/arm/arm.md: Include cortex-a7.md.
        (generic_sched): Don't use generic scheduler for Cortex-A7.
        (generic_vfp): Likewise.
        * config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
        (arm_sched_reorder,cortexa7_sched_reorder): New function.
        (cortexa7_older_only,cortexa7_younger): Likewise.
        (arm_issue_rate): Add Cortex-A7.

From-SVN: r194656
parent 2e612eb2
2012-12-21 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/cortex-a7.md: New file.
* config/arm/t-arm (MD_INCLUDES): Add cortex-a7.md.
* config/arm/arm.md: Include cortex-a7.md.
(generic_sched): Don't use generic scheduler for Cortex-A7.
(generic_vfp): Likewise.
* config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
(arm_sched_reorder,cortexa7_sched_reorder): New function.
(cortexa7_older_only,cortexa7_younger): Likewise.
(arm_issue_rate): Add Cortex-A7.
2012-12-20 Ian Bolton <ian.bolton@arm.com> 2012-12-20 Ian Bolton <ian.bolton@arm.com>
* gcc/config/aarch64/aarch64.md * gcc/config/aarch64/aarch64.md
...@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); ...@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree); static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree); static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int); static int arm_adjust_cost (rtx, rtx, rtx, int);
static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
static int optimal_immediate_sequence (enum rtx_code code, static int optimal_immediate_sequence (enum rtx_code code,
unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT val,
struct four_ints *return_sequence); struct four_ints *return_sequence);
...@@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] = ...@@ -367,6 +368,9 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_SCHED_ADJUST_COST #undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder
#undef TARGET_REGISTER_MOVE_COST #undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
...@@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass, ...@@ -8694,6 +8698,164 @@ arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
} }
} }
/* Predicate for the Cortex-A7 reordering heuristic: return true exactly
   when INSN can take part in dual issue only as the older instruction
   of the pair.  Insns for which recog_memoized returns a negative value
   and insns whose "insn" attribute is MOV never qualify.  */
static bool
cortexa7_older_only (rtx insn)
{
  bool older_only;

  /* The attribute accessors are consulted only after recog_memoized
     has succeeded, hence the ordering of the two tests.  */
  if (recog_memoized (insn) < 0 || get_attr_insn (insn) == INSN_MOV)
    return false;

  switch (get_attr_type (insn))
    {
    /* Integer ALU and single-register memory accesses.  */
    case TYPE_ALU_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD1:
    case TYPE_STORE1:
    /* VFP arithmetic, compares, constants and conversions.  */
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FCPYS:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    /* VFP transfers and single-precision memory accesses.  */
    case TYPE_F_2_R:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      older_only = true;
      break;

    default:
      older_only = false;
      break;
    }

  return older_only;
}
/* Predicate for the Cortex-A7 reordering heuristic: return true exactly
   when INSN can dual-issue as the younger instruction of a pair.

   FILE and VERBOSE are the scheduler dump stream and verbosity; when
   VERBOSE exceeds 5 a line is written to FILE for an insn on which
   recog_memoized fails.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx insn)
{
  bool can_be_younger = false;

  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  /* Moves always qualify; otherwise only the types listed below do.  */
  if (get_attr_insn (insn) == INSN_MOV)
    can_be_younger = true;
  else
    switch (get_attr_type (insn))
      {
      case TYPE_SIMPLE_ALU_IMM:
      case TYPE_SIMPLE_ALU_SHIFT:
      case TYPE_BRANCH:
        can_be_younger = true;
        break;

      default:
        break;
      }

  return can_be_younger;
}
/* Cortex-A7 ready-list reordering.

   Find an instruction that can dual-issue only as the older instruction
   and move it in front of any instructions that can dual-issue as
   younger, keeping the relative order of every other instruction in the
   ready list.  This is a heuristic that helps dual issue in later
   cycles by postponing the issue of the more flexible instructions; it
   may affect dual-issue opportunities in the current cycle.

   READY holds *N_READYP insns, with the instruction to issue first at
   the highest index.  FILE, VERBOSE and CLOCK are used only for dump
   output, which is produced when VERBOSE exceeds 5.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
                        int clock)
{
  int older_pos = -1;
  int younger_pos = -1;
  int pos;
  rtx moved;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock,
             *n_readyp);

  /* Walk from the head of the ready list towards its tail, remembering
     the first insn that can issue as younger and stopping at the first
     insn that can dual-issue only as older.  */
  for (pos = *n_readyp - 1; pos >= 0 && older_pos == -1; pos--)
    {
      rtx candidate = ready[pos];

      if (!cortexa7_older_only (candidate))
        {
          /* The predicate is evaluated for every such insn, even once a
             younger insn is known, so its dump output is unchanged.  */
          if (cortexa7_younger (file, verbose, candidate)
              && younger_pos == -1)
            younger_pos = pos;
          continue;
        }

      older_pos = pos;
      if (verbose > 5)
        fprintf (file, ";; reorder older found %d\n", INSN_UID (candidate));
    }

  /* No younger insn was seen before the scan ended: either there is
     none, or an older-only insn already precedes all of them.  */
  if (younger_pos == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* The ready list contains no older-only insn at all.  */
  if (older_pos == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  moved = ready[older_pos];

  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID (moved),
             INSN_UID (ready[younger_pos]));

  /* Slide the insns between the two positions one slot towards the tail
     and drop the older-only insn into the slot the first younger insn
     occupied, i.e. closer to the head.  */
  pos = older_pos;
  while (pos < younger_pos)
    {
      ready[pos] = ready[pos + 1];
      pos++;
    }
  ready[pos] = moved;
}
/* Implement TARGET_SCHED_REORDER.  When tuning for Cortex-A7 the ready
   list is passed to cortexa7_sched_reorder; for every other core it is
   left untouched.  The value returned is arm_issue_rate ().  */
static int
arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
                   int clock)
{
  if (arm_tune == cortexa7)
    cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);

  return arm_issue_rate ();
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST. /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new INSN and DEP through the dependence LINK. It returns the new
...@@ -25480,6 +25642,7 @@ arm_issue_rate (void) ...@@ -25480,6 +25642,7 @@ arm_issue_rate (void)
case cortexr5: case cortexr5:
case genericv7a: case genericv7a:
case cortexa5: case cortexa5:
case cortexa7:
case cortexa8: case cortexa8:
case cortexa9: case cortexa9:
case fa726te: case fa726te:
......
...@@ -502,7 +502,7 @@ ...@@ -502,7 +502,7 @@
(define_attr "generic_sched" "yes,no" (define_attr "generic_sched" "yes,no"
(const (if_then_else (const (if_then_else
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexa15,cortexm4") (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa7,cortexa8,cortexa9,cortexa15,cortexm4")
(eq_attr "tune_cortexr4" "yes")) (eq_attr "tune_cortexr4" "yes"))
(const_string "no") (const_string "no")
(const_string "yes")))) (const_string "yes"))))
...@@ -510,7 +510,7 @@ ...@@ -510,7 +510,7 @@
(define_attr "generic_vfp" "yes,no" (define_attr "generic_vfp" "yes,no"
(const (if_then_else (const (if_then_else
(and (eq_attr "fpu" "vfp") (and (eq_attr "fpu" "vfp")
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4") (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa7,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "no")) (eq_attr "tune_cortexr4" "no"))
(const_string "yes") (const_string "yes")
(const_string "no")))) (const_string "no"))))
...@@ -527,6 +527,7 @@ ...@@ -527,6 +527,7 @@
(include "fmp626.md") (include "fmp626.md")
(include "fa726te.md") (include "fa726te.md")
(include "cortex-a5.md") (include "cortex-a5.md")
(include "cortex-a7.md")
(include "cortex-a8.md") (include "cortex-a8.md")
(include "cortex-a9.md") (include "cortex-a9.md")
(include "cortex-a15.md") (include "cortex-a15.md")
......
;; ARM Cortex-A7 pipeline description
;; Copyright (C) 2012 Free Software Foundation, Inc.
;;
;; Contributed by ARM Ltd.
;; Based on cortex-a5.md which was originally contributed by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; A single automaton holds every Cortex-A7 unit declared in this file.
(define_automaton "cortex_a7")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Functional units.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The Cortex-A7 integer and VFP pipelines.
;; The decode is the same for all instructions, so do not model it.
;; We only model the first execution stage because
;; instructions always advance one stage per cycle in order.
;; We model all of the LS, Branch, ALU, MAC and FPU pipelines together.
;; cortex_a7_ex1 and cortex_a7_ex2 are the two issue slots.
(define_cpu_unit "cortex_a7_ex1, cortex_a7_ex2" "cortex_a7")
;; Both issue slots in the same cycle; reservations that use this cannot
;; share the cycle with another instruction.
(define_reservation "cortex_a7_both" "cortex_a7_ex1+cortex_a7_ex2")
;; Extra unit reserved by direct branches in addition to an issue slot, so
;; that two branches cannot be placed in the same cycle.
(define_cpu_unit "cortex_a7_branch" "cortex_a7")
;; Cortex-A7 is in order and can dual-issue under limited circumstances.
;; ex2 can be reserved only after ex1 is reserved.
(final_presence_set "cortex_a7_ex2" "cortex_a7_ex1")
;; Pseudo-unit for blocking the multiply pipeline when a double-precision
;; multiply is in progress.
(define_cpu_unit "cortex_a7_fpmul_pipe" "cortex_a7")
;; The floating-point add pipeline (ex1/f1 stage), used to model the usage
;; of the add pipeline by fmac instructions, etc.
(define_cpu_unit "cortex_a7_fpadd_pipe" "cortex_a7")
;; Floating-point div/sqrt (long latency, out-of-order completion).
(define_cpu_unit "cortex_a7_fp_div_sqrt" "cortex_a7")
;; Neon pipeline
(define_cpu_unit "cortex_a7_neon" "cortex_a7")
;; Both issue slots plus the FP multiply, FP add, FP div/sqrt and Neon
;; units in one cycle.  Used by the call and integer-divide reservations.
;; Note that cortex_a7_branch is not part of this reservation.
(define_reservation "cortex_a7_all" "cortex_a7_both+\
cortex_a7_fpmul_pipe+\
cortex_a7_fpadd_pipe+\
cortex_a7_fp_div_sqrt+\
cortex_a7_neon")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branches.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; A direct branch can dual issue either as younger or older instruction,
;; but branches cannot dual issue with branches.
;; No latency as there is no result.
;; The branch takes either issue slot (ex2 is tried first) and always the
;; cortex_a7_branch unit; the latter is what keeps two branches apart.
(define_insn_reservation "cortex_a7_branch" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "branch")
(eq_attr "neon_type" "none")))
"(cortex_a7_ex2|cortex_a7_ex1)+cortex_a7_branch")
;; A call reserves all issue slots. The result is available the next cycle.
;; cortex_a7_all also claims the FP and Neon units for that cycle.
(define_insn_reservation "cortex_a7_call" 1
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "call")
(eq_attr "neon_type" "none")))
"cortex_a7_all")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instruction with an immediate operand can dual-issue.
;; Besides types "simple_alu_imm" and "simple_alu_shift", this covers
;; moves (insn attribute "mov") whose length is not 8.  Either issue slot
;; may be used, ex2 being tried first.
;; NOTE(review): moves of length 8 are excluded here and are not named by
;; any other condition in this file -- confirm which reservation, if any,
;; is meant to cover them.
(define_insn_reservation "cortex_a7_alu_imm" 2
(and (eq_attr "tune" "cortexa7")
(and (ior (eq_attr "type" "simple_alu_imm")
(ior (eq_attr "type" "simple_alu_shift")
(and (eq_attr "insn" "mov")
(not (eq_attr "length" "8")))))
(eq_attr "neon_type" "none")))
"cortex_a7_ex2|cortex_a7_ex1")
;; ALU instruction with register operands can dual-issue
;; with a younger immediate-based instruction.
;; It is therefore restricted to the ex1 slot.
(define_insn_reservation "cortex_a7_alu_reg" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "alu_reg")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; ALU instructions of type "alu_shift" or "alu_shift_reg" are modelled
;; like alu_reg: latency 2, ex1 slot only.
(define_insn_reservation "cortex_a7_alu_shift" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "alu_shift,alu_shift_reg")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Forwarding path for unshifted operands.
;; Each bypass below lowers the ALU result latency from 2 to 1 for the
;; listed consumers.  The first applies unconditionally.
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_mul")
;; For stores the bypass applies only when the guard function
;; arm_no_early_store_addr_dep (defined outside this file) accepts the
;; producer/consumer pair.
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_store*"
"arm_no_early_store_addr_dep")
;; For shifted ALU consumers the bypass is guarded by
;; arm_no_early_alu_shift_dep (defined outside this file).
(define_bypass 1 "cortex_a7_alu_imm,cortex_a7_alu_reg,cortex_a7_alu_shift"
"cortex_a7_alu_shift"
"arm_no_early_alu_shift_dep")
;; The multiplier pipeline can forward results from wr stage only so
;; there's no need to specify bypasses.
;; Multiply instructions cannot dual-issue.
;; This is modelled by reserving both issue slots for one cycle.
(define_insn_reservation "cortex_a7_mul" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "mult")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; The latency depends on the operands, so we use an estimate here.
;; Integer divide is selected by the "insn" attribute (udiv, sdiv) rather
;; than by "type", has no neon_type test, and holds every unit in
;; cortex_a7_all for five consecutive cycles.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
(eq_attr "insn" "udiv,sdiv"))
"cortex_a7_all*5")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/store instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Address-generation happens in the issue stage.
;; Double-word accesses can be issued in a single cycle,
;; and occupy only one pipeline stage.
;; Types "load_byte" and "load1": ex1 slot only, result latency 2.
(define_insn_reservation "cortex_a7_load1" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load_byte,load1")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Type "store1": ex1 slot only.  All store reservations in this file use
;; latency 0.
(define_insn_reservation "cortex_a7_store1" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store1")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Type "load2": one cycle holding both issue slots, so nothing else
;; issues alongside it.
(define_insn_reservation "cortex_a7_load2" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load2")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Type "store2": same slot usage as load2.
(define_insn_reservation "cortex_a7_store2" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "store2")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Type "load3": both slots in the first cycle, then ex1 alone in the
;; second; result latency 3.
(define_insn_reservation "cortex_a7_load3" 3
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "load3")
(eq_attr "neon_type" "none")))
"cortex_a7_both, cortex_a7_ex1")
;; Type "store3": same slot usage as cortex_a7_load3, i.e. both issue
;; slots in the first cycle and ex1 alone in the second.  Stores produce
;; no result, hence latency 0.
(define_insn_reservation "cortex_a7_store3" 0
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "store3")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_ex1")

;; Type "load4": both issue slots for two consecutive cycles; result
;; latency 3.
(define_insn_reservation "cortex_a7_load4" 3
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "load4")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_both")

;; Type "store4": same slot usage as cortex_a7_load4, i.e. both issue
;; slots for two consecutive cycles.
(define_insn_reservation "cortex_a7_store4" 0
  (and (eq_attr "tune" "cortexa7")
       (and (eq_attr "type" "store4")
            (eq_attr "neon_type" "none")))
  "cortex_a7_both, cortex_a7_both")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point arithmetic.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The FP types listed below (add/sub, copy, convert, compare) issue in
;; ex1 and occupy the FP add pipeline for one cycle; result latency 4.
(define_insn_reservation "cortex_a7_fpalu" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys,\
f_cvt, fcmps, fcmpd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
;; For fconsts and fconstd, 8-bit immediate data is passed directly from
;; f1 to f3 (which I think reduces the latency by one cycle).
;; Same unit usage as cortex_a7_fpalu, with latency 3 instead of 4.
(define_insn_reservation "cortex_a7_fconst" 3
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fconsts,fconstd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpadd_pipe")
;; We should try not to attempt to issue a single-precision multiplication in
;; the middle of a double-precision multiplication operation (the usage of
;; cortex_a7_fpmul_pipe).
(define_insn_reservation "cortex_a7_fpmuls" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmuls")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe")
;; For single-precision multiply-accumulate, the add (accumulate) is issued
;; whilst the multiply is in F4. The multiply result can then be forwarded
;; from F5 to F1. The issue unit is only used once (when we first start
;; processing the instruction), but the usage of the FP add pipeline could
;; block other instructions attempting to use it simultaneously. We try to
;; avoid that using cortex_a7_fpadd_pipe.
;; Reservation: issue and multiply in cycle 1, three idle cycles, then
;; one cycle in the add pipeline.
(define_insn_reservation "cortex_a7_fpmacs" 8
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmacs")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
;; Non-multiply instructions can issue between two cycles of a
;; double-precision multiply.
;; The multiply pipeline is held for four cycles; ex1 is claimed only in
;; the first and the fourth, leaving it free in between.
(define_insn_reservation "cortex_a7_fpmuld" 7
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmuld")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
cortex_a7_ex1+cortex_a7_fpmul_pipe")
;; Double-precision multiply-accumulate: the cortex_a7_fpmuld reservation
;; followed, as for cortex_a7_fpmacs, by three idle cycles and one cycle
;; in the add pipeline.
(define_insn_reservation "cortex_a7_fpmacd" 11
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fmacd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1+cortex_a7_fpmul_pipe, cortex_a7_fpmul_pipe*2,\
cortex_a7_ex1+cortex_a7_fpmul_pipe, nothing*3, cortex_a7_fpadd_pipe")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point divide/square root instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Type "fdivs": issues in ex1, then occupies the div/sqrt unit for 14
;; cycles.  The issue slot is held only in the first cycle, so other
;; instructions can issue while the divide is in progress.
(define_insn_reservation "cortex_a7_fdivs" 16
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fdivs")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 14")
;; Type "fdivd": as above, with the div/sqrt unit occupied for 28 cycles.
(define_insn_reservation "cortex_a7_fdivd" 29
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "fdivd")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1, cortex_a7_fp_div_sqrt * 28")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP to/from core transfers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Core-to-VFP transfers.
;; These hold both issue slots for one cycle; result latency 4.
(define_insn_reservation "cortex_a7_r2f" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "r_2_f")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; VFP-to-core transfers.
;; These use the ex1 slot only; result latency 2.
(define_insn_reservation "cortex_a7_f2r" 2
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_2_r")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP flag transfer.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FIXME: The flag forwarding from fmstat to the second instruction is
;; not modeled at present.
(define_insn_reservation "cortex_a7_f_flags" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_flag")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP load/store.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Type "f_loads": ex1 slot only; default result latency 4 (lowered for
;; the consumers listed in the bypass at the end of this section).
(define_insn_reservation "cortex_a7_f_loads" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_loads")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Type "f_loadd": holds both issue slots for one cycle.
(define_insn_reservation "cortex_a7_f_loadd" 4
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_loadd")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Type "f_stores": ex1 slot only; latency 0.
(define_insn_reservation "cortex_a7_f_stores" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_stores")
(eq_attr "neon_type" "none")))
"cortex_a7_ex1")
;; Type "f_stored": holds both issue slots for one cycle; latency 0.
(define_insn_reservation "cortex_a7_f_stored" 0
(and (eq_attr "tune" "cortexa7")
(and (eq_attr "type" "f_stored")
(eq_attr "neon_type" "none")))
"cortex_a7_both")
;; Load-to-use for floating-point values has a penalty of one cycle,
;; i.e. a latency of two.
;; NOTE(review): cortex_a7_fpmuls is not among the consumers below, so a
;; single-precision multiply fed by a VFP load keeps the default latency
;; of 4 -- confirm whether that omission is intentional.
(define_bypass 2 "cortex_a7_f_loads, cortex_a7_f_loadd"
"cortex_a7_fpalu, cortex_a7_fpmacs, cortex_a7_fpmuld,\
cortex_a7_fpmacd, cortex_a7_fdivs, cortex_a7_fdivd,\
cortex_a7_f2r")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NEON instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Catch-all for every instruction whose neon_type is not "none" (not
;; only loads and stores): both issue slots are held for two cycles and
;; the result latency is 4.  The cortex_a7_neon cpu unit itself is not
;; part of this reservation; it is reserved only through cortex_a7_all.
(define_insn_reservation "cortex_a7_neon" 4
(and (eq_attr "tune" "cortexa7")
(eq_attr "neon_type" "!none"))
"cortex_a7_both*2")
...@@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \ ...@@ -32,6 +32,7 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/constraints.md \ $(srcdir)/config/arm/constraints.md \
$(srcdir)/config/arm/cortex-a15.md \ $(srcdir)/config/arm/cortex-a15.md \
$(srcdir)/config/arm/cortex-a5.md \ $(srcdir)/config/arm/cortex-a5.md \
$(srcdir)/config/arm/cortex-a7.md \
$(srcdir)/config/arm/cortex-a8.md \ $(srcdir)/config/arm/cortex-a8.md \
$(srcdir)/config/arm/cortex-a8-neon.md \ $(srcdir)/config/arm/cortex-a8-neon.md \
$(srcdir)/config/arm/cortex-a9.md \ $(srcdir)/config/arm/cortex-a9.md \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment