Commit c02a5ccb by Sanjin Liu Committed by Richard Earnshaw

arm-cores.def: Add Faraday CPU support - fa526/fa626/fa606te/fa626te/fmp626/fa726te.

2010-12-20  Sanjin Liu	<scliu@faraday-tech.com>
	Mingfeng Wu  <mingfeng@faraday-tech.com>

	* config/arm/arm-cores.def: Add Faraday CPU support -
	fa526/fa626/fa606te/fa626te/fmp626/fa726te.
	* config/arm/arm-tune.md: Regenerate.
	* config/arm/arm.c (arm_fa726te_tune): New tune_params for fa726te
	(fa726te_sched_adjust_cost): New cost function for fa726te.
	(arm_issue_rate): Add fa726te.
	* config/arm/arm.md (generic_sched): Add Faraday cores to generic_sched
	and include machine description files.
	* config/arm/bpabi.h (TARGET_FIX_V4BX_SPEC): Add fa526 and fa626.
	* config/arm/t-arm (MD_INCLUDES): Include machine description files for
	Faraday cores.
	* config/arm/t-arm-elf: Add multilib option for Faraday cores.
	* config/arm/t-linux-eabi: Add multilib option for Faraday cores except
	fa526 and fa626.
	* doc/invoke.texi: Document -mcpu for Faraday cores.
	* config/arm/fa526.md: New file.
	* config/arm/fa606te.md: New file.
	* config/arm/fa626te.md: New file.
	* config/arm/fmp626.md: New file.
	* config/arm/fa726te.md: New file.

Co-Authored-By: Mingfeng Wu <mingfeng@faraday-tech.com>

From-SVN: r168090
parent 2e902fab
2010-12-20 Sanjin Liu <scliu@faraday-tech.com>
Mingfeng Wu <mingfeng@faraday-tech.com>
* config/arm/arm-cores.def: Add Faraday CPU support -
fa526/fa626/fa606te/fa626te/fmp626/fa726te.
* config/arm/arm-tune.md: Regenerate.
* config/arm/arm.c (arm_fa726te_tune): New tune_params for fa726te
(fa726te_sched_adjust_cost): New cost function for fa726te.
(arm_issue_rate): Add fa726te.
* config/arm/arm.md (generic_sched): Add Faraday cores to generic_sched
and include machine description files.
* config/arm/bpabi.h (TARGET_FIX_V4BX_SPEC): Add fa526 and fa626.
* config/arm/t-arm (MD_INCLUDES): Include machine description files for
Faraday cores.
* config/arm/t-arm-elf: Add multilib option for Faraday cores.
* config/arm/t-linux-eabi: Add multilib option for Faraday cores except
fa526 and fa626.
* doc/invoke.texi: Document -mcpu for Faraday cores.
* config/arm/fa526.md: New file.
* config/arm/fa606te.md: New file.
* config/arm/fa626te.md: New file.
* config/arm/fmp626.md: New file.
* config/arm/fa726te.md: New file.
2010-12-20 Yvan Roux <yvan.roux@st.com> 2010-12-20 Yvan Roux <yvan.roux@st.com>
* config/arm/lib1funcs.asm (ARM_DIV_BODY case __OPTIMIZE_SIZE__): Fix * config/arm/lib1funcs.asm (ARM_DIV_BODY case __OPTIMIZE_SIZE__): Fix
...@@ -74,6 +74,8 @@ ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL ...@@ -74,6 +74,8 @@ ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL
ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul) ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul)
ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul)
/* V4T Architecture Processors */ /* V4T Architecture Processors */
ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul) ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul)
...@@ -104,6 +106,10 @@ ARM_CORE("arm1022e", arm1022e, 5TE, FL_LDSCHED, fastmul) ...@@ -104,6 +106,10 @@ ARM_CORE("arm1022e", arm1022e, 5TE, FL_LDSCHED, fastmul)
ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale) ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale) ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
ARM_CORE("fa606te", fa606te, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fa626te", fa626te, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fmp626", fmp626, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fa726te", fa726te, 5TE, FL_LDSCHED, fa726te)
/* V5TEJ Architecture Processors */ /* V5TEJ Architecture Processors */
ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e) ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
......
;; -*- buffer-read-only: t -*- ;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def ;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune" (define_attr "tune"
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0" "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "((enum attr_tune) arm_tune)"))) (const (symbol_ref "((enum attr_tune) arm_tune)")))
...@@ -240,6 +240,7 @@ static rtx arm_trampoline_adjust_address (rtx); ...@@ -240,6 +240,7 @@ static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg); static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode); static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t); static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
...@@ -886,6 +887,14 @@ const struct tune_params arm_cortex_a9_tune = ...@@ -886,6 +887,14 @@ const struct tune_params arm_cortex_a9_tune =
ARM_PREFETCH_BENEFICIAL(4,32,32) ARM_PREFETCH_BENEFICIAL(4,32,32)
}; };
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
fa726te_sched_adjust_cost,
1,
ARM_PREFETCH_NOT_BENEFICIAL
};
/* Not all of these give usefully different compilation alternatives, /* Not all of these give usefully different compilation alternatives,
but there is no simple way of generalizing them. */ but there is no simple way of generalizing them. */
...@@ -7996,6 +8005,36 @@ cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) ...@@ -7996,6 +8005,36 @@ cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
return true; return true;
} }
/* Scheduler cost hook for the Faraday FA726TE.

   Only true dependencies (LINK is REG_DEP_TRUE) between two recognized
   insns, where DEP sets the condition codes, are adjusted.  On the
   FA726TE a true dependency on the CPSR carries a penalty of 3, so the
   value stored through COST is:
     3  when INSN uses the condition codes and is not a branch
        (e.g. the carry consumer in 64-bit arithmetic);
     0  when INSN is a COND_EXEC pattern, or is a branch that uses
        the condition codes.
   Returns false whenever *COST has been written, and true (leaving
   *COST untouched) in every other case.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  bool insn_reads_cc;

  /* Nothing to adjust unless a recognized flag-setting DEP feeds a
     recognized INSN through a true dependency.  */
  if (REG_NOTE_KIND (link) != REG_DEP_TRUE
      || recog_memoized (insn) < 0
      || recog_memoized (dep) < 0
      || get_attr_conds (dep) != CONDS_SET)
    return true;

  insn_reads_cc = (get_attr_conds (insn) == CONDS_USE);

  /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
  if (insn_reads_cc && get_attr_type (insn) != TYPE_BRANCH)
    {
      *cost = 3;
      return false;
    }

  /* Predicated insn, or a branch on the condition codes: cost 0.  */
  if (insn_reads_cc || GET_CODE (PATTERN (insn)) == COND_EXEC)
    {
      *cost = 0;
      return false;
    }

  return true;
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST. /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new INSN and DEP through the dependence LINK. It returns the new
...@@ -22840,6 +22879,7 @@ arm_issue_rate (void) ...@@ -22840,6 +22879,7 @@ arm_issue_rate (void)
case cortexa5: case cortexa5:
case cortexa8: case cortexa8:
case cortexa9: case cortexa9:
case fa726te:
return 2; return 2;
default: default:
......
...@@ -498,7 +498,7 @@ ...@@ -498,7 +498,7 @@
(define_attr "generic_sched" "yes,no" (define_attr "generic_sched" "yes,no"
(const (if_then_else (const (if_then_else
(ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4") (ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "yes")) (eq_attr "tune_cortexr4" "yes"))
(const_string "no") (const_string "no")
(const_string "yes")))) (const_string "yes"))))
...@@ -516,6 +516,11 @@ ...@@ -516,6 +516,11 @@
(include "arm1020e.md") (include "arm1020e.md")
(include "arm1026ejs.md") (include "arm1026ejs.md")
(include "arm1136jfs.md") (include "arm1136jfs.md")
(include "fa526.md")
(include "fa606te.md")
(include "fa626te.md")
(include "fmp626.md")
(include "fa726te.md")
(include "cortex-a5.md") (include "cortex-a5.md")
(include "cortex-a8.md") (include "cortex-a8.md")
(include "cortex-a9.md") (include "cortex-a9.md")
......
...@@ -52,13 +52,16 @@ ...@@ -52,13 +52,16 @@
/* The BPABI integer comparison routines return { -1, 0, 1 }. */ /* The BPABI integer comparison routines return { -1, 0, 1 }. */
#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI #define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI
#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}" #define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}" #define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
"|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
/* Tell the assembler to build BPABI binaries. */ /* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC #undef SUBTARGET_EXTRA_ASM_SPEC
#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC #define SUBTARGET_EXTRA_ASM_SPEC \
"%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
#ifndef SUBTARGET_EXTRA_LINK_SPEC #ifndef SUBTARGET_EXTRA_LINK_SPEC
#define SUBTARGET_EXTRA_LINK_SPEC "" #define SUBTARGET_EXTRA_LINK_SPEC ""
......
;; Faraday FA526 Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; These descriptions are based on the information contained in the
;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;;
;; Modeled pipeline characteristics:
;; LD -> any use: latency = 3 (2 cycle penalty).
;; ALU -> any use: latency = 2 (1 cycle penalty).
;; This automaton provides a pipeline description for the Faraday
;; FA526 core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
;; Declare the automaton that holds the FA526 pipeline unit.
(define_automaton "fa526")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; S E M W
;; The whole pipeline is represented by one unit, fa526_core; every
;; FA526 reservation in this file is expressed in terms of it.
(define_cpu_unit "fa526_core" "fa526")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;;
;; NOTE(review): the header of this file states "ALU -> any use:
;; latency = 2", but 526_alu_op below declares a latency of 1 --
;; confirm against the FA526 design note.
;; ALU operations
;; Type "alu": latency 1, fa526_core busy for one cycle.
(define_insn_reservation "526_alu_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "alu"))
"fa526_core")
;; Types "alu_shift" and "alu_shift_reg": latency 2, fa526_core busy
;; for one cycle.
(define_insn_reservation "526_alu_shift_op" 2
(and (eq_attr "tune" "fa526")
(eq_attr "type" "alu_shift,alu_shift_reg"))
"fa526_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplies whose "insn" attribute is smlalxy, smulxy or smlaxy:
;; latency 2, fa526_core busy for one cycle.  Each class is listed
;; exactly once; repeating a name in the list adds nothing to the match.
(define_insn_reservation "526_mult1" 2
(and (eq_attr "tune" "fa526")
(eq_attr "insn" "smlalxy,smulxy,smlaxy"))
"fa526_core")
;; The multiply classes listed below: latency 5, fa526_core held for
;; four consecutive cycles.
(define_insn_reservation "526_mult2" 5
(and (eq_attr "tune" "fa526")
(eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
umlals,smulls,smlals,smlawx"))
"fa526_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;;
;; Summary of the reservations below (N = 1..4):
;;   loadN:  latency N + 2, fa526_core held for N cycles.
;;   storeN: latency N - 1, fa526_core held for N cycles.
;; Type "load_byte" is treated the same as "load1".
(define_insn_reservation "526_load1_op" 3
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load1,load_byte"))
"fa526_core")
(define_insn_reservation "526_load2_op" 4
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load2"))
"fa526_core*2")
(define_insn_reservation "526_load3_op" 5
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load3"))
"fa526_core*3")
(define_insn_reservation "526_load4_op" 6
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load4"))
"fa526_core*4")
;; Stores: a single store is given latency 0.
(define_insn_reservation "526_store1_op" 0
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store1"))
"fa526_core")
(define_insn_reservation "526_store2_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store2"))
"fa526_core*2")
(define_insn_reservation "526_store3_op" 2
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store3"))
"fa526_core*3")
(define_insn_reservation "526_store4_op" 3
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store4"))
"fa526_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA526
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;; Type "branch": latency 0, fa526_core busy for one cycle.
(define_insn_reservation "526_branch_op" 0
(and (eq_attr "tune" "fa526")
(eq_attr "type" "branch"))
"fa526_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value. For most cases, the return value is set
;; by a mov instruction, which has 1 cycle latency.
;; Type "call": latency 1, fa526_core busy for one cycle.
(define_insn_reservation "526_call_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "call"))
"fa526_core")
;; Faraday FA606TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; These descriptions are based on the information contained in the
;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Modeled pipeline characteristics:
;; LD -> any use: latency = 2 (1 cycle penalty).
;; ALU -> any use: latency = 1 (0 cycle penalty).
;; This automaton provides a pipeline description for the Faraday
;; FA606TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
;; Declare the automaton that holds the FA606TE pipeline unit.
(define_automaton "fa606te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; E M W
;; The whole pipeline is represented by one unit, fa606te_core; every
;; FA606TE reservation in this file is expressed in terms of it.
(define_cpu_unit "fa606te_core" "fa606te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations
;; Types "alu", "alu_shift" and "alu_shift_reg" are all treated alike:
;; latency 1, fa606te_core busy for one cycle.
(define_insn_reservation "606te_alu_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "alu,alu_shift,alu_shift_reg"))
"fa606te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Four multiply groups, selected by the "insn" attribute.  Latency and
;; core occupancy both grow from one group to the next:
;;   606te_mult1: latency 2, fa606te_core for 1 cycle.
;;   606te_mult2: latency 3, fa606te_core for 2 cycles.
;;   606te_mult3: latency 4, fa606te_core for 3 cycles.
;;   606te_mult4: latency 5, fa606te_core for 4 cycles.
(define_insn_reservation "606te_mult1" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "smlalxy"))
"fa606te_core")
(define_insn_reservation "606te_mult2" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy"))
"fa606te_core*2")
(define_insn_reservation "606te_mult3" 4
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "mul,mla,muls,mlas"))
"fa606te_core*3")
(define_insn_reservation "606te_mult4" 5
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
"fa606te_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;;
;; Summary of the reservations below (N = 1..4):
;;   loadN:  latency N + 1, fa606te_core held for N cycles.
;;   storeN: latency N - 1, fa606te_core held for N cycles.
;; Type "load_byte" is treated the same as "load1".
(define_insn_reservation "606te_load1_op" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load1,load_byte"))
"fa606te_core")
(define_insn_reservation "606te_load2_op" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load2"))
"fa606te_core*2")
(define_insn_reservation "606te_load3_op" 4
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load3"))
"fa606te_core*3")
(define_insn_reservation "606te_load4_op" 5
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load4"))
"fa606te_core*4")
;; Stores: a single store is given latency 0.
(define_insn_reservation "606te_store1_op" 0
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store1"))
"fa606te_core")
(define_insn_reservation "606te_store2_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store2"))
"fa606te_core*2")
(define_insn_reservation "606te_store3_op" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store3"))
"fa606te_core*3")
(define_insn_reservation "606te_store4_op" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store4"))
"fa606te_core*4")
;; Disabled alternative: a single catch-all reservation for every
;; multi-register load/store.  It is commented out, so the per-type
;; reservations above are the ones in effect.
;;(define_insn_reservation "606te_ldm_op" 9
;; (and (eq_attr "tune" "fa606te")
;; (eq_attr "type" "load2,load3,load4,store2,store3,store4"))
;; "fa606te_core*7")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA606TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;; Type "branch": latency 0, fa606te_core busy for one cycle.
(define_insn_reservation "606te_branch_op" 0
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "branch"))
"fa606te_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value. For most cases, the return value is set
;; by a mov instruction, which has 1 cycle latency.
;; Type "call": latency 1, fa606te_core busy for one cycle.
(define_insn_reservation "606te_call_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "call"))
"fa606te_core")
;; Faraday FA626TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; These descriptions are based on the information contained in the
;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Modeled pipeline characteristics:
;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles).
;; ALU -> shifted address LDR/STR: latency = 3.
;; ( extra 1 cycle unavoidable stall).
;; ALU -> other use: latency = 2 (available after 2 cycles).
;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles).
;; LD -> shifted address LDR/STR: latency = 4
;; ( extra 1 cycle unavoidable stall).
;; LD -> any other use: latency = 3 (available after 3 cycles).
;; This automaton provides a pipeline description for the Faraday
;; FA626TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
;; Declare the automaton that holds the FA626TE pipeline unit.
(define_automaton "fa626te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; S E M W
;; The whole pipeline is represented by one unit, fa626te_core.  The
;; reservations in this file apply to both the "fa626" and "fa626te"
;; tunings and are all expressed in terms of this unit.
(define_cpu_unit "fa626te_core" "fa626te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;;
;; NOTE(review): the header of this file states "ALU -> other use:
;; latency = 2", but 626te_alu_op below declares a latency of 1 --
;; confirm against the FA626TE design note.
;; ALU operations
;; Type "alu": latency 1, fa626te_core busy for one cycle.
(define_insn_reservation "626te_alu_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "alu"))
"fa626te_core")
;; Types "alu_shift" and "alu_shift_reg": latency 2, fa626te_core busy
;; for one cycle.
(define_insn_reservation "626te_alu_shift_op" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "alu_shift,alu_shift_reg"))
"fa626te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Four multiply groups, selected by the "insn" attribute:
;;   626te_mult1: latency 2, fa626te_core for 1 cycle.
;;   626te_mult2: latency 2, fa626te_core for 1 cycle (mul, mla).
;;   626te_mult3: latency 3, fa626te_core for 2 cycles.
;;   626te_mult4: latency 4, fa626te_core for 3 cycles.
(define_insn_reservation "626te_mult1" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
"fa626te_core")
(define_insn_reservation "626te_mult2" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "mul,mla"))
"fa626te_core")
(define_insn_reservation "626te_mult3" 3
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
"fa626te_core*2")
(define_insn_reservation "626te_mult4" 4
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "smulls,smlals,umulls,umlals"))
"fa626te_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;;
;; Note that the numeric suffix in a reservation name is a group index,
;; not the register count: e.g. 626te_load2_op covers types load2 and
;; load3, and 626te_load3_op covers type load4 (likewise for stores).
;; Loads: latency 3 / 4 / 5 with fa626te_core held for 1 / 2 / 3 cycles.
(define_insn_reservation "626te_load1_op" 3
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load1,load_byte"))
"fa626te_core")
(define_insn_reservation "626te_load2_op" 4
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load2,load3"))
"fa626te_core*2")
(define_insn_reservation "626te_load3_op" 5
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load4"))
"fa626te_core*3")
;; Stores: latency 0 / 1 / 2 with fa626te_core held for 1 / 2 / 3 cycles.
(define_insn_reservation "626te_store1_op" 0
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store1"))
"fa626te_core")
(define_insn_reservation "626te_store2_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store2,store3"))
"fa626te_core*2")
(define_insn_reservation "626te_store3_op" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store4"))
"fa626te_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA626TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;; Type "branch": latency 0, fa626te_core busy for one cycle.
(define_insn_reservation "626te_branch_op" 0
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "branch"))
"fa626te_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value.
;; Type "call": latency 1, fa626te_core busy for one cycle.
(define_insn_reservation "626te_call_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "call"))
"fa626te_core")
;; Faraday FA726TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; These descriptions are based on the information contained in the
;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; This automaton provides a pipeline description for the Faraday
;; FA726TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
;; Declare the automaton that holds all FA726TE units.
(define_automaton "fa726te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; E1 E2 E3 E4 E5 WB
;;______________________________________________________
;;
;; <-------------- LD/ST ----------->
;; shifter + LU <-- AU -->
;; <-- AU --> shifter + LU CPSR (Pipe 0)
;;______________________________________________________
;;
;; <---------- MUL --------->
;; shifter + LU <-- AU -->
;; <-- AU --> shifter + LU CPSR (Pipe 1)
;; Execution units: two ALU pipes, one multiply-accumulate pipe, and a
;; load/store unit split into an "e" and a "w" part.
(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")
(define_cpu_unit "fa726te_mac_pipe" "fa726te")
(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")
;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly
;; improve code quality.
(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")
;; Two issue slots.  "fa726te_issue" takes either one of them, so two
;; insns that each reserve it can start in the same cycle.
(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")
(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)")
;; Reservation to restrict issue to 1.
;; It takes both issue slots at once, leaving none for another insn.
(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require three cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; Move instructions.
;; Selected by the "insn" attribute (mov, mvn) whatever their type:
;; latency 1, one issue slot plus either ALU pipe.
(define_insn_reservation "726te_shift_op" 1
(and (eq_attr "tune" "fa726te")
(eq_attr "insn" "mov,mvn"))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;; ALU operations with no shifted operand will finish in 1 cycle.
;; Other ALU instructions take 2 cycles.
;; NOTE(review): the shifted forms below declare a latency of 3, not
;; 2 -- confirm which figure is intended.
;; mov/mvn are excluded here (and in the reservations that follow)
;; because 726te_shift_op above already matches them.
(define_insn_reservation "726te_alu_op" 1
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;; ALU operations with a shift-by-register operand.
;; These really stall in the decoder, in order to read the shift value
;; in the first cycle. If the instruction uses both shifter and AU,
;; it takes 3 cycles.
;; Type "alu_shift": latency 3, one issue slot plus either ALU pipe.
(define_insn_reservation "726te_alu_shift_op" 3
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu_shift")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;; Type "alu_shift_reg": same latency and units as "alu_shift" above.
(define_insn_reservation "726te_alu_shift_reg_op" 3
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu_shift_reg")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication instructions loop in the execute stage until the
;; instruction has been passed through the multiplier array enough
;; times. Multiply operations occur in both the execute and memory
;; stages of the pipeline
;; Every multiply and multiply-accumulate variant named in the "insn"
;; list below shares this one reservation: default latency 3, one issue
;; slot plus the single MAC pipe.  The list is one string continued over
;; two lines with a backslash, so its layout must not be changed.
(define_insn_reservation "726te_mult_op" 3
(and (eq_attr "tune" "fa726te")
(eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
"fa726te_issue+fa726te_mac_pipe")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;; Loads with a shifted offset take 3 cycles, and are (a) probably the
;; most common and (b) the pessimistic assumption will lead to fewer stalls.
;; Scalar loads are pipelined in FA726TE LSU pipe.
;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
;; The 2nd LSU (lsu1) models the fact that if 2 loads are scheduled in the
;; same "bundle", the 2nd load will introduce another ISSUE stall but is
;; still ok to execute (and may be beneficial sometimes).
;; Single-register and byte loads, default latency 3.  The reservation
;; string (continued over two lines with a backslash) offers two
;; alternatives:
;;   1. one issue slot together with both units of the first LSU;
;;   2. one issue slot together with both units of the pretend second LSU
;;      (lsu1), followed on the next cycle (',') by "fa726te_blockage",
;;      i.e. both issue slots are taken for that following cycle.
(define_insn_reservation "726te_load1_op" 3
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "load1,load_byte"))
"(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
| (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
;; A single-register store has default latency 1 and holds both issue
;; slots for 2 consecutive cycles.
(define_insn_reservation "726te_store1_op" 1
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "store1"))
  "fa726te_blockage*2")

;; Load/Store Multiple block all pipelines in the EX stages until WB, so
;; no other instruction can be issued together with them.  Since they
;; essentially prevent all scheduling opportunities, they are modelled
;; together here.  An LDM is broken into multiple load instructions, and
;; a later instruction in pipe 1 is stalled.

;; Loads of type load2 or load3: default latency 4, both issue slots held
;; for 4 cycles.
(define_insn_reservation "726te_ldm2_op" 4
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "load2,load3"))
  "fa726te_blockage*4")

;; Loads of type load4: default latency 5, both issue slots held for
;; 5 cycles.
(define_insn_reservation "726te_ldm3_op" 5
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "load4"))
  "fa726te_blockage*5")

;; Stores of type store2 or store3: default latency 2, both issue slots
;; held for 3 cycles.
(define_insn_reservation "726te_stm2_op" 2
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "store2,store3"))
  "fa726te_blockage*3")

;; Stores of type store4: default latency 3, both issue slots held for
;; 4 cycles.
(define_insn_reservation "726te_stm3_op" 3
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "store4"))
  "fa726te_blockage*4")
;; Bypasses.  Each form replaces the default latency between a producer
;; reservation (first list) and a consumer reservation (second list) with
;; the given number.  A third operand, when present, names a guard
;; function defined outside this file; the bypass applies only when the
;; guard returns nonzero.
;;
;; Any load feeding any store: latency 1, subject to the guard.
;; NOTE(review): presumably the guard is true when the store does not
;; need the loaded value to form its address -- confirm in arm.c.
(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")
;; ALU or multiply result feeding a single-register store (store
;; multiples are not listed): latency 0, same guard.
(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\
726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"
"arm_no_early_store_addr_dep")
;; Move / plain ALU result feeding a move or plain ALU operation:
;; latency 0.
(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")
;; Shifted-operand ALU result feeding a move or plain ALU operation:
;; latency 1 instead of the default 3.
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"
"726te_shift_op,726te_alu_op")
;; Shifted-operand ALU or multiply result feeding a shifted-operand ALU
;; operation: latency 1, each subject to its own guard function.
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
"726te_alu_shift_op" "arm_no_early_alu_shift_dep")
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
"726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")
;; Multiply result feeding a move or plain ALU operation: latency 1
;; instead of the default 3.
(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")
;; Load result feeding a multiply: one cycle MORE than the load's default
;; latency (4, 5 and 6 against defaults of 3, 4 and 5).
(define_bypass 4 "726te_load1_op" "726te_mult_op")
(define_bypass 5 "726te_ldm2_op" "726te_mult_op")
(define_bypass 6 "726te_ldm3_op" "726te_mult_op")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA726TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;; Branches are given the minimum latency, 0, and take both issue slots
;; for one cycle.
(define_insn_reservation "726te_branch_op" 0
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "branch"))
  "fa726te_blockage")

;; The latency for a call is actually the latency when the result is
;; available, i.e. when R0 is ready for an int return value.  A call also
;; takes both issue slots for one cycle.
(define_insn_reservation "726te_call_op" 1
  (and (eq_attr "tune" "fa726te")
       (eq_attr "type" "call"))
  "fa726te_blockage")
;; Faraday FMP626 Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; These descriptions are based on the information contained in the
;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Pipeline architecture
;; S E M W(Q1) Q2
;; ___________________________________________
;; shifter alu
;; mul1 mul2 mul3
;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5
;; This automaton provides a pipeline description for the Faraday
;; FMP626 core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
;; Declare the automaton that the unit and all reservations of this
;; description belong to.
(define_automaton "fmp626")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline.
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;;
;; One unit stands for the whole core; every reservation in this
;; description is written in terms of "fmp626_core" alone.
(define_cpu_unit "fmp626_core" "fmp626")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations without a shifted operand (type "alu"): default
;; latency 1, core busy for one cycle.
(define_insn_reservation "mp626_alu_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "alu"))
  "fmp626_core")

;; ALU operations with a shifted operand, by constant or by register
;; (types "alu_shift" and "alu_shift_reg"): default latency 2, core busy
;; for one cycle.
(define_insn_reservation "mp626_alu_shift_op" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "alu_shift,alu_shift_reg"))
  "fmp626_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplies are split into four classes by their "insn" attribute.
;; mult1 and mult2 have the same default latency (2) and the same
;; reservation (core busy for one cycle); they differ only in the
;; instructions they match.
(define_insn_reservation "mp626_mult1" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
  "fmp626_core")

(define_insn_reservation "mp626_mult2" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "insn" "mul,mla"))
  "fmp626_core")

;; Default latency 3, core busy for 2 consecutive cycles.
(define_insn_reservation "mp626_mult3" 3
  (and (eq_attr "tune" "fmp626")
       (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
  "fmp626_core*2")

;; Default latency 4, core busy for 3 consecutive cycles.
(define_insn_reservation "mp626_mult4" 4
  (and (eq_attr "tune" "fmp626")
       (eq_attr "insn" "smulls,smlals,umulls,umlals"))
  "fmp626_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;; Loads.  The core is kept busy for 1, 2 or 3 cycles depending on the
;; load type, and the default latency is 5, 6 or 7 respectively.
(define_insn_reservation "mp626_load1_op" 5
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load1,load_byte"))
  "fmp626_core")

(define_insn_reservation "mp626_load2_op" 6
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load2,load3"))
  "fmp626_core*2")

(define_insn_reservation "mp626_load3_op" 7
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "load4"))
  "fmp626_core*3")

;; Stores.  Same core occupancy as the matching loads (1, 2 or 3 cycles),
;; with default latencies of 0, 1 and 2.
(define_insn_reservation "mp626_store1_op" 0
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store1"))
  "fmp626_core")

(define_insn_reservation "mp626_store2_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store2,store3"))
  "fmp626_core*2")

(define_insn_reservation "mp626_store3_op" 2
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "store4"))
  "fmp626_core*3")
;; Bypasses.  Each form replaces the default latency between a producer
;; reservation (first list) and a consumer reservation (second list) with
;; the given number.  A third operand, when present, names a guard
;; function defined outside this file; the bypass applies only when the
;; guard returns nonzero.
;;
;; Any load feeding any store: latency 1, subject to the guard.
;; NOTE(review): presumably the guard is true when the store does not
;; need the loaded value to form its address -- confirm in arm.c.
(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
"mp626_store1_op,mp626_store2_op,mp626_store3_op"
"arm_no_early_store_addr_dep")
;; ALU or multiply result feeding a single-register store (multi-register
;; stores are not listed): latency 1, same guard.
(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\
mp626_mult3,mp626_mult4" "mp626_store1_op"
"arm_no_early_store_addr_dep")
;; Shifted-operand ALU result feeding a plain ALU operation: latency 1
;; instead of the default 2.
(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op")
;; Shifted-operand ALU result feeding another shifted-operand ALU
;; operation: latency 1, guarded.
(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
;; Multiply result feeding a shifted-operand ALU operation: one cycle
;; less than the multiply's default latency, guarded.
(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
;; Multiply result feeding a plain ALU operation: the same reduced
;; latencies, with no guard.
(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op")
(define_bypass 2 "mp626_mult3" "mp626_alu_op")
(define_bypass 3 "mp626_mult4" "mp626_alu_op")
;; Load result feeding a plain ALU operation: one cycle less than the
;; load's default latency (4, 5 and 6 against defaults of 5, 6 and 7).
(define_bypass 4 "mp626_load1_op" "mp626_alu_op")
(define_bypass 5 "mp626_load2_op" "mp626_alu_op")
(define_bypass 6 "mp626_load3_op" "mp626_alu_op")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FMP626
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
;; Branches are given the minimum latency, 0, and keep the core busy for
;; one cycle.
(define_insn_reservation "mp626_branch_op" 0
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "branch"))
  "fmp626_core")

;; The latency for a call is actually the latency when the result is
;; available, i.e. when R0 is ready for an int return value.
(define_insn_reservation "mp626_call_op" 1
  (and (eq_attr "tune" "fmp626")
       (eq_attr "type" "call"))
  "fmp626_core")
...@@ -24,6 +24,11 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \ ...@@ -24,6 +24,11 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \
$(srcdir)/config/arm/arm1020e.md \ $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/arm1026ejs.md \ $(srcdir)/config/arm/arm1026ejs.md \
$(srcdir)/config/arm/arm1136jfs.md \ $(srcdir)/config/arm/arm1136jfs.md \
$(srcdir)/config/arm/fa526.md \
$(srcdir)/config/arm/fa606te.md \
$(srcdir)/config/arm/fa626te.md \
$(srcdir)/config/arm/fmp626.md \
$(srcdir)/config/arm/fa726te.md \
$(srcdir)/config/arm/arm926ejs.md \ $(srcdir)/config/arm/arm926ejs.md \
$(srcdir)/config/arm/cirrus.md \ $(srcdir)/config/arm/cirrus.md \
$(srcdir)/config/arm/fpa.md \ $(srcdir)/config/arm/fpa.md \
......
...@@ -36,6 +36,10 @@ MULTILIB_DIRNAMES = arm thumb ...@@ -36,6 +36,10 @@ MULTILIB_DIRNAMES = arm thumb
MULTILIB_EXCEPTIONS = MULTILIB_EXCEPTIONS =
MULTILIB_MATCHES = MULTILIB_MATCHES =
#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te
#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626
#MULTILIB_OPTIONS += march=armv7 #MULTILIB_OPTIONS += march=armv7
#MULTILIB_DIRNAMES += thumb2 #MULTILIB_DIRNAMES += thumb2
#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7* #MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7*
...@@ -52,6 +56,8 @@ MULTILIB_MATCHES = ...@@ -52,6 +56,8 @@ MULTILIB_MATCHES =
MULTILIB_OPTIONS += mfloat-abi=hard MULTILIB_OPTIONS += mfloat-abi=hard
MULTILIB_DIRNAMES += fpu MULTILIB_DIRNAMES += fpu
MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard* MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard*
#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard*
#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard*
# MULTILIB_OPTIONS += mcpu=ep9312 # MULTILIB_OPTIONS += mcpu=ep9312
# MULTILIB_DIRNAMES += ep9312 # MULTILIB_DIRNAMES += ep9312
......
...@@ -24,6 +24,10 @@ TARGET_LIBGCC2_CFLAGS = -fPIC ...@@ -24,6 +24,10 @@ TARGET_LIBGCC2_CFLAGS = -fPIC
MULTILIB_OPTIONS = MULTILIB_OPTIONS =
MULTILIB_DIRNAMES = MULTILIB_DIRNAMES =
#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te
#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te*
# Use a version of div0 which raises SIGFPE, and a special __clear_cache. # Use a version of div0 which raises SIGFPE, and a special __clear_cache.
LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment