Commit c02a5ccb by Sanjin Liu Committed by Richard Earnshaw

arm-cores.def: Add Faraday CPU support - fa526/fa626/fa606te/fa626te/fmp626/fa726te.

2010-12-20  Sanjin Liu	<scliu@faraday-tech.com>
	Mingfeng Wu  <mingfeng@faraday-tech.com>

	* config/arm/arm-cores.def: Add Faraday CPU support -
	fa526/fa626/fa606te/fa626te/fmp626/fa726te.
	* config/arm/arm-tune.md: Regenerate.
	* config/arm/arm.c (arm_fa726te_tune): New tune_params for fa726te
	(fa726te_sched_adjust_cost): New cost function for fa726te.
	(arm_issue_rate): Add fa726te.
	* config/arm/arm.md (generic_sched): Add Faraday cores to generic_sched
	and include machine description files.
	* config/arm/bpabi.h (TARGET_FIX_V4BX_SPEC): Add fa526 and fa626.
	* config/arm/t-arm (MD_INCLUDES): Include machine description files for
	Faraday cores.
	* config/arm/t-arm-elf: Add multilib option for Faraday cores.
	* config/arm/t-linux-eabi: Add multilib option for Faraday cores except
	fa526 and fa626.
	* doc/invoke.texi: Document -mcpu for Faraday cores.
	* config/arm/fa526.md: New file.
	* config/arm/fa606te.md: New file.
	* config/arm/fa626te.md: New file.
	* config/arm/fmp626.md: New file.
	* config/arm/fa726te.md: New file.

Co-Authored-By: Mingfeng Wu <mingfeng@faraday-tech.com>

From-SVN: r168090
parent 2e902fab
2010-12-20 Sanjin Liu <scliu@faraday-tech.com>
Mingfeng Wu <mingfeng@faraday-tech.com>
* config/arm/arm-cores.def: Add Faraday CPU support -
fa526/fa626/fa606te/fa626te/fmp626/fa726te.
* config/arm/arm-tune.md: Regenerate.
* config/arm/arm.c (arm_fa726te_tune): New tune_params for fa726te
(fa726te_sched_adjust_cost): New cost function for fa726te.
(arm_issue_rate): Add fa726te.
* config/arm/arm.md (generic_sched): Add Faraday cores to generic_sched
and include machine description files.
* config/arm/bpabi.h (TARGET_FIX_V4BX_SPEC): Add fa526 and fa626.
* config/arm/t-arm (MD_INCLUDES): Include machine description files for
Faraday cores.
* config/arm/t-arm-elf: Add multilib option for Faraday cores.
* config/arm/t-linux-eabi: Add multilib option for Faraday cores except
fa526 and fa626.
* doc/invoke.texi: Document -mcpu for Faraday cores.
* config/arm/fa526.md: New file.
* config/arm/fa606te.md: New file.
* config/arm/fa626te.md: New file.
* config/arm/fmp626.md: New file.
* config/arm/fa726te.md: New file.
2010-12-20 Yvan Roux <yvan.roux@st.com>
* config/arm/lib1funcs.asm (ARM_DIV_BODY case __OPTIMIZE_SIZE__): Fix
......@@ -74,6 +74,8 @@ ARM_CORE("strongarm", strongarm, 4, FL_MODE26 | FL_LDSCHED | FL
ARM_CORE("strongarm110", strongarm110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("strongarm1100", strongarm1100, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("strongarm1110", strongarm1110, 4, FL_MODE26 | FL_LDSCHED | FL_STRONG, fastmul)
ARM_CORE("fa526", fa526, 4, FL_LDSCHED, fastmul)
ARM_CORE("fa626", fa626, 4, FL_LDSCHED, fastmul)
/* V4T Architecture Processors */
ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_CO_PROC , fastmul)
......@@ -104,6 +106,10 @@ ARM_CORE("arm1022e", arm1022e, 5TE, FL_LDSCHED, fastmul)
ARM_CORE("xscale", xscale, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE, xscale)
ARM_CORE("iwmmxt", iwmmxt, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
ARM_CORE("iwmmxt2", iwmmxt2, 5TE, FL_LDSCHED | FL_STRONG | FL_XSCALE | FL_IWMMXT, xscale)
ARM_CORE("fa606te", fa606te, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fa626te", fa626te, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fmp626", fmp626, 5TE, FL_LDSCHED, 9e)
ARM_CORE("fa726te", fa726te, 5TE, FL_LDSCHED, fa726te)
/* V5TEJ Architecture Processors */
ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
......
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from arm-cores.def
(define_attr "tune"
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
"arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7d,arm7di,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7m,arm7dm,arm7dmi,arm8,arm810,strongarm,strongarm110,strongarm1100,strongarm1110,fa526,fa626,arm7tdmi,arm7tdmis,arm710t,arm720t,arm740t,arm9,arm9tdmi,arm920,arm920t,arm922t,arm940t,ep9312,arm10tdmi,arm1020t,arm9e,arm946es,arm966es,arm968es,arm10e,arm1020e,arm1022e,xscale,iwmmxt,iwmmxt2,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1026ejs,arm1136js,arm1136jfs,arm1176jzs,arm1176jzfs,mpcorenovfp,mpcore,arm1156t2s,arm1156t2fs,cortexa5,cortexa8,cortexa9,cortexa15,cortexr4,cortexr4f,cortexm4,cortexm3,cortexm1,cortexm0"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
......@@ -240,6 +240,7 @@ static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
......@@ -886,6 +887,14 @@ const struct tune_params arm_cortex_a9_tune =
ARM_PREFETCH_BENEFICIAL(4,32,32)
};
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
fa726te_sched_adjust_cost,
1,
ARM_PREFETCH_NOT_BENEFICIAL
};
/* Not all of these give usefully different compilation alternatives,
but there is no simple way of generalizing them. */
......@@ -7996,6 +8005,36 @@ cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
return true;
}
/* Adjust cost hook for FA726TE. */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
/* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
have penalty of 3. */
if (REG_NOTE_KIND (link) == REG_DEP_TRUE
&& recog_memoized (insn) >= 0
&& recog_memoized (dep) >= 0
&& get_attr_conds (dep) == CONDS_SET)
{
/* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
if (get_attr_conds (insn) == CONDS_USE
&& get_attr_type (insn) != TYPE_BRANCH)
{
*cost = 3;
return false;
}
if (GET_CODE (PATTERN (insn)) == COND_EXEC
|| get_attr_conds (insn) == CONDS_USE)
{
*cost = 0;
return false;
}
}
return true;
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
It corrects the value of COST based on the relationship between
INSN and DEP through the dependence LINK. It returns the new
......@@ -22840,6 +22879,7 @@ arm_issue_rate (void)
case cortexa5:
case cortexa8:
case cortexa9:
case fa726te:
return 2;
default:
......
......@@ -498,7 +498,7 @@
(define_attr "generic_sched" "yes,no"
(const (if_then_else
(ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
(ior (eq_attr "tune" "fa526,fa626,fa606te,fa626te,fmp626,fa726te,arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
......@@ -516,6 +516,11 @@
(include "arm1020e.md")
(include "arm1026ejs.md")
(include "arm1136jfs.md")
(include "fa526.md")
(include "fa606te.md")
(include "fa626te.md")
(include "fmp626.md")
(include "fa726te.md")
(include "cortex-a5.md")
(include "cortex-a8.md")
(include "cortex-a9.md")
......
......@@ -52,13 +52,16 @@
/* The BPABI integer comparison routines return { -1, 0, 1 }. */
#define TARGET_LIB_INT_CMP_BIASED !TARGET_BPABI
#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*|march=armv4:--fix-v4bx}"
#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
#define BE8_LINK_SPEC " %{mbig-endian:%{march=armv7-a|mcpu=cortex-a5"\
"|mcpu=cortex-a8|mcpu=cortex-a9|mcpu=cortex-a15:%{!r:--be8}}}"
/* Tell the assembler to build BPABI binaries. */
#undef SUBTARGET_EXTRA_ASM_SPEC
#define SUBTARGET_EXTRA_ASM_SPEC "%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
#define SUBTARGET_EXTRA_ASM_SPEC \
"%{mabi=apcs-gnu|mabi=atpcs:-meabi=gnu;:-meabi=5}" TARGET_FIX_V4BX_SPEC
#ifndef SUBTARGET_EXTRA_LINK_SPEC
#define SUBTARGET_EXTRA_LINK_SPEC ""
......
;; Faraday FA526 Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;; These descriptions are based on the information contained in the
;; FA526 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;;
;; Modeled pipeline characteristics:
;; LD -> any use: latency = 3 (2 cycle penalty).
;; ALU -> any use: latency = 2 (1 cycle penalty).
;; This automaton provides a pipeline description for the Faraday
;; FA526 core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
(define_automaton "fa526")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; S E M W
(define_cpu_unit "fa526_core" "fa526")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations
(define_insn_reservation "526_alu_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "alu"))
"fa526_core")
(define_insn_reservation "526_alu_shift_op" 2
(and (eq_attr "tune" "fa526")
(eq_attr "type" "alu_shift,alu_shift_reg"))
"fa526_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn_reservation "526_mult1" 2
(and (eq_attr "tune" "fa526")
(eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy"))
"fa526_core")
(define_insn_reservation "526_mult2" 5
(and (eq_attr "tune" "fa526")
(eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
umlals,smulls,smlals,smlawx"))
"fa526_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
(define_insn_reservation "526_load1_op" 3
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load1,load_byte"))
"fa526_core")
(define_insn_reservation "526_load2_op" 4
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load2"))
"fa526_core*2")
(define_insn_reservation "526_load3_op" 5
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load3"))
"fa526_core*3")
(define_insn_reservation "526_load4_op" 6
(and (eq_attr "tune" "fa526")
(eq_attr "type" "load4"))
"fa526_core*4")
(define_insn_reservation "526_store1_op" 0
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store1"))
"fa526_core")
(define_insn_reservation "526_store2_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store2"))
"fa526_core*2")
(define_insn_reservation "526_store3_op" 2
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store3"))
"fa526_core*3")
(define_insn_reservation "526_store4_op" 3
(and (eq_attr "tune" "fa526")
(eq_attr "type" "store4"))
"fa526_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA526
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycle to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
(define_insn_reservation "526_branch_op" 0
(and (eq_attr "tune" "fa526")
(eq_attr "type" "branch"))
"fa526_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value. For most cases, the return value is set
;; by a mov instruction, which has 1 cycle latency.
(define_insn_reservation "526_call_op" 1
(and (eq_attr "tune" "fa526")
(eq_attr "type" "call"))
"fa526_core")
;; Faraday FA606TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;; These descriptions are based on the information contained in the
;; FA606TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Modeled pipeline characteristics:
;; LD -> any use: latency = 2 (1 cycle penalty).
;; ALU -> any use: latency = 1 (0 cycle penalty).
;; This automaton provides a pipeline description for the Faraday
;; FA606TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
(define_automaton "fa606te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; E M W
(define_cpu_unit "fa606te_core" "fa606te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations
(define_insn_reservation "606te_alu_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "alu,alu_shift,alu_shift_reg"))
"fa606te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn_reservation "606te_mult1" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "smlalxy"))
"fa606te_core")
(define_insn_reservation "606te_mult2" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy"))
"fa606te_core*2")
(define_insn_reservation "606te_mult3" 4
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "mul,mla,muls,mlas"))
"fa606te_core*3")
(define_insn_reservation "606te_mult4" 5
(and (eq_attr "tune" "fa606te")
(eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
"fa606te_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
(define_insn_reservation "606te_load1_op" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load1,load_byte"))
"fa606te_core")
(define_insn_reservation "606te_load2_op" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load2"))
"fa606te_core*2")
(define_insn_reservation "606te_load3_op" 4
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load3"))
"fa606te_core*3")
(define_insn_reservation "606te_load4_op" 5
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "load4"))
"fa606te_core*4")
(define_insn_reservation "606te_store1_op" 0
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store1"))
"fa606te_core")
(define_insn_reservation "606te_store2_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store2"))
"fa606te_core*2")
(define_insn_reservation "606te_store3_op" 2
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store3"))
"fa606te_core*3")
(define_insn_reservation "606te_store4_op" 3
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "store4"))
"fa606te_core*4")
;;(define_insn_reservation "606te_ldm_op" 9
;; (and (eq_attr "tune" "fa606te")
;; (eq_attr "type" "load2,load3,load4,store2,store3,store4"))
;; "fa606te_core*7")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA606TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycles to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
(define_insn_reservation "606te_branch_op" 0
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "branch"))
"fa606te_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value. For most cases, the return value is set
;; by a mov instruction, which has 1 cycle latency.
(define_insn_reservation "606te_call_op" 1
(and (eq_attr "tune" "fa606te")
(eq_attr "type" "call"))
"fa606te_core")
;; Faraday FA626TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;; These descriptions are based on the information contained in the
;; FA626TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Modeled pipeline characteristics:
;; ALU -> simple address LDR/STR: latency = 2 (available after 2 cycles).
;; ALU -> shifted address LDR/STR: latency = 3.
;; ( extra 1 cycle unavoidable stall).
;; ALU -> other use: latency = 2 (available after 2 cycles).
;; LD -> simple address LDR/STR: latency = 3 (available after 3 cycles).
;; LD -> shifted address LDR/STR: latency = 4
;; ( extra 1 cycle unavoidable stall).
;; LD -> any other use: latency = 3 (available after 3 cycles).
;; This automaton provides a pipeline description for the Faraday
;; FA626TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
(define_automaton "fa626te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; S E M W
(define_cpu_unit "fa626te_core" "fa626te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations
(define_insn_reservation "626te_alu_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "alu"))
"fa626te_core")
(define_insn_reservation "626te_alu_shift_op" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "alu_shift,alu_shift_reg"))
"fa626te_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn_reservation "626te_mult1" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
"fa626te_core")
(define_insn_reservation "626te_mult2" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "mul,mla"))
"fa626te_core")
(define_insn_reservation "626te_mult3" 3
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
"fa626te_core*2")
(define_insn_reservation "626te_mult4" 4
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "insn" "smulls,smlals,umulls,umlals"))
"fa626te_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
(define_insn_reservation "626te_load1_op" 3
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load1,load_byte"))
"fa626te_core")
(define_insn_reservation "626te_load2_op" 4
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load2,load3"))
"fa626te_core*2")
(define_insn_reservation "626te_load3_op" 5
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "load4"))
"fa626te_core*3")
(define_insn_reservation "626te_store1_op" 0
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store1"))
"fa626te_core")
(define_insn_reservation "626te_store2_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store2,store3"))
"fa626te_core*2")
(define_insn_reservation "626te_store3_op" 2
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "store4"))
"fa626te_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA626TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycle to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
(define_insn_reservation "626te_branch_op" 0
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "branch"))
"fa626te_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value.
(define_insn_reservation "626te_call_op" 1
(and (eq_attr "tune" "fa626,fa626te")
(eq_attr "type" "call"))
"fa626te_core")
;; Faraday FA726TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by I-Jui Sung, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;; These descriptions are based on the information contained in the
;; FA726TE Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; This automaton provides a pipeline description for the Faraday
;; FA726TE core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
(define_automaton "fa726te")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
;; E1 E2 E3 E4 E5 WB
;;______________________________________________________
;;
;; <-------------- LD/ST ----------->
;; shifter + LU <-- AU -->
;; <-- AU --> shifter + LU CPSR (Pipe 0)
;;______________________________________________________
;;
;; <---------- MUL --------->
;; shifter + LU <-- AU -->
;; <-- AU --> shifter + LU CPSR (Pipe 1)
(define_cpu_unit "fa726te_alu0_pipe,fa726te_alu1_pipe" "fa726te")
(define_cpu_unit "fa726te_mac_pipe" "fa726te")
(define_cpu_unit "fa726te_lsu_pipe_e,fa726te_lsu_pipe_w" "fa726te")
;; Pretend we have 2 LSUs (the second is ONLY for LDR), which can possibly
;; improve code quality.
(define_query_cpu_unit "fa726te_lsu1_pipe_e,fa726te_lsu1_pipe_w" "fa726te")
(define_cpu_unit "fa726te_is0,fa726te_is1" "fa726te")
(define_reservation "fa726te_issue" "(fa726te_is0|fa726te_is1)")
;; Reservation to restrict issue to 1.
(define_reservation "fa726te_blockage" "(fa726te_is0+fa726te_is1)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require three cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; Move instructions.
(define_insn_reservation "726te_shift_op" 1
(and (eq_attr "tune" "fa726te")
(eq_attr "insn" "mov,mvn"))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;; ALU operations with no shifted operand will finished in 1 cycle
;; Other ALU instructions 2 cycles.
(define_insn_reservation "726te_alu_op" 1
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;; ALU operations with a shift-by-register operand.
;; These really stall in the decoder, in order to read the shift value
;; in the first cycle. If the instruction uses both shifter and AU,
;; it takes 3 cycles.
(define_insn_reservation "726te_alu_shift_op" 3
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu_shift")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
(define_insn_reservation "726te_alu_shift_reg_op" 3
(and (eq_attr "tune" "fa726te")
(and (eq_attr "type" "alu_shift_reg")
(not (eq_attr "insn" "mov,mvn"))))
"fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication instructions loop in the execute stage until the
;; instruction has been passed through the multiplier array enough
;; times. Multiply operations occur in both the execute and memory
;; stages of the pipeline
(define_insn_reservation "726te_mult_op" 3
(and (eq_attr "tune" "fa726te")
(eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
"fa726te_issue+fa726te_mac_pipe")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
;; Loads with a shifted offset take 3 cycles, and are (a) probably the
;; most common and (b) the pessimistic assumption will lead to fewer stalls.
;; Scalar loads are pipelined in FA726TE LSU pipe.
;; Here we model the resource conflict between Load@E3-stage & Store@W-stage.
;; The 2nd LSU (lsu1) is to model the fact that if 2 loads are scheduled in the
;; same "bundle", and the 2nd load will introudce another ISSUE stall but is
;; still ok to execute (and may be benefical sometimes).
(define_insn_reservation "726te_load1_op" 3
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "load1,load_byte"))
"(fa726te_issue+fa726te_lsu_pipe_e+fa726te_lsu_pipe_w)\
| (fa726te_issue+fa726te_lsu1_pipe_e+fa726te_lsu1_pipe_w,fa726te_blockage)")
(define_insn_reservation "726te_store1_op" 1
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "store1"))
"fa726te_blockage*2")
;; Load/Store Multiple blocks all pipelines in EX stages until WB.
;; No other instructions can be issued together. Since they essentially
;; prevent all scheduling opportunities, we model them together here.
;; The LDM is breaking into multiple load instructions, later instruction in
;; the pipe 1 is stalled.
(define_insn_reservation "726te_ldm2_op" 4
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "load2,load3"))
"fa726te_blockage*4")
(define_insn_reservation "726te_ldm3_op" 5
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "load4"))
"fa726te_blockage*5")
(define_insn_reservation "726te_stm2_op" 2
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "store2,store3"))
"fa726te_blockage*3")
(define_insn_reservation "726te_stm3_op" 3
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "store4"))
"fa726te_blockage*4")
(define_bypass 1 "726te_load1_op,726te_ldm2_op,726te_ldm3_op" "726te_store1_op,\
726te_stm2_op,726te_stm3_op" "arm_no_early_store_addr_dep")
(define_bypass 0 "726te_shift_op,726te_alu_op,726te_alu_shift_op,\
726te_alu_shift_reg_op,726te_mult_op" "726te_store1_op"
"arm_no_early_store_addr_dep")
(define_bypass 0 "726te_shift_op,726te_alu_op" "726te_shift_op,726te_alu_op")
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op"
"726te_shift_op,726te_alu_op")
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
"726te_alu_shift_op" "arm_no_early_alu_shift_dep")
(define_bypass 1 "726te_alu_shift_op,726te_alu_shift_reg_op,726te_mult_op"
"726te_alu_shift_reg_op" "arm_no_early_alu_shift_value_dep")
(define_bypass 1 "726te_mult_op" "726te_shift_op,726te_alu_op")
(define_bypass 4 "726te_load1_op" "726te_mult_op")
(define_bypass 5 "726te_ldm2_op" "726te_mult_op")
(define_bypass 6 "726te_ldm3_op" "726te_mult_op")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FA726TE
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycle to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
(define_insn_reservation "726te_branch_op" 0
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "branch"))
"fa726te_blockage")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 is ready for int return value.
(define_insn_reservation "726te_call_op" 1
(and (eq_attr "tune" "fa726te")
(eq_attr "type" "call"))
"fa726te_blockage")
;; Faraday FA626TE Pipeline Description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Written by Mingfeng Wu, based on ARM926EJ-S Pipeline Description.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3, or (at your option) any later
;; version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
;; These descriptions are based on the information contained in the
;; FMP626 Core Design Note, Copyright (c) 2010 Faraday Technology Corp.
;; Pipeline architecture
;; S E M W(Q1) Q2
;; ___________________________________________
;; shifter alu
;; mul1 mul2 mul3
;; ld/st1 ld/st2 ld/st3 ld/st4 ld/st5
;; This automaton provides a pipeline description for the Faraday
;; FMP626 core.
;;
;; The model given here assumes that the condition for all conditional
;; instructions is "true", i.e., that all of the instructions are
;; actually executed.
(define_automaton "fmp626")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pipelines
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; There is a single pipeline
;;
;; The ALU pipeline has fetch, decode, execute, memory, and
;; write stages. We only need to model the execute, memory and write
;; stages.
(define_cpu_unit "fmp626_core" "fmp626")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions require two cycles to execute, and use the ALU
;; pipeline in each of the three stages. The results are available
;; after the execute stage stage has finished.
;;
;; If the destination register is the PC, the pipelines are stalled
;; for several cycles. That case is not modeled here.
;; ALU operations
(define_insn_reservation "mp626_alu_op" 1
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "alu"))
"fmp626_core")
(define_insn_reservation "mp626_alu_shift_op" 2
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "alu_shift,alu_shift_reg"))
"fmp626_core")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Multiplication Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn_reservation "mp626_mult1" 2
(and (eq_attr "tune" "fmp626")
(eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
"fmp626_core")
(define_insn_reservation "mp626_mult2" 2
(and (eq_attr "tune" "fmp626")
(eq_attr "insn" "mul,mla"))
"fmp626_core")
(define_insn_reservation "mp626_mult3" 3
(and (eq_attr "tune" "fmp626")
(eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
"fmp626_core*2")
(define_insn_reservation "mp626_mult4" 4
(and (eq_attr "tune" "fmp626")
(eq_attr "insn" "smulls,smlals,umulls,umlals"))
"fmp626_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Load/Store Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The models for load/store instructions do not accurately describe
;; the difference between operations with a base register writeback
;; (such as "ldm!"). These models assume that all memory references
;; hit in dcache.
(define_insn_reservation "mp626_load1_op" 5
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "load1,load_byte"))
"fmp626_core")
(define_insn_reservation "mp626_load2_op" 6
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "load2,load3"))
"fmp626_core*2")
(define_insn_reservation "mp626_load3_op" 7
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "load4"))
"fmp626_core*3")
(define_insn_reservation "mp626_store1_op" 0
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "store1"))
"fmp626_core")
(define_insn_reservation "mp626_store2_op" 1
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "store2,store3"))
"fmp626_core*2")
(define_insn_reservation "mp626_store3_op" 2
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "store4"))
"fmp626_core*3")
(define_bypass 1 "mp626_load1_op,mp626_load2_op,mp626_load3_op"
"mp626_store1_op,mp626_store2_op,mp626_store3_op"
"arm_no_early_store_addr_dep")
(define_bypass 1 "mp626_alu_op,mp626_alu_shift_op,mp626_mult1,mp626_mult2,\
mp626_mult3,mp626_mult4" "mp626_store1_op"
"arm_no_early_store_addr_dep")
(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_op")
(define_bypass 1 "mp626_alu_shift_op" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 2 "mp626_mult3" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 3 "mp626_mult4" "mp626_alu_shift_op"
"arm_no_early_alu_shift_dep")
(define_bypass 1 "mp626_mult1,mp626_mult2" "mp626_alu_op")
(define_bypass 2 "mp626_mult3" "mp626_alu_op")
(define_bypass 3 "mp626_mult4" "mp626_alu_op")
(define_bypass 4 "mp626_load1_op" "mp626_alu_op")
(define_bypass 5 "mp626_load2_op" "mp626_alu_op")
(define_bypass 6 "mp626_load3_op" "mp626_alu_op")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch and Call Instructions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Branch instructions are difficult to model accurately. The FMP626
;; core can predict most branches. If the branch is predicted
;; correctly, and predicted early enough, the branch can be completely
;; eliminated from the instruction stream. Some branches can
;; therefore appear to require zero cycle to execute. We assume that
;; all branches are predicted correctly, and that the latency is
;; therefore the minimum value.
(define_insn_reservation "mp626_branch_op" 0
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "branch"))
"fmp626_core")
;; The latency for a call is actually the latency when the result is available.
;; i.e. R0 ready for int return value.
(define_insn_reservation "mp626_call_op" 1
(and (eq_attr "tune" "fmp626")
(eq_attr "type" "call"))
"fmp626_core")
......@@ -24,6 +24,11 @@ MD_INCLUDES= $(srcdir)/config/arm/arm-tune.md \
$(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/arm1026ejs.md \
$(srcdir)/config/arm/arm1136jfs.md \
$(srcdir)/config/arm/fa526.md \
$(srcdir)/config/arm/fa606te.md \
$(srcdir)/config/arm/fa626te.md \
$(srcdir)/config/arm/fmp626.md \
$(srcdir)/config/arm/fa726te.md \
$(srcdir)/config/arm/arm926ejs.md \
$(srcdir)/config/arm/cirrus.md \
$(srcdir)/config/arm/fpa.md \
......
......@@ -36,6 +36,10 @@ MULTILIB_DIRNAMES = arm thumb
MULTILIB_EXCEPTIONS =
MULTILIB_MATCHES =
#MULTILIB_OPTIONS += mcpu=fa526/mcpu=fa626/mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
#MULTILIB_DIRNAMES += fa526 fa626 fa606te fa626te fmp626 fa726te
#MULTILIB_EXCEPTIONS += *mthumb*/*mcpu=fa526 *mthumb*/*mcpu=fa626
#MULTILIB_OPTIONS += march=armv7
#MULTILIB_DIRNAMES += thumb2
#MULTILIB_EXCEPTIONS += march=armv7* marm/*march=armv7*
......@@ -52,6 +56,8 @@ MULTILIB_MATCHES =
MULTILIB_OPTIONS += mfloat-abi=hard
MULTILIB_DIRNAMES += fpu
MULTILIB_EXCEPTIONS += *mthumb/*mfloat-abi=hard*
#MULTILIB_EXCEPTIONS += *mcpu=fa526/*mfloat-abi=hard*
#MULTILIB_EXCEPTIONS += *mcpu=fa626/*mfloat-abi=hard*
# MULTILIB_OPTIONS += mcpu=ep9312
# MULTILIB_DIRNAMES += ep9312
......
......@@ -24,6 +24,10 @@ TARGET_LIBGCC2_CFLAGS = -fPIC
MULTILIB_OPTIONS =
MULTILIB_DIRNAMES =
#MULTILIB_OPTIONS += mcpu=fa606te/mcpu=fa626te/mcpu=fmp626/mcpu=fa726te
#MULTILIB_DIRNAMES += fa606te fa626te fmp626 fa726te
#MULTILIB_EXCEPTIONS += *mthumb/*mcpu=fa606te *mthumb/*mcpu=fa626te *mthumb/*mcpu=fmp626 *mthumb/*mcpu=fa726te*
# Use a version of div0 which raises SIGFPE, and a special __clear_cache.
LIB1ASMFUNCS := $(filter-out _dvmd_tls,$(LIB1ASMFUNCS)) _dvmd_lnx _clear_cache
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment