Commit 47d8f18d authored by Jie Zhang, committed by Jie Zhang

arm.c (arm_address_offset_is_imm): New.

	* config/arm/arm.c (arm_address_offset_is_imm): New.
	(arm_early_store_addr_dep): New.
	(arm_early_load_addr_dep): New.
	* config/arm/arm-protos.h (arm_early_store_addr_dep): Declare.
	(arm_early_load_addr_dep): Declare.
	(arm_address_offset_is_imm): Declare.
	* config/arm/cortex-m4.md: New file.
	* config/arm/cortex-m4-fpu.md: New file.
	* config/arm/arm.md: Include cortex-m4.md and cortex-m4-fpu.md.
	(attr generic_sched): Exclude cortexm4.
	(attr generic_vfp): Exclude cortexm4.

From-SVN: r164440
parent 06baaba3
2010-09-20 Jie Zhang <jie@codesourcery.com>
* config/arm/arm.c (arm_address_offset_is_imm): New.
(arm_early_store_addr_dep): New.
(arm_early_load_addr_dep): New.
* config/arm/arm-protos.h (arm_early_store_addr_dep): Declare.
(arm_early_load_addr_dep): Declare.
(arm_address_offset_is_imm): Declare.
* config/arm/cortex-m4.md: New file.
* config/arm/cortex-m4-fpu.md: New file.
* config/arm/arm.md: Include cortex-m4.md and cortex-m4-fpu.md.
(attr generic_sched): Exclude cortexm4.
(attr generic_vfp): Exclude cortexm4.
2010-09-20 Richard Guenther <rguenther@suse.de>
PR middle-end/45704
......
...@@ -86,6 +86,8 @@ extern int arm_coproc_mem_operand (rtx, bool); ...@@ -86,6 +86,8 @@ extern int arm_coproc_mem_operand (rtx, bool);
extern int neon_vector_mem_operand (rtx, int); extern int neon_vector_mem_operand (rtx, int);
extern int neon_struct_mem_operand (rtx); extern int neon_struct_mem_operand (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx); extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx); extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx); extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx); extern int arm_no_early_mul_dep (rtx, rtx);
...@@ -127,6 +129,7 @@ extern const char *output_move_quad (rtx *); ...@@ -127,6 +129,7 @@ extern const char *output_move_quad (rtx *);
extern const char *output_move_vfp (rtx *operands); extern const char *output_move_vfp (rtx *operands);
extern const char *output_move_neon (rtx *operands); extern const char *output_move_neon (rtx *operands);
extern int arm_attr_length_move_neon (rtx); extern int arm_attr_length_move_neon (rtx);
extern int arm_address_offset_is_imm (rtx);
extern const char *output_add_immediate (rtx *); extern const char *output_add_immediate (rtx *);
extern const char *arithmetic_instr (rtx, int); extern const char *arithmetic_instr (rtx, int);
extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int); extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
......
...@@ -13345,6 +13345,34 @@ arm_attr_length_move_neon (rtx insn) ...@@ -13345,6 +13345,34 @@ arm_attr_length_move_neon (rtx insn)
return 4; return 4;
} }
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  /* A register destination means operand 0 is not a MEM being stored to;
     there is no address to examine.  */
  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* A bare base register counts as a (zero) immediate offset.  */
  if (GET_CODE (addr) == REG)
    return 1;

  /* Otherwise require the [reg, #imm] form.  */
  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && GET_CODE (XEXP (addr, 1)) == CONST_INT)
    return 1;

  return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction. /* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */ If adding zero to one register, output nothing. */
const char * const char *
...@@ -21503,6 +21531,38 @@ arm_no_early_store_addr_dep (rtx producer, rtx consumer) ...@@ -21503,6 +21531,38 @@ arm_no_early_store_addr_dep (rtx producer, rtx consumer)
return !reg_overlap_mentioned_p (value, addr); return !reg_overlap_mentioned_p (value, addr);
} }
/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */
int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  /* Exactly the complement of the "no early dependency" predicate.  */
  return arm_no_early_store_addr_dep (producer, consumer) ? 0 : 1;
}
/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */
int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  /* Strip any conditional-execution wrapper to reach the inner pattern.  */
  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  /* For a PARALLEL, the first element is taken as the main SET.  */
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  /* The SET_DEST of the producer is the value being produced.  */
  value = XEXP (value, 0);

  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  /* The SET_SRC of the consumer (a load) is the MEM; any overlap of the
     produced register with it means the address depends on the result.  */
  addr = XEXP (addr, 1);

  return reg_overlap_mentioned_p (value, addr);
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not /* Return nonzero if the CONSUMER instruction (an ALU op) does not
have an early register shift value or amount dependency on the have an early register shift value or amount dependency on the
result of PRODUCER. */ result of PRODUCER. */
......
...@@ -498,7 +498,7 @@ ...@@ -498,7 +498,7 @@
(define_attr "generic_sched" "yes,no" (define_attr "generic_sched" "yes,no"
(const (if_then_else (const (if_then_else
(ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9") (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "yes")) (eq_attr "tune_cortexr4" "yes"))
(const_string "no") (const_string "no")
(const_string "yes")))) (const_string "yes"))))
...@@ -506,7 +506,7 @@ ...@@ -506,7 +506,7 @@
(define_attr "generic_vfp" "yes,no" (define_attr "generic_vfp" "yes,no"
(const (if_then_else (const (if_then_else
(and (eq_attr "fpu" "vfp") (and (eq_attr "fpu" "vfp")
(eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9") (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
(eq_attr "tune_cortexr4" "no")) (eq_attr "tune_cortexr4" "no"))
(const_string "yes") (const_string "yes")
(const_string "no")))) (const_string "no"))))
...@@ -521,6 +521,8 @@ ...@@ -521,6 +521,8 @@
(include "cortex-a9.md") (include "cortex-a9.md")
(include "cortex-r4.md") (include "cortex-r4.md")
(include "cortex-r4f.md") (include "cortex-r4f.md")
(include "cortex-m4.md")
(include "cortex-m4-fpu.md")
(include "vfp11.md") (include "vfp11.md")
......
;; ARM Cortex-M4 FPU pipeline description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Contributed by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; Use an artificial unit to model the FPU.
(define_cpu_unit "cortex_m4_v" "cortex_m4")

;; FP instructions occupy the integer issue resource (cortex_m4_ex, defined
;; in cortex-m4.md) together with the FPU unit.
(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v")

;; Integer instructions following VDIV or VSQRT complete out-of-order.
;; Single-precision divide: latency 15, FPU busy for 14 cycles.
(define_insn_reservation "cortex_m4_fdivs" 15
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fdivs"))
  "cortex_m4_ex_v,cortex_m4_v*13")

;; Register/constant moves within the FPU: single cycle.
(define_insn_reservation "cortex_m4_vmov_1" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fcpys,fconsts"))
  "cortex_m4_ex_v")

;; Transfers between core and FP registers: two cycles.
(define_insn_reservation "cortex_m4_vmov_2" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_2_r,r_2_f"))
  "cortex_m4_ex_v*2")

;; Single-precision multiply: result available after two cycles.
(define_insn_reservation "cortex_m4_fmuls" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fmuls"))
  "cortex_m4_ex_v")

;; Fused multiply-accumulate: latency 4, FPU occupied for three cycles.
(define_insn_reservation "cortex_m4_fmacs" 4
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fmacs"))
  "cortex_m4_ex_v*3")

;; Simple FP arithmetic (abs, neg, ...): single cycle.
(define_insn_reservation "cortex_m4_ffariths" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "ffariths"))
  "cortex_m4_ex_v")

;; Single-precision add/subtract: two cycles.
(define_insn_reservation "cortex_m4_fadds" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fadds"))
  "cortex_m4_ex_v")

;; FP compare: single cycle.
(define_insn_reservation "cortex_m4_fcmps" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "fcmps"))
  "cortex_m4_ex_v")

;; Flag transfer (e.g. VMRS to APSR): single cycle.
(define_insn_reservation "cortex_m4_f_flag" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_flag"))
  "cortex_m4_ex_v")

;; FP conversions: two cycles.
(define_insn_reservation "cortex_m4_f_cvt" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_cvt"))
  "cortex_m4_ex_v")

;; Single-word FP load: two cycles.
(define_insn_reservation "cortex_m4_f_load" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_load"))
  "cortex_m4_ex_v*2")

;; Single-word FP store: two cycles.
(define_insn_reservation "cortex_m4_f_store" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_store"))
  "cortex_m4_ex_v*2")

;; Double-word FP load: three cycles.
(define_insn_reservation "cortex_m4_f_loadd" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_loadd"))
  "cortex_m4_ex_v*3")

;; Double-word FP store: three cycles.
(define_insn_reservation "cortex_m4_f_stored" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "f_stored"))
  "cortex_m4_ex_v*3")

;; MAC instructions consume their addend one cycle later.  If the result
;; of an arithmetic instruction is consumed as the addend of the following
;; MAC instruction, the latency can be decreased by one.
(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt"
	       "cortex_m4_fmacs"
	       "arm_no_early_mul_dep")

(define_bypass 3 "cortex_m4_fmacs"
	       "cortex_m4_fmacs"
	       "arm_no_early_mul_dep")

(define_bypass 14 "cortex_m4_fdivs"
	       "cortex_m4_fmacs"
	       "arm_no_early_mul_dep")
;; ARM Cortex-M4 pipeline description
;; Copyright (C) 2010 Free Software Foundation, Inc.
;; Contributed by CodeSourcery.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
(define_automaton "cortex_m4")

;; We model the pipelining of LDR instructions by using two artificial units.
(define_cpu_unit "cortex_m4_a" "cortex_m4")
(define_cpu_unit "cortex_m4_b" "cortex_m4")

;; A fully-serializing instruction occupies both units at once.
(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b")

;; ALU and multiply is one cycle.
(define_insn_reservation "cortex_m4_alu" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "alu,alu_shift,alu_shift_reg,mult"))
  "cortex_m4_ex")

;; Byte, half-word and word load is two cycles.
;; Using the two units in sequence lets back-to-back loads pipeline.
(define_insn_reservation "cortex_m4_load1" 2
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "load_byte,load1"))
  "cortex_m4_a, cortex_m4_b")

;; str rx, [ry, #imm] is always one cycle.
(define_insn_reservation "cortex_m4_store1_1" 1
  (and (and (eq_attr "tune" "cortexm4")
	    (eq_attr "type" "store1"))
       (ne (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
  "cortex_m4_a")

;; Other byte, half-word and word store is two cycles.
(define_insn_reservation "cortex_m4_store1_2" 2
  (and (and (eq_attr "tune" "cortexm4")
	    (eq_attr "type" "store1"))
       (eq (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
  "cortex_m4_a*2")

;; Multi-register loads/stores: one cycle per register plus one.
(define_insn_reservation "cortex_m4_load2" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "load2"))
  "cortex_m4_ex*3")

(define_insn_reservation "cortex_m4_store2" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "store2"))
  "cortex_m4_ex*3")

(define_insn_reservation "cortex_m4_load3" 4
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "load3"))
  "cortex_m4_ex*4")

(define_insn_reservation "cortex_m4_store3" 4
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "store3"))
  "cortex_m4_ex*4")

(define_insn_reservation "cortex_m4_load4" 5
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "load4"))
  "cortex_m4_ex*5")

(define_insn_reservation "cortex_m4_store4" 5
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "store4"))
  "cortex_m4_ex*5")

;; If the address of load or store depends on the result of the preceding
;; instruction, the latency is increased by one.
(define_bypass 2 "cortex_m4_alu"
	       "cortex_m4_load1"
	       "arm_early_load_addr_dep")

(define_bypass 2 "cortex_m4_alu"
	       "cortex_m4_store1_1,cortex_m4_store1_2"
	       "arm_early_store_addr_dep")

(define_insn_reservation "cortex_m4_branch" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "branch"))
  "cortex_m4_ex*3")

(define_insn_reservation "cortex_m4_call" 3
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "call"))
  "cortex_m4_ex*3")

;; "block" pseudo-insns (e.g. blockage) occupy the pipeline for one cycle.
(define_insn_reservation "cortex_m4_block" 1
  (and (eq_attr "tune" "cortexm4")
       (eq_attr "type" "block"))
  "cortex_m4_ex")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment