Commit e4bbb037, authored and committed by Wilco Dijkstra

Improve the accuracy of the Cortex-A53 integer scheduler

This patch improves the accuracy of the Cortex-A53 integer scheduler, 
resulting in performance gains across a wide range of benchmarks.

    gcc/
	* config/arm/cortex-a53.md: Use final_presence_set for in-order.
	(cortex_a53_shift): Add mov_shift.
	(cortex_a53_shift_reg): Add new reservation for register shifts.
	(cortex_a53_alu): Remove bfm.
	(cortex_a53_alu_shift): Add bfm, remove mov_shift.
	(cortex_a53_alu_extr): Add new reservation for EXTR.
	(bypasses): Improve bypass modelling.

From-SVN: r238048
parent 136236bd
2016-07-06 Wilco Dijkstra <wdijkstr@arm.com>
* config/arm/cortex-a53.md: Use final_presence_set for in-order.
(cortex_a53_shift): Add mov_shift.
(cortex_a53_shift_reg): Add new reservation for register shifts.
(cortex_a53_alu): Remove bfm.
(cortex_a53_alu_shift): Add bfm, remove mov_shift.
(cortex_a53_alu_extr): Add new reservation for EXTR.
(bypasses): Improve bypass modelling.
2016-07-06 Senthil Kumar Selvaraj <senthil_kumar.selvaraj@atmel.com> 2016-07-06 Senthil Kumar Selvaraj <senthil_kumar.selvaraj@atmel.com>
PR target/50739 PR target/50739
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
(define_cpu_unit "cortex_a53_slot0" "cortex_a53") (define_cpu_unit "cortex_a53_slot0" "cortex_a53")
(define_cpu_unit "cortex_a53_slot1" "cortex_a53") (define_cpu_unit "cortex_a53_slot1" "cortex_a53")
(final_presence_set "cortex_a53_slot1" "cortex_a53_slot0")
(define_reservation "cortex_a53_slot_any" (define_reservation "cortex_a53_slot_any"
"cortex_a53_slot0\ "cortex_a53_slot0\
...@@ -71,41 +72,43 @@ ...@@ -71,41 +72,43 @@
(define_insn_reservation "cortex_a53_shift" 2 (define_insn_reservation "cortex_a53_shift" 2
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "adr,shift_imm,shift_reg,mov_imm,mvn_imm")) (eq_attr "type" "adr,shift_imm,mov_imm,mvn_imm,mov_shift"))
"cortex_a53_slot_any") "cortex_a53_slot_any")
(define_insn_reservation "cortex_a53_alu_rotate_imm" 2 (define_insn_reservation "cortex_a53_shift_reg" 2
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "rotate_imm")) (eq_attr "type" "shift_reg,mov_shift_reg"))
"(cortex_a53_slot1) "cortex_a53_slot_any+cortex_a53_hazard")
| (cortex_a53_single_issue)")
(define_insn_reservation "cortex_a53_alu" 3 (define_insn_reservation "cortex_a53_alu" 3
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm, (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,
alu_sreg,alus_sreg,logic_reg,logics_reg, alu_sreg,alus_sreg,logic_reg,logics_reg,
adc_imm,adcs_imm,adc_reg,adcs_reg, adc_imm,adcs_imm,adc_reg,adcs_reg,
bfm,csel,clz,rbit,rev,alu_dsp_reg, csel,clz,rbit,rev,alu_dsp_reg,
mov_reg,mvn_reg, mov_reg,mvn_reg,mrs,multiple,no_insn"))
mrs,multiple,no_insn"))
"cortex_a53_slot_any") "cortex_a53_slot_any")
(define_insn_reservation "cortex_a53_alu_shift" 3 (define_insn_reservation "cortex_a53_alu_shift" 3
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_shift_imm,alus_shift_imm, (eq_attr "type" "alu_shift_imm,alus_shift_imm,
crc,logic_shift_imm,logics_shift_imm, crc,logic_shift_imm,logics_shift_imm,
alu_ext,alus_ext, alu_ext,alus_ext,bfm,extend,mvn_shift"))
extend,mov_shift,mvn_shift"))
"cortex_a53_slot_any") "cortex_a53_slot_any")
(define_insn_reservation "cortex_a53_alu_shift_reg" 3 (define_insn_reservation "cortex_a53_alu_shift_reg" 3
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "alu_shift_reg,alus_shift_reg, (eq_attr "type" "alu_shift_reg,alus_shift_reg,
logic_shift_reg,logics_shift_reg, logic_shift_reg,logics_shift_reg,
mov_shift_reg,mvn_shift_reg")) mvn_shift_reg"))
"cortex_a53_slot_any+cortex_a53_hazard") "cortex_a53_slot_any+cortex_a53_hazard")
(define_insn_reservation "cortex_a53_mul" 3 (define_insn_reservation "cortex_a53_alu_extr" 3
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "rotate_imm"))
"cortex_a53_slot1|cortex_a53_single_issue")
(define_insn_reservation "cortex_a53_mul" 4
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(ior (eq_attr "mul32" "yes") (ior (eq_attr "mul32" "yes")
(eq_attr "mul64" "yes"))) (eq_attr "mul64" "yes")))
...@@ -189,49 +192,43 @@ ...@@ -189,49 +192,43 @@
(define_insn_reservation "cortex_a53_branch" 0 (define_insn_reservation "cortex_a53_branch" 0
(and (eq_attr "tune" "cortexa53") (and (eq_attr "tune" "cortexa53")
(eq_attr "type" "branch,call")) (eq_attr "type" "branch,call"))
"cortex_a53_slot_any,cortex_a53_branch") "cortex_a53_slot_any+cortex_a53_branch")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; General-purpose register bypasses ;; General-purpose register bypasses
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Model bypasses for unshifted operands to ALU instructions. ;; Model bypasses for ALU to ALU instructions.
(define_bypass 1 "cortex_a53_shift" (define_bypass 0 "cortex_a53_shift*"
"cortex_a53_shift") "cortex_a53_alu")
(define_bypass 1 "cortex_a53_alu, (define_bypass 1 "cortex_a53_shift*"
cortex_a53_alu_shift*, "cortex_a53_shift*,cortex_a53_alu_*")
cortex_a53_alu_rotate_imm,
cortex_a53_shift" (define_bypass 1 "cortex_a53_alu*"
"cortex_a53_alu") "cortex_a53_alu")
(define_bypass 2 "cortex_a53_alu, (define_bypass 1 "cortex_a53_alu*"
cortex_a53_alu_shift*"
"cortex_a53_alu_shift*" "cortex_a53_alu_shift*"
"aarch_forward_to_shift_is_not_shifted_reg") "aarch_forward_to_shift_is_not_shifted_reg")
;; In our model, we allow any general-purpose register operation to (define_bypass 2 "cortex_a53_alu*"
;; bypass to the accumulator operand of an integer MADD-like operation. "cortex_a53_alu_*,cortex_a53_shift*")
(define_bypass 1 "cortex_a53_alu*, ;; Model a bypass from MUL/MLA to MLA instructions.
cortex_a53_load*,
cortex_a53_mul" (define_bypass 1 "cortex_a53_mul"
"cortex_a53_mul" "cortex_a53_mul"
"aarch_accumulator_forwarding") "aarch_accumulator_forwarding")
;; Model a bypass from MLA/MUL to many ALU instructions. ;; Model a bypass from MUL/MLA to ALU instructions.
(define_bypass 2 "cortex_a53_mul" (define_bypass 2 "cortex_a53_mul"
"cortex_a53_alu, "cortex_a53_alu")
cortex_a53_alu_shift*")
;; We get neater schedules by allowing an MLA/MUL to feed an
;; early load address dependency to a load.
(define_bypass 2 "cortex_a53_mul" (define_bypass 3 "cortex_a53_mul"
"cortex_a53_load*" "cortex_a53_alu_*,cortex_a53_shift*")
"arm_early_load_addr_dep")
;; Model bypasses for loads which are to be consumed by the ALU. ;; Model bypasses for loads which are to be consumed by the ALU.
...@@ -239,47 +236,37 @@ ...@@ -239,47 +236,37 @@
"cortex_a53_alu") "cortex_a53_alu")
(define_bypass 3 "cortex_a53_load1" (define_bypass 3 "cortex_a53_load1"
"cortex_a53_alu_shift*") "cortex_a53_alu_*,cortex_a53_shift*")
(define_bypass 3 "cortex_a53_load2"
"cortex_a53_alu")
;; Model a bypass for ALU instructions feeding stores. ;; Model a bypass for ALU instructions feeding stores.
(define_bypass 1 "cortex_a53_alu*" (define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
"cortex_a53_store1, "cortex_a53_store*"
cortex_a53_store2,
cortex_a53_store3plus"
"arm_no_early_store_addr_dep") "arm_no_early_store_addr_dep")
;; Model a bypass for load and multiply instructions feeding stores. ;; Model a bypass for load and multiply instructions feeding stores.
(define_bypass 2 "cortex_a53_mul, (define_bypass 1 "cortex_a53_mul,
cortex_a53_load1, cortex_a53_load*"
cortex_a53_load2, "cortex_a53_store*"
cortex_a53_load3plus"
"cortex_a53_store1,
cortex_a53_store2,
cortex_a53_store3plus"
"arm_no_early_store_addr_dep") "arm_no_early_store_addr_dep")
;; Model a GP->FP register move as similar to stores. ;; Model a GP->FP register move as similar to stores.
(define_bypass 1 "cortex_a53_alu*" (define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
"cortex_a53_r2f") "cortex_a53_r2f")
(define_bypass 2 "cortex_a53_mul, (define_bypass 1 "cortex_a53_mul,
cortex_a53_load1, cortex_a53_load*"
cortex_a53_load2,
cortex_a53_load3plus"
"cortex_a53_r2f") "cortex_a53_r2f")
;; Shifts feeding Load/Store addresses may not be ready in time. ;; Model flag forwarding to branches.
(define_bypass 3 "cortex_a53_shift" (define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*"
"cortex_a53_load*" "cortex_a53_branch")
"arm_early_load_addr_dep")
(define_bypass 3 "cortex_a53_shift"
"cortex_a53_store*"
"arm_early_store_addr_dep")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point/Advanced SIMD. ;; Floating-point/Advanced SIMD.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment