Commit 25cc2199, authored by Evandro Menezes, committed by Evandro Menezes

Tweak the pipeline model for Exynos M1

gcc/
	* config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
	and AESMC pairs.
	* config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
	and add bypass for AES{D,E} and AESMC pairs.

From-SVN: r233647
parent 220ab1cc
2016-02-23 Evandro Menezes <e.menezes@samsung.com>
* config/arm/exynos-m1.md: Change cost of STP, fix bypass for stores
and add bypass for AES{D,E} and AESMC pairs.
* config/aarch64/aarch64.c (exynosm1_tunings): Enable fusion of AES{D,E}
and AESMC pairs.
2016-02-23 Evandro Menezes <e.menezes@samsung.com>
* config/aarch64/aarch64.c (exynosm1_tunings): Enable the Newton
series for reciprocal square root in Exynos M1.
......
...@@ -526,7 +526,7 @@ static const struct tune_params exynosm1_tunings = ...@@ -526,7 +526,7 @@ static const struct tune_params exynosm1_tunings =
&generic_branch_cost, &generic_branch_cost,
4, /* memmov_cost */ 4, /* memmov_cost */
3, /* issue_rate */ 3, /* issue_rate */
(AARCH64_FUSE_NOTHING), /* fusible_ops */ (AARCH64_FUSE_AES_AESMC), /* fusible_ops */
4, /* function_align. */ 4, /* function_align. */
4, /* jump_align. */ 4, /* jump_align. */
4, /* loop_align. */ 4, /* loop_align. */
......
...@@ -248,10 +248,6 @@ ...@@ -248,10 +248,6 @@
(eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q") (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
(const_string "neon_load4_all") (const_string "neon_load4_all")
(eq_attr "type" "f_stores, f_stored,\
neon_stp, neon_stp_q")
(const_string "neon_store")
(eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q") (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
(const_string "neon_store1_1") (const_string "neon_store1_1")
...@@ -730,8 +726,14 @@ ...@@ -730,8 +726,14 @@
(define_insn_reservation (define_insn_reservation
"exynos_m1_neon_store" 1 "exynos_m1_neon_store" 1
(and (eq_attr "tune" "exynosm1") (and (eq_attr "tune" "exynosm1")
(eq_attr "exynos_m1_neon_type" "neon_store")) (eq_attr "type" "f_stores, f_stored, neon_stp"))
"(em1_fst, em1_st)") "em1_sfst")
(define_insn_reservation
"exynos_m1_neon_store_q" 3
(and (eq_attr "tune" "exynosm1")
(eq_attr "type" "neon_stp_q"))
"(em1_sfst * 2)")
(define_insn_reservation (define_insn_reservation
"exynos_m1_neon_store1_1" 1 "exynos_m1_neon_store1_1" 1
...@@ -761,7 +763,7 @@ ...@@ -761,7 +763,7 @@
"exynos_m1_neon_store1_one" 7 "exynos_m1_neon_store1_one" 7
(and (eq_attr "tune" "exynosm1") (and (eq_attr "tune" "exynosm1")
(eq_attr "exynos_m1_neon_type" "neon_store1_one")) (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
"(em1_fst, em1_st)") "em1_sfst")
(define_insn_reservation (define_insn_reservation
"exynos_m1_neon_store2" 7 "exynos_m1_neon_store2" 7
...@@ -892,7 +894,9 @@ ...@@ -892,7 +894,9 @@
;; Pre-decrement and post-increment addressing modes update the register quickly. ;; Pre-decrement and post-increment addressing modes update the register quickly.
;; TODO: figure out how to tell the addressing mode register from the loaded one. ;; TODO: figure out how to tell the addressing mode register from the loaded one.
(define_bypass 1 "exynos_m1_store*" "exynos_m1_store*") (define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
"exynos_m1_store*, exynos_m1_neon_store*,
exynos_m1_load*, exynos_m1_neon_load*")
;; MLAs can feed other MLAs quickly. ;; MLAs can feed other MLAs quickly.
(define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*") (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
...@@ -908,7 +912,6 @@ ...@@ -908,7 +912,6 @@
(define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step" (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
"exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\ "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step") exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
(define_bypass 3 "exynos_m1_fp_add" (define_bypass 3 "exynos_m1_fp_add"
"exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac") "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 3 "exynos_m1_neon_fp_add" (define_bypass 3 "exynos_m1_neon_fp_add"
...@@ -947,6 +950,11 @@ ...@@ -947,6 +950,11 @@
"exynos_m1_crypto_simple, exynos_m1_crypto_complex,\ "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
exynos_m1_crypto_poly*") exynos_m1_crypto_poly*")
;; AES{D,E}/AESMC pairs can feed each other instantly.
(define_bypass 0 "exynos_m1_crypto_simple"
"exynos_m1_crypto_simple"
"aarch_crypto_can_dual_issue")
;; Predicted branches take no time, but mispredicted ones take forever anyway. ;; Predicted branches take no time, but mispredicted ones take forever anyway.
(define_bypass 1 "exynos_m1_*" (define_bypass 1 "exynos_m1_*"
"exynos_m1_call, exynos_m1_branch") "exynos_m1_call, exynos_m1_branch")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment