Commit ed6eb6dc by Kyrylo Tkachov Committed by Kyrylo Tkachov

[ARM/AArch64][2/2] Crypto intrinsics tuning for Cortex-A53 - pipeline description

	* config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
	* config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue):
	Declare extern.
	* config/arm/cortex-a53.md: Add reservations and bypass for crypto
	instructions as well as AdvancedSIMD loads.

From-SVN: r208910
parent b10baa95
2014-03-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/aarch-common.c (aarch_crypto_can_dual_issue): New.
* config/arm/aarch-common-protos.h (aarch_crypto_can_dual_issue):
Declare extern.
* config/arm/cortex-a53.md: Add reservations and bypass for crypto
instructions as well as AdvancedSIMD loads.
2014-03-28 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_crypto_aes<aes_op>v16qi):
Use crypto_aese type.
(aarch64_crypto_aes<aesmc_op>v16qi): Use crypto_aesmc type.
......
......@@ -23,6 +23,7 @@
#ifndef GCC_AARCH_COMMON_PROTOS_H
#define GCC_AARCH_COMMON_PROTOS_H
/* Scheduling guard defined in aarch-common.c: nonzero when the first insn
   is an AESE (or AESD) and the second is the matching AESMC (or AESIMC)
   reading and writing the same register.  Named as the guard of the
   crypto bypass in cortex-a53.md.  */
extern int aarch_crypto_can_dual_issue (rtx, rtx);
extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
extern int arm_mac_accumulator_is_mul_result (rtx, rtx);
......
......@@ -31,6 +31,42 @@
#include "c-family/c-common.h"
#include "rtl.h"
/* Scheduling guard for AES instruction pairs.

   ARMv8-A super-scalar implementations are generally expected to issue
   both instructions of an AESE/AESMC or AESD/AESIMC sequence such as

     AESE Vn, _
     AESMC Vn, Vn

   in a single cycle.  Return nonzero if PRODUCER followed by CONSUMER
   is such a pair: both must be single sets whose sources are the
   matching UNSPECs, and the consumer must both write the register the
   producer writes and read it as the first operand of its UNSPEC.
   Return 0 in every other case.  */

int
aarch_crypto_can_dual_issue (rtx producer, rtx consumer)
{
  rtx prod_set = single_set (producer);
  rtx cons_set = single_set (consumer);
  rtx prod_src, cons_src;
  int prod_op, cons_op;
  unsigned int vn;

  /* Only insns that reduce to a single SET can be examined.  */
  if (!prod_set || !cons_set)
    return 0;

  prod_src = SET_SRC (prod_set);
  cons_src = SET_SRC (cons_set);
  if (!prod_src || !cons_src)
    return 0;

  /* The AES operations are represented as UNSPECs; anything else is
     not a candidate.  */
  if (GET_CODE (prod_src) != UNSPEC || GET_CODE (cons_src) != UNSPEC)
    return 0;

  /* The pairing must be AESE->AESMC or AESD->AESIMC.  */
  prod_op = XINT (prod_src, 1);
  cons_op = XINT (cons_src, 1);
  if (!((prod_op == UNSPEC_AESE && cons_op == UNSPEC_AESMC)
        || (prod_op == UNSPEC_AESD && cons_op == UNSPEC_AESIMC)))
    return 0;

  /* NOTE(review): REGNO is applied without first checking that these
     operands are REGs; presumably the insn patterns guarantee that --
     confirm.  */
  vn = REGNO (SET_DEST (prod_set));
  if (REGNO (SET_DEST (cons_set)) != vn)
    return 0;

  /* The consumer's first UNSPEC operand must be the producer's result.  */
  return REGNO (XVECEXP (cons_src, 0, 0)) == vn;
}
typedef struct
{
rtx_code search_code;
......
......@@ -61,6 +61,11 @@
(define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
;; The Advanced SIMD pipelines.  Two units are declared in the cortex_a53
;; automaton; the crypto and Advanced SIMD reservations in this file
;; reserve simd0, and cortex_a53_crypto_aesmc may use simd1 instead.
(define_cpu_unit "cortex_a53_simd0" "cortex_a53")
(define_cpu_unit "cortex_a53_simd1" "cortex_a53")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU instructions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
......@@ -248,6 +253,39 @@
"cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ARMv8-A Cryptographic extensions.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Insns of type crypto_aese: default latency 2, reserving simd0.
(define_insn_reservation "cortex_a53_crypto_aese" 2
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "crypto_aese"))
"cortex_a53_simd0")
;; Insns of type crypto_aesmc: default latency 2, reserving either simd0
;; or simd1 ("|" gives alternatives).
(define_insn_reservation "cortex_a53_crypto_aesmc" 2
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "crypto_aesmc"))
"cortex_a53_simd0 | cortex_a53_simd1")
;; The fast SHA1 and SHA256 types share one reservation: default latency 2,
;; reserving simd0.
(define_insn_reservation "cortex_a53_crypto_sha1_fast" 2
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "crypto_sha1_fast, crypto_sha256_fast"))
"cortex_a53_simd0")
;; Insns of type crypto_sha1_xor: default latency 3, reserving simd0.
(define_insn_reservation "cortex_a53_crypto_sha1_xor" 3
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "crypto_sha1_xor"))
"cortex_a53_simd0")
;; The slow SHA1 and SHA256 types share one reservation: default latency 5,
;; reserving simd0.
(define_insn_reservation "cortex_a53_crypto_sha_slow" 5
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
"cortex_a53_simd0")
;; When the guard aarch_crypto_can_dual_issue accepts the pair, a
;; crypto_aesmc insn depending on a crypto_aese insn sees latency 0
;; instead of the default 2, so both may be scheduled in the same cycle.
(define_bypass 0 "cortex_a53_crypto_aese"
"cortex_a53_crypto_aesmc"
"aarch_crypto_can_dual_issue")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VFP to/from core transfers.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
......@@ -284,6 +322,16 @@
(eq_attr "type" "f_loadd"))
"cortex_a53_slot0")
;; Insns of type neon_load2_2reg_q: default latency 5.  Reserves
;; cortex_a53_slot_any and cortex_a53_ls together ("+"), repeated for two
;; consecutive cycles ("*2").
(define_insn_reservation "cortex_a53_f_load_2reg" 5
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "neon_load2_2reg_q"))
"(cortex_a53_slot_any+cortex_a53_ls)*2")
;; Insns of type neon_load1_1reg_q: default latency 5.  Reserves
;; cortex_a53_slot_any and cortex_a53_ls together for one cycle.
(define_insn_reservation "cortex_a53_f_loadq" 5
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "neon_load1_1reg_q"))
"cortex_a53_slot_any+cortex_a53_ls")
(define_insn_reservation "cortex_a53_f_stores" 0
(and (eq_attr "tune" "cortexa53")
(eq_attr "type" "f_stores"))
......@@ -307,3 +355,11 @@
cortex_a53_fdivs, cortex_a53_fdivd,\
cortex_a53_f2r")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Crude Advanced SIMD approximation.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Every insn whose is_neon_type attribute is "yes" is modelled the same
;; way: default latency 4, reserving simd0.
;; NOTE(review): the reservation name lacks the "a" of the cortex_a53_
;; prefix used by the other reservations in this file; check for references
;; elsewhere before renaming.
(define_insn_reservation "cortex_53_advsimd" 4
(and (eq_attr "tune" "cortexa53")
(eq_attr "is_neon_type" "yes"))
"cortex_a53_simd0")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment