Commit 5169fa77 by Sylvia Taylor, committed by Kyrylo Tkachov

[aarch64]: redefine aes patterns

This first patch removes aarch64 usage of the aese/aesmc and aesd/aesimc
fusions (i.e. aes fusion) implemented in the scheduler due to unpredictable
behaviour observed in cases such as:
- when register allocation goes bad (e.g. extra movs)
- aes operations with xor and zeroed keys among interleaved operations

A more stable version is instead provided by doing the aes fusion during
the combine pass. Since the aese and aesd patterns have been rewritten to
encapsulate an xor operation, the existing combine fusion patterns have
also been updated. The purpose is to avoid the need for additional combine
patterns for cases like the ones below:

For AESE (though it also applies to AESD, as both involve an xor operation):

    data = data ^ key;
    data = vaeseq_u8(data, zero);
    ---
    eor         v1.16b, v0.16b, v1.16b
    aese        v1.16b, v2.16b

Should carry the same meaning, and generate the same code, as:

    data = vaeseq_u8(data, key);
    ---
    aese        v1.16b, v0.16b
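The same folding is intended for the decrypt direction. Below is a minimal
sketch (the helper names decrypt_round_split and decrypt_round_direct are
purely illustrative, not part of the patch): with the xor now encapsulated in
the aesd pattern, both forms are expected to end up as a single aesd.

    #include <arm_neon.h>

    /* Explicit xor with the key, then AESD with a zeroed key.  Without the
       xor-aware pattern this is an eor followed by an aesd; with it, combine
       is expected to fold the eor into the aesd.  */
    uint8x16_t
    decrypt_round_split (uint8x16_t data, uint8x16_t key)
    {
      const uint8x16_t zero = vdupq_n_u8 (0);
      data = data ^ key;
      return vaesdq_u8 (data, zero);
    }

    /* The direct form, which should generate the same single aesd.  */
    uint8x16_t
    decrypt_round_direct (uint8x16_t data, uint8x16_t key)
    {
      return vaesdq_u8 (data, key);
    }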

2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>

	* config/aarch64/aarch64-simd.md
	(aarch64_crypto_aes<aes_op>v16qi): Redefine pattern with xor.
	(aarch64_crypto_aes<aesmc_op>v16qi): Remove attribute enabled.
	(*aarch64_crypto_aes<aes_op>v16qi_xor_combine): Remove both.
	(*aarch64_crypto_aese_fused,
	*aarch64_crypto_aesd_fused): Update to new definition.
	* config/aarch64/aarch64.c
	(aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check.

	* gcc.target/aarch64/crypto-fuse-1.c: Remove.
	* gcc.target/aarch64/crypto-fuse-2.c: Remove.
	* gcc.target/aarch64/aes-fuse-1.c: New testcase.
	* gcc.target/aarch64/aes-fuse-2.c: New testcase.

From-SVN: r273304
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6053,56 +6053,23 @@
 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
-                       (match_operand:V16QI 2 "register_operand" "w")]
+        (unspec:V16QI
+          [(xor:V16QI
+            (match_operand:V16QI 1 "register_operand" "%0")
+            (match_operand:V16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
   "TARGET_SIMD && TARGET_AES"
   "aes<aes_op>\\t%0.16b, %2.16b"
   [(set_attr "type" "crypto_aese")]
 )
 
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(xor:V16QI
-                        (match_operand:V16QI 1 "register_operand" "%0")
-                        (match_operand:V16QI 2 "register_operand" "w"))
-                       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
-         CRYPTO_AES))]
-  "TARGET_SIMD && TARGET_AES"
-  "aes<aes_op>\\t%0.16b, %2.16b"
-  [(set_attr "type" "crypto_aese")]
-)
-
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
-                       (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                                  (match_operand:V16QI 2 "register_operand" "w"))]
-         CRYPTO_AES))]
-  "TARGET_SIMD && TARGET_AES"
-  "aes<aes_op>\\t%0.16b, %2.16b"
-  [(set_attr "type" "crypto_aese")]
-)
-
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;; AESE Vn, _
-;; AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
-        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
          CRYPTO_AESMC))]
   "TARGET_SIMD && TARGET_AES"
   "aes<aesmc_op>\\t%0.16b, %1.16b"
-  [(set_attr "type" "crypto_aesmc")
-   (set_attr_alternative "enabled"
-     [(if_then_else (match_test
-                       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
-                     (const_string "yes" )
-                     (const_string "no"))
-      (const_string "yes")])]
+  [(set_attr "type" "crypto_aesmc")]
 )
 
 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
@@ -6111,12 +6078,14 @@
 ;; Mash the two together during combine.
 
 (define_insn "*aarch64_crypto_aese_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-            [(match_operand:V16QI 1 "register_operand" "0")
-             (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
-          ] UNSPEC_AESMC))]
+            [(xor:V16QI
+              (match_operand:V16QI 1 "register_operand" "%0")
+              (match_operand:V16QI 2 "register_operand" "w"))]
+           UNSPEC_AESE)]
+         UNSPEC_AESMC))]
   "TARGET_SIMD && TARGET_AES
    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
@@ -6130,12 +6099,14 @@
 ;; Mash the two together during combine.
 
 (define_insn "*aarch64_crypto_aesd_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-            [(match_operand:V16QI 1 "register_operand" "0")
-             (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
-          ] UNSPEC_AESIMC))]
+            [(xor:V16QI
+              (match_operand:V16QI 1 "register_operand" "%0")
+              (match_operand:V16QI 2 "register_operand" "w"))]
+           UNSPEC_AESD)]
+         UNSPEC_AESIMC))]
   "TARGET_SIMD && TARGET_AES
    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -17965,10 +17965,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
-  if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
-       && aarch_crypto_can_dual_issue (prev, curr))
-    return true;
-
   if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
       && any_condjump_p (curr))
     {
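To make the effect of the compiler-side changes concrete, here is a small
sketch (illustrative only; the function name aes_round is not part of the
patch). With the scheduler-based check removed above, pairing the two
instructions is now left to the *aarch64_crypto_aese_fused combine pattern,
so on a core that enables AESE/AESMC fusion the round below is expected to
emit the aese/aesmc pair back to back.

#include <arm_neon.h>

/* One AES encryption round: AddRoundKey/SubBytes/ShiftRows (aese)
   followed by MixColumns (aesmc).  Compiled with, e.g.,
   -O3 -mcpu=cortex-a72+crypto, combine is expected to match
   *aarch64_crypto_aese_fused and keep the two instructions together:
       aese   v0.16b, v1.16b
       aesmc  v0.16b, v0.16b  */
uint8x16_t
aes_round (uint8x16_t state, uint8x16_t key)
{
  return vaesmcq_u8 (vaeseq_u8 (state, key));
}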
--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c
 /* { dg-do compile } */
 /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 
 #define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
 #define AESMC(r, i) (r = vaesmcq_u8 (i))
 
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
 uint8x16_t dummy;
 uint8x16_t a;
 uint8x16_t b;
 uint8x16_t c;
 uint8x16_t d;
-uint8x16_t e;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
 
-void
-foo (void)
+void foo (void)
 {
-  AESE (a, a, e);
+  AESE (a, a, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (b, b, e);
+  AESE (b, b, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (c, c, e);
+  AESE (c, c, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (d, d, e);
+  AESE (d, d, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
 
-  AESMC (a, a);
+  x = x ^ k;
+  AESE (x, x, zero);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (b, b);
+
+  y = y ^ k;
+  AESE (y, y, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (d, d);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
   AESMC (c, c);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (d, d);
-}
+  AESMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
 
-/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */
+  AESMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c
 /* { dg-do compile } */
 /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 
-#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key)));
-#define AESMC(r, i) (r = vaesimcq_u8 (i))
+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESIMC(r, i) (r = vaesimcq_u8 (i))
 
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
 uint8x16_t dummy;
 uint8x16_t a;
 uint8x16_t b;
 uint8x16_t c;
 uint8x16_t d;
-uint8x16_t e;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
 
-void
-foo (void)
+void foo (void)
 {
-  AESE (a, a, e);
+  AESD (a, a, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (b, b, e);
+  AESD (b, b, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (c, c, e);
+  AESD (c, c, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (d, d, e);
+  AESD (d, d, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
 
-  AESMC (a, a);
+  x = x ^ k;
+  AESD (x, x, zero);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (b, b);
+
+  y = y ^ k;
+  AESD (y, y, zero);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (c, c);
+
+  AESIMC (d, d);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (a, a);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (d, d);
-}
 
-/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */
+  AESIMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */