[aarch64]: redefine aes patterns
This first patch removes the aarch64 use of the aese/aesmc and aesd/aesimc fusions
(i.e. AES fusion) implemented in the scheduler, due to unpredictable behaviour
observed in cases such as:

- register allocation going bad (e.g. extra movs);
- AES operations with xor and zeroed keys among interleaved operations.

A more stable version is instead provided by doing the AES fusion during the
combine pass. Since the aese and aesd patterns have been rewritten to encapsulate
the xor operation, the existing combine fusion patterns have also been updated.
This avoids the need for additional combine patterns for cases like the one below.

For AESE (it also applies to AESD, as both include a xor operation):

    data = data ^ key;
    data = vaeseq_u8(data, zero);
    ---
    eor   v1.16b, v0.16b, v1.16b
    aese  v1.16b, v2.16b

should mean, and generate, the same as:

    data = vaeseq_u8(data, key);
    ---
    aese  v1.16b, v0.16b

2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>

        * config/aarch64/aarch64-simd.md
        (aarch64_crypto_aes<aes_op>v16qi): Redefine pattern with xor.
        (aarch64_crypto_aes<aesmc_op>v16qi): Remove attribute enabled.
        (*aarch64_crypto_aes<aes_op>v16qi_xor_combine): Remove both.
        (*aarch64_crypto_aese_fused, *aarch64_crypto_aesd_fused): Update
        to new definition.
        * config/aarch64/aarch64.c
        (aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check.
        * gcc.target/aarch64/crypto-fuse-1.c: Remove.
        * gcc.target/aarch64/crypto-fuse-2.c: Remove.
        * gcc.target/aarch64/aes-fuse-1.c: New testcase.
        * gcc.target/aarch64/aes-fuse-2.c: New testcase.

From-SVN: r273304
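As a concrete illustration of the behaviour the redefined patterns (and the new
aes-fuse-* tests) are aiming for, here is a minimal C sketch; the function names,
the use of vdupq_n_u8 for the zero key, and the compile flags are illustrative
only and not part of the patch:

    /* Sketch only: compile with something like
       gcc -O2 -march=armv8-a+crypto -S aes-sketch.c  */
    #include <arm_neon.h>

    /* Explicit xor with the round key plus a zero-key AESE: with the xor
       folded into the aese pattern, this is expected to become a single
       "aese" instead of "eor + aese".  */
    uint8x16_t
    round_xor (uint8x16_t data, uint8x16_t key)
    {
      const uint8x16_t zero = vdupq_n_u8 (0);
      data = veorq_u8 (data, key);
      return vaeseq_u8 (data, zero);
    }

    /* AESE followed by AESMC: the *aarch64_crypto_aese_fused combine
       pattern is expected to keep the pair together as
       "aese ...; aesmc ...".  */
    uint8x16_t
    round_fused (uint8x16_t data, uint8x16_t key)
    {
      return vaesmcq_u8 (vaeseq_u8 (data, key));
    }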
parent b61184875e
commit 5169fa7732

7 changed files with 135 additions and 109 deletions
gcc/ChangeLog
@@ -1,3 +1,14 @@
+2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* config/aarch64/aarch64-simd.md
+	(aarch64_crypto_aes<aes_op>v16qi): Redefine pattern with xor.
+	(aarch64_crypto_aes<aesmc_op>v16qi): Remove attribute enabled.
+	(*aarch64_crypto_aes<aes_op>v16qi_xor_combine): Remove both.
+	(*aarch64_crypto_aese_fused,
+	*aarch64_crypto_aesd_fused): Update to new definition.
+	* config/aarch64/aarch64.c
+	(aarch_macro_fusion_pair_p): Remove aese/aesmc fusion check.
+
 2019-07-09  Richard Biener  <rguenther@suse.de>
 
 	* gimple-match.h (gimple_match_op::resimplify): New.
gcc/config/aarch64/aarch64-simd.md
@@ -6053,56 +6053,23 @@
 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
-                       (match_operand:V16QI 2 "register_operand" "w")]
+        (unspec:V16QI
+          [(xor:V16QI
+             (match_operand:V16QI 1 "register_operand" "%0")
+             (match_operand:V16QI 2 "register_operand" "w"))]
           CRYPTO_AES))]
   "TARGET_SIMD && TARGET_AES"
   "aes<aes_op>\\t%0.16b, %2.16b"
   [(set_attr "type" "crypto_aese")]
 )
 
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(xor:V16QI
-                         (match_operand:V16QI 1 "register_operand" "%0")
-                         (match_operand:V16QI 2 "register_operand" "w"))
-                       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
-         CRYPTO_AES))]
-  "TARGET_SIMD && TARGET_AES"
-  "aes<aes_op>\\t%0.16b, %2.16b"
-  [(set_attr "type" "crypto_aese")]
-)
-
-(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
-                       (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                                  (match_operand:V16QI 2 "register_operand" "w"))]
-         CRYPTO_AES))]
-  "TARGET_SIMD && TARGET_AES"
-  "aes<aes_op>\\t%0.16b, %2.16b"
-  [(set_attr "type" "crypto_aese")]
-)
-
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;;    AESE Vn, _
-;;    AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
-        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
          CRYPTO_AESMC))]
   "TARGET_SIMD && TARGET_AES"
   "aes<aesmc_op>\\t%0.16b, %1.16b"
-  [(set_attr "type" "crypto_aesmc")
-   (set_attr_alternative "enabled"
-     [(if_then_else (match_test
-                       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
-                    (const_string "yes" )
-                    (const_string "no"))
-      (const_string "yes")])]
+  [(set_attr "type" "crypto_aesmc")]
 )
 
 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
@@ -6111,12 +6078,14 @@
 ;; Mash the two together during combine.
 
 (define_insn "*aarch64_crypto_aese_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-            [(match_operand:V16QI 1 "register_operand" "0")
-             (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
-          ] UNSPEC_AESMC))]
+            [(xor:V16QI
+               (match_operand:V16QI 1 "register_operand" "%0")
+               (match_operand:V16QI 2 "register_operand" "w"))]
+            UNSPEC_AESE)]
+          UNSPEC_AESMC))]
   "TARGET_SIMD && TARGET_AES
    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
@@ -6130,12 +6099,14 @@
 ;; Mash the two together during combine.
 
 (define_insn "*aarch64_crypto_aesd_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-            [(match_operand:V16QI 1 "register_operand" "0")
-             (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
-          ] UNSPEC_AESIMC))]
+            [(xor:V16QI
+               (match_operand:V16QI 1 "register_operand" "%0")
+               (match_operand:V16QI 2 "register_operand" "w"))]
+            UNSPEC_AESD)]
+          UNSPEC_AESIMC))]
   "TARGET_SIMD && TARGET_AES
    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
gcc/config/aarch64/aarch64.c
@@ -17965,10 +17965,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
 	}
     }
 
-  if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)
-      && aarch_crypto_can_dual_issue (prev, curr))
-    return true;
-
   if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
       && any_condjump_p (curr))
     {
gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-07-09  Sylvia Taylor  <sylvia.taylor@arm.com>
+
+	* gcc.target/aarch64/crypto-fuse-1.c: Remove.
+	* gcc.target/aarch64/crypto-fuse-2.c: Remove.
+	* gcc.target/aarch64/aes-fuse-1.c: New testcase.
+	* gcc.target/aarch64/aes-fuse-2.c: New testcase.
+
 2019-07-09  Christophe Lyon  <christophe.lyon@linaro.org>
 
 	* gcc.target/arm/cmse/bitfield-1.c: Fix address of .gnu.sgstubs
gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c → gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c
@@ -1,45 +1,66 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
 
 #include <arm_neon.h>
 
 #define AESE(r, v, key) (r = vaeseq_u8 ((v), (key)));
 #define AESMC(r, i) (r = vaesmcq_u8 (i))
 
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
 uint8x16_t dummy;
 uint8x16_t a;
 uint8x16_t b;
 uint8x16_t c;
 uint8x16_t d;
-uint8x16_t e;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
 
-void foo (void)
+void
+foo (void)
 {
-  AESE (a, a, e);
+  AESE (a, a, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (b, b, e);
+  AESE (b, b, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (c, c, e);
+  AESE (c, c, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESE (d, d, e);
+  AESE (d, d, k);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
 
-  AESMC (a, a);
+  x = x ^ k;
+  AESE (x, x, zero);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (b, b);
+  y = y ^ k;
+  AESE (y, y, zero);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (d, d);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
   AESMC (c, c);
   dummy = vaddq_u8 (dummy, dummy);
   dummy = vaddq_u8 (dummy, dummy);
-  AESMC (d, d);
+  AESMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESMC (x, x);
 }
 
-/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */
+/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c (new file, 65 lines)
@@ -0,0 +1,65 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/
+
+#include <arm_neon.h>
+
+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key)));
+#define AESIMC(r, i) (r = vaesimcq_u8 (i))
+
+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+uint8x16_t dummy;
+uint8x16_t a;
+uint8x16_t b;
+uint8x16_t c;
+uint8x16_t d;
+uint8x16_t x;
+uint8x16_t y;
+uint8x16_t k;
+
+void foo (void)
+{
+  AESD (a, a, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (b, b, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (c, c, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESD (d, d, k);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  x = x ^ k;
+  AESD (x, x, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  y = y ^ k;
+  AESD (y, y, zero);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESIMC (d, d);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (c, c);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (b, b);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (a, a);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+
+  AESIMC (y, y);
+  dummy = vaddq_u8 (dummy, dummy);
+  dummy = vaddq_u8 (dummy, dummy);
+  AESIMC (x, x);
+}
+
+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */
+/* { dg-final { scan-assembler-not "veor" } } */
gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c (deleted)
@@ -1,45 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */
-
-#include <arm_neon.h>
-
-#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key)));
-#define AESMC(r, i) (r = vaesimcq_u8 (i))
-
-uint8x16_t dummy;
-uint8x16_t a;
-uint8x16_t b;
-uint8x16_t c;
-uint8x16_t d;
-uint8x16_t e;
-
-void
-foo (void)
-{
-  AESE (a, a, e);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESE (b, b, e);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESE (c, c, e);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESE (d, d, e);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-
-  AESMC (a, a);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESMC (b, b);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESMC (c, c);
-  dummy = vaddq_u8 (dummy, dummy);
-  dummy = vaddq_u8 (dummy, dummy);
-  AESMC (d, d);
-}
-
-/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */