From 7d8e8f89732b1f13752e1b370852c7bcbbbde259 Mon Sep 17 00:00:00 2001
From: Andrew Pinski
Date: Tue, 4 Feb 2025 22:24:52 -0800
Subject: [PATCH] aarch64: gimple fold aes[ed] [PR114522]

Instead of waiting to get the combine/rtl optimizations fixed here, this
fixes the builtins at the gimple level.  It should also provide slightly
faster compile time since we have the simplification earlier on.

Built and tested for aarch64-linux-gnu.

gcc/ChangeLog:

	PR target/114522
	* config/aarch64/aarch64-builtins.cc (aarch64_fold_aes_op): New
	function.
	(aarch64_general_gimple_fold_builtin): Call aarch64_fold_aes_op
	for crypto_aese and crypto_aesd.

Signed-off-by: Andrew Pinski
---
 gcc/config/aarch64/aarch64-builtins.cc | 29 ++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 6d5479c2e44..128cc365d3d 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -4722,6 +4722,30 @@ aarch64_fold_combine (gcall *stmt)
   return gimple_build_assign (gimple_call_lhs (stmt), ctor);
 }
 
+/* Fold a call to vaeseq_u8 and vaesdq_u8.
+   That is `vaeseq_u8 (x ^ y, 0)` gets folded
+   into `vaeseq_u8 (x, y)`.  */
+static gimple *
+aarch64_fold_aes_op (gcall *stmt)
+{
+  tree arg0 = gimple_call_arg (stmt, 0);
+  tree arg1 = gimple_call_arg (stmt, 1);
+  if (integer_zerop (arg0))
+    arg0 = arg1;
+  else if (!integer_zerop (arg1))
+    return nullptr;
+  if (TREE_CODE (arg0) != SSA_NAME)
+    return nullptr;
+  if (!has_single_use (arg0))
+    return nullptr;
+  auto *s = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (arg0));
+  if (!s || gimple_assign_rhs_code (s) != BIT_XOR_EXPR)
+    return nullptr;
+  gimple_call_set_arg (stmt, 0, gimple_assign_rhs1 (s));
+  gimple_call_set_arg (stmt, 1, gimple_assign_rhs2 (s));
+  return stmt;
+}
+
 /* Fold a call to vld1, given that it loads something of type TYPE.  */
 static gimple *
 aarch64_fold_load (gcall *stmt, tree type)
@@ -4983,6 +5007,11 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
 	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
 	break;
 
+      VAR1 (BINOPU, crypto_aese, 0, DEFAULT, v16qi)
+      VAR1 (BINOPU, crypto_aesd, 0, DEFAULT, v16qi)
+	new_stmt = aarch64_fold_aes_op (stmt);
+	break;
+
       /* Lower sqrt builtins to gimple/internal function sqrt.  */
       BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
 	new_stmt = gimple_build_call_internal (IFN_SQRT,
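
Illustration (not part of the patch): a minimal sketch of the source-level
pattern the new fold targets, assuming a target with the +aes extension
enabled; the function name do_aese is just a placeholder.  The explicit XOR
feeding the builtin is absorbed at gimple time, so the backend can emit a
single AESE instead of EOR + AESE.

#include <arm_neon.h>

uint8x16_t
do_aese (uint8x16_t data, uint8x16_t key)
{
  /* This is vaeseq_u8 (data ^ key, 0); after the fold it is treated
     as the equivalent of vaeseq_u8 (data, key).  */
  return vaeseq_u8 (veorq_u8 (data, key), vdupq_n_u8 (0));
}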