From 8968e92cc65ef4cfe9dbba86b7873490139100b0 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov
Date: Mon, 19 Jun 2023 10:56:37 +0100
Subject: [PATCH] simplify-rtx: Simplify VEC_CONCAT of SUBREG and VEC_SELECT
 from same vector

In the testcase for this patch we try to vec_concat the lowpart and highpart
of a vector, but the lowpart is expressed as a subreg.  simplify-rtx.cc does
not recognise this and combine ends up trying to match:

Trying 7 -> 8:
    7: r93:V2SI=vec_select(r95:V4SI,parallel)
    8: r97:V4SI=vec_concat(r95:V4SI#0,r93:V2SI)
      REG_DEAD r95:V4SI
      REG_DEAD r93:V2SI
Failed to match this instruction:
(set (reg:V4SI 97)
    (vec_concat:V4SI (subreg:V2SI (reg/v:V4SI 95 [ a ]) 0)
        (vec_select:V2SI (reg/v:V4SI 95 [ a ])
            (parallel:V4SI [
                    (const_int 2 [0x2])
                    (const_int 3 [0x3])
                ]))))

This should be just (set (reg:V4SI 97) (reg:V4SI 95)).  This patch adds such
a simplification.
The testcase is a bit artificial, but I do have other aarch64-specific
patterns that I want to optimise later that rely on this simplification
happening.

Without this patch for the testcase we generate:
foo:
        dup     d31, v0.d[1]
        ins     v0.d[1], v31.d[0]
        ret

whereas we should just not generate anything as the operation is ultimately
a no-op.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

	* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
	Simplify vec_concat of lowpart subreg and high part vec_select.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/low-high-combine_1.c: New test.
---
 gcc/simplify-rtx.cc                           | 11 ++++++++
 .../aarch64/simd/low-high-combine_1.c         | 28 +++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/low-high-combine_1.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 21b7eb484d0..9c68d360672 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -4860,6 +4860,17 @@ simplify_ashift:
 	    return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0),
 					gen_rtx_PARALLEL (VOIDmode, vec));
 	  }
+	/* (vec_concat:
+	     (subreg_lowpart:N OP)
+	     (vec_select:N OP P))  -->  OP when P selects the high half
+	    of the OP.  */
+	if (GET_CODE (trueop0) == SUBREG
+	    && subreg_lowpart_p (trueop0)
+	    && GET_CODE (trueop1) == VEC_SELECT
+	    && SUBREG_REG (trueop0) == XEXP (trueop1, 0)
+	    && !side_effects_p (XEXP (trueop1, 0))
+	    && vec_series_highpart_p (op1_mode, mode, XEXP (trueop1, 1)))
+	  return XEXP (trueop1, 0);
       }
 
       return 0;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/low-high-combine_1.c b/gcc/testsuite/gcc.target/aarch64/simd/low-high-combine_1.c
new file mode 100644
index 00000000000..0b502d593f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/low-high-combine_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** foo_le:	{ target aarch64_little_endian }
+**	ret
+*/
+
+int32x4_t
+foo_le (int32x4_t a)
+{
+  return vcombine_s32 (vget_low_s32 (a), vget_high_s32 (a));
+}
+
+/*
+** foo_be:	{ target aarch64_big_endian }
+**	ret
+*/
+
+int32x4_t
+foo_be (int32x4_t a)
+{
+  return vcombine_s32 (vget_high_s32 (a), vget_low_s32 (a));
+}
+