From 4986946f3b761dd4c3e0d79ca735c90e33f4bb83 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 14 Jun 2021 20:56:18 +0200 Subject: [PATCH] i386: Split V2HImode *punpckwd to SSE instruction [PR101058] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V2HImode *punpckwd should not be split to the insn that depends on TARGET_MMX_WITH_SSE, since the latter is disabled on 32bit targets. Also return true early from ix86_vectorize_vec_perm_const when testing with V2HI mode. *punpckwd can be used to implement all permutations. 2021-06-14 Uroš Bizjak gcc/ PR target/101058 * config/i386/i386-expand.c (ix86_vectorize_vec_perm_const): Return true early when testing with V2HImode. * config/i386/mmx.md (*punpckwd): Split to sse2_pshuflw_1. gcc/testsuite/ PR target/101058 * gcc.target/i386/pr101058.c: New test. --- gcc/config/i386/i386-expand.c | 9 ++++++--- gcc/config/i386/mmx.md | 13 +++++++------ gcc/testsuite/gcc.target/i386/pr101058.c | 12 ++++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr101058.c diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 6e33f6f8196..dee3df2e3a0 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -20446,9 +20446,12 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, return false; break; case E_V2HImode: - if (!TARGET_SSE2) - return false; - break; + if (!TARGET_SSE2) + return false; + /* All implementable with *punpckwd. 
*/ + if (d.testing_p) + return true; + break; case E_V2DImode: case E_V2DFmode: if (!TARGET_SSE) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index f9e7d2786c6..1a9e7b024dd 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3368,16 +3368,18 @@ (vec_concat:V4HI (match_operand:V2HI 1 "register_operand" "0,Yw") (match_operand:V2HI 2 "register_operand" "x,Yw")) - (parallel [(match_operand 3 "const_0_to_3_operand") - (match_operand 4 "const_0_to_3_operand")])))] + (parallel [(match_operand 3 "const_0_to_3_operand") + (match_operand 4 "const_0_to_3_operand")])))] "TARGET_SSE2" "#" "&& reload_completed" [(set (match_dup 5) - (vec_select:V4HI + (vec_select:V8HI (match_dup 5) (parallel [(match_dup 3) (match_dup 4) - (const_int 0) (const_int 0)])))] + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] { rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode); rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode); @@ -3395,8 +3397,7 @@ operands[3] = GEN_INT (sel0); operands[4] = GEN_INT (sel1); - - operands[5] = lowpart_subreg (V4HImode, dest, V8HImode); + operands[5] = dest; } [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") diff --git a/gcc/testsuite/gcc.target/i386/pr101058.c b/gcc/testsuite/gcc.target/i386/pr101058.c new file mode 100644 index 00000000000..a2b251ce07d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101058.c @@ -0,0 +1,12 @@ +/* PR target/101058 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -msse2 -mno-mmx" } */ + +short add90Hybrid_a_1; +short *add90Hybrid_b, *add90Hybrid_c, *add90Hybrid_d; +void add90Hybrid() { + for (int i; i < 200; i += 2) { + add90Hybrid_c[i] = add90Hybrid_b[i]; + add90Hybrid_d[i] = add90Hybrid_a_1 - add90Hybrid_b[i + 1]; + } +}