From f65878178ab05180a5937f11f8fdb755678a82ce Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 6 Jul 2021 19:27:34 +0200 Subject: [PATCH] i386: Add variable vec_set for 32bit vectors [PR97194] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To generate sane code, an SSE4.1 variable PBLENDV instruction is needed. Also enable variable vec_set through the vec_setm_operand predicate for TARGET_SSE4_1 instead of TARGET_AVX2. ix86_expand_vector_init_duplicate is able to emulate vpbroadcast{b,w} with pxor/pshufb. 2021-07-06 Uroš Bizjak gcc/ PR target/97194 * config/i386/predicates.md (vec_setm_operand): Enable register_operand for TARGET_SSE4_1. * config/i386/mmx.md (vec_setv2hi): Use vec_setm_operand as operand 2 predicate. Call ix86_expand_vector_set_var for non-constant index operand. (vec_setv4qi): Use vec_setm_mmx_operand as operand 2 predicate. Call ix86_expand_vector_set_var for non-constant index operand. gcc/testsuite/ PR target/97194 * gcc.target/i386/sse4_1-vec-set-1a.c: New test. * gcc.target/i386/sse4_1-vec-set-2a.c: Ditto. 
--- gcc/config/i386/mmx.md | 18 +++++--- gcc/config/i386/predicates.md | 2 +- .../gcc.target/i386/sse4_1-vec-set-1a.c | 20 +++++++++ .../gcc.target/i386/sse4_1-vec-set-2a.c | 44 +++++++++++++++++++ 4 files changed, 77 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 4ead8beff50..7e83b64ab59 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -3534,11 +3534,14 @@ (define_expand "vec_setv2hi" [(match_operand:V2HI 0 "register_operand") (match_operand:HI 1 "register_operand") - (match_operand 2 "const_int_operand")] + (match_operand 2 "vec_setm_operand")] "TARGET_SSE2" { - ix86_expand_vector_set (false, operands[0], operands[1], - INTVAL (operands[2])); + if (CONST_INT_P (operands[2])) + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + else + ix86_expand_vector_set_var (operands[0], operands[1], operands[2]); DONE; }) @@ -3556,11 +3559,14 @@ (define_expand "vec_setv4qi" [(match_operand:V4QI 0 "register_operand") (match_operand:QI 1 "register_operand") - (match_operand 2 "const_int_operand")] + (match_operand 2 "vec_setm_mmx_operand")] "TARGET_SSE4_1" { - ix86_expand_vector_set (false, operands[0], operands[1], - INTVAL (operands[2])); + if (CONST_INT_P (operands[2])) + ix86_expand_vector_set (false, operands[0], operands[1], + INTVAL (operands[2])); + else + ix86_expand_vector_set_var (operands[0], operands[1], operands[2]); DONE; }) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index c4b35c82506..9488632ce24 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1023,7 +1023,7 @@ ;; True for registers, or const_int_operand, used to vec_setm expander. 
(define_predicate "vec_setm_operand" (ior (and (match_operand 0 "register_operand") - (match_test "TARGET_AVX2")) + (match_test "TARGET_SSE4_1")) (match_code "const_int"))) (define_predicate "vec_setm_mmx_operand" diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c new file mode 100644 index 00000000000..e2a67a66764 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-msse4.1 -O2" } */ +/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 2 } } */ +/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 2 } } */ + +typedef char v4qi __attribute__ ((vector_size (4))); +typedef short v2hi __attribute__ ((vector_size (4))); + +#define FOO(VTYPE, TYPE) \ + VTYPE \ + __attribute__ ((noipa)) \ + foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \ + { \ + a[c] = b; \ + return a; \ + } \ + +FOO (v4qi, char); + +FOO (v2hi, short); diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c new file mode 100644 index 00000000000..5a945be5abc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c @@ -0,0 +1,44 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O2 -msse4.1" } */ + + +#ifndef CHECK +#define CHECK "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK + +#include "sse4_1-vec-set-1a.c" + +#define CALC_TEST(vtype, type, N, idx) \ +do \ + { \ + int i,val = idx * idx - idx * 3 + 16; \ + type res[N],exp[N]; \ + vtype resv; \ + for (i = 0; i < N; i++) \ + { \ + res[i] = i * i - i * 3 + 15; \ + exp[i] = res[i]; \ + } \ + exp[idx] = val; \ + resv = foo_##vtype (*(vtype *)&res[0], val, idx); \ + for (i = 0; i < N; i++) \ + { \ + if (resv[i] != exp[i]) \ + abort (); \ + } \ + } \ +while (0) + +static void +TEST (void) +{ + CALC_TEST (v4qi, char, 4, 2); + CALC_TEST (v2hi, short, 2, 1); +}