i386: Add variable vec_set for 32bit vectors [PR97194]

To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.

Also enable variable vec_set through the vec_setm_operand predicate
for TARGET_SSE4_1 instead of TARGET_AVX2.  ix86_expand_vector_init_duplicate
is able to emulate vpbroadcast{b,w} with pxor/pshufb.

2021-07-06  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
	PR target/97194
	* config/i386/predicates.md (vec_setm_operand): Enable
	register_operand for TARGET_SSE4_1.
	* config/i386/mmx.md (vec_setv2hi): Use vec_setm_operand
	as operand 2 predicate.  Call ix86_expand_vector_set_var
	for non-constant index operand.
	(vec_setv4qi): Use vec_setm_mmx_operand as operand 2 predicate.
	Call ix86_expand_vector_set_var for non-constant index operand.

gcc/testsuite/

	PR target/97194
	* gcc.target/i386/sse4_1-vec-set-1a.c: New test.
	* gcc.target/i386/sse4_1-vec-set-2a.c: Ditto.
This commit is contained in:
Uros Bizjak 2021-07-06 19:27:34 +02:00
parent 6b096c1731
commit f65878178a
4 changed files with 77 additions and 7 deletions

View file

@ -3534,11 +3534,14 @@
;; Expander for setting one HImode element (operand 1) of a V2HI vector
;; (operand 0) at the index given by operand 2.
;; NOTE(review): this listing lost its +/- diff markers, so pre- and
;; post-patch lines appear together.  Per the commit message, the
;; "const_int_operand" operand 2 line and the first, unconditional
;; ix86_expand_vector_set call are the removed lines; the
;; "vec_setm_operand" line and the CONST_INT_P if/else that falls back to
;; ix86_expand_vector_set_var for a non-constant index are the added lines.
(define_expand "vec_setv2hi"
[(match_operand:V2HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
(match_operand 2 "const_int_operand")]
(match_operand 2 "vec_setm_operand")]
"TARGET_SSE2"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
if (CONST_INT_P (operands[2]))
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
else
ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})
@ -3556,11 +3559,14 @@
;; Expander for setting one QImode element (operand 1) of a V4QI vector
;; (operand 0) at the index given by operand 2.
;; NOTE(review): this listing lost its +/- diff markers, so pre- and
;; post-patch lines appear together.  Per the commit message, the
;; "const_int_operand" operand 2 line and the first, unconditional
;; ix86_expand_vector_set call are the removed lines; the
;; "vec_setm_mmx_operand" line and the CONST_INT_P if/else that falls back
;; to ix86_expand_vector_set_var for a non-constant index are the added lines.
(define_expand "vec_setv4qi"
[(match_operand:V4QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
(match_operand 2 "const_int_operand")]
(match_operand 2 "vec_setm_mmx_operand")]
"TARGET_SSE4_1"
{
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
if (CONST_INT_P (operands[2]))
ix86_expand_vector_set (false, operands[0], operands[1],
INTVAL (operands[2]));
else
ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
DONE;
})

View file

@ -1023,7 +1023,7 @@
;; True for a const_int, or for a register_operand when the target test
;; passes; used as the index-operand predicate by the vec_setm expanders.
;; NOTE(review): this listing lost its +/- diff markers.  Per the commit
;; message, the TARGET_AVX2 test is the removed line and the TARGET_SSE4_1
;; test is its replacement.
(define_predicate "vec_setm_operand"
(ior (and (match_operand 0 "register_operand")
(match_test "TARGET_AVX2"))
(match_test "TARGET_SSE4_1"))
(match_code "const_int")))
(define_predicate "vec_setm_mmx_operand"

View file

@ -0,0 +1,20 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-msse4.1 -O2" } */
/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 2 } } */
/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 2 } } */
/* 32-bit vector types: four chars, or two shorts.  */
typedef char v4qi __attribute__ ((vector_size (4)));
typedef short v2hi __attribute__ ((vector_size (4)));

/* Define foo_<VTYPE>, which stores B into element C of vector A and
   returns the updated vector.  C is only known at run time, so this
   exercises the variable-index vec_set path.  noipa asks GCC not to
   apply interprocedural optimizations to the function.

   The last line of the macro body must not end in a backslash when an
   instantiation follows directly: the continuation would splice that
   instantiation into the macro definition, and the corresponding
   function would never be defined.  */
#define FOO(VTYPE, TYPE)				\
  VTYPE							\
  __attribute__ ((noipa))				\
  foo_##VTYPE (VTYPE a, TYPE b, unsigned int c)		\
  {							\
    a[c] = b;						\
    return a;						\
  }

FOO (v4qi, char);
FOO (v2hi, short);

View file

@ -0,0 +1,44 @@
/* { dg-do run { target { ! ia32 } } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.1" } */
#ifndef CHECK
#define CHECK "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK
#include "sse4_1-vec-set-1a.c"
/* Check one variable-index vector store.

   vtype  vector type; foo_<vtype> must be a callable taking
	  (vector, value, index) and returning the updated vector
   type   element type of vtype
   N      number of elements in vtype
   idx    element index to overwrite

   Fills an N-element array with a fixed pattern, builds the expected
   result by overwriting element idx with a value derived from idx,
   calls foo_<vtype> on the same input and aborts on the first element
   that differs.

   The input vector is built with __builtin_memcpy instead of
   dereferencing a casted pointer to the element array, so the test
   does not depend on the array being suitably aligned for vtype.  */
#define CALC_TEST(vtype, type, N, idx)				\
  do								\
    {								\
      int i, val = (idx) * (idx) - (idx) * 3 + 16;		\
      type res[N], expect[N];					\
      vtype inv, resv;						\
      for (i = 0; i < (N); i++)					\
	{							\
	  res[i] = i * i - i * 3 + 15;				\
	  expect[i] = res[i];					\
	}							\
      expect[idx] = val;					\
      __builtin_memcpy (&inv, res, sizeof (inv));		\
      resv = foo_##vtype (inv, val, idx);			\
      for (i = 0; i < (N); i++)					\
	{							\
	  if (resv[i] != expect[i])				\
	    abort ();						\
	}							\
    }								\
  while (0)
/* Test body.  TEST expands to sse4_1_test unless the including file
   defined it beforehand; presumably the harness header named by CHECK
   calls it -- confirm against that header.  Each CALC_TEST aborts on
   a mismatch.  */
static void
TEST (void)
{
/* v4qi: 4 char elements, overwrite element 2.  */
CALC_TEST (v4qi, char, 4, 2);
/* v2hi: 2 short elements, overwrite element 1.  */
CALC_TEST (v2hi, short, 2, 1);
}