i386: Add variable vec_set for 32bit vectors [PR97194]
To generate sane code, an SSE4.1 variable PBLENDV instruction is needed. Also enable variable vec_set through the vec_setm_operand predicate for TARGET_SSE4_1 instead of TARGET_AVX2. ix86_expand_vector_init_duplicate is able to emulate vpbroadcast{b,w} with pxor/pshufb. 2021-07-06 Uroš Bizjak <ubizjak@gmail.com> gcc/ PR target/97194 * config/i386/predicates.md (vec_setm_operand): Enable register_operand for TARGET_SSE4_1. * config/i386/mmx.md (vec_setv2hi): Use vec_setm_operand as operand 2 predicate. Call ix86_expand_vector_set_var for non-constant index operand. (vec_setv4qi): Use vec_setm_mmx_operand as operand 2 predicate. Call ix86_expand_vector_set_var for non-constant index operand. gcc/testsuite/ PR target/97194 * gcc.target/i386/sse4_1-vec-set-1a.c: New test. * gcc.target/i386/sse4_1-vec-set-2a.c: Ditto.
This commit is contained in:
parent
6b096c1731
commit
f65878178a
4 changed files with 77 additions and 7 deletions
|
@ -3534,11 +3534,14 @@
|
|||
(define_expand "vec_setv2hi"
|
||||
[(match_operand:V2HI 0 "register_operand")
|
||||
(match_operand:HI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
(match_operand 2 "vec_setm_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
if (CONST_INT_P (operands[2]))
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
else
|
||||
ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
@ -3556,11 +3559,14 @@
|
|||
(define_expand "vec_setv4qi"
|
||||
[(match_operand:V4QI 0 "register_operand")
|
||||
(match_operand:QI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
(match_operand 2 "vec_setm_mmx_operand")]
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
if (CONST_INT_P (operands[2]))
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
else
|
||||
ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
|
@ -1023,7 +1023,7 @@
|
|||
;; True for registers or const_int_operand; used by the vec_setm expander.
|
||||
(define_predicate "vec_setm_operand"
|
||||
(ior (and (match_operand 0 "register_operand")
|
||||
(match_test "TARGET_AVX2"))
|
||||
(match_test "TARGET_SSE4_1"))
|
||||
(match_code "const_int")))
|
||||
|
||||
(define_predicate "vec_setm_mmx_operand"
|
||||
|
|
20
gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c
Normal file
20
gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1a.c
Normal file
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-msse4.1 -O2" } */
|
||||
/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 2 } } */
|
||||
|
||||
typedef char v4qi __attribute__ ((vector_size (4)));
|
||||
typedef short v2hi __attribute__ ((vector_size (4)));
|
||||
|
||||
#define FOO(VTYPE, TYPE) \
|
||||
VTYPE \
|
||||
__attribute__ ((noipa)) \
|
||||
foo_##VTYPE (VTYPE a, TYPE b, unsigned int c) \
|
||||
{ \
|
||||
a[c] = b; \
|
||||
return a; \
|
||||
} \
|
||||
|
||||
FOO (v4qi, char);
|
||||
|
||||
FOO (v2hi, short);
|
44
gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c
Normal file
44
gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2a.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
/* { dg-options "-O2 -msse4.1" } */
|
||||
|
||||
|
||||
#ifndef CHECK
|
||||
#define CHECK "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
#include "sse4_1-vec-set-1a.c"
|
||||
|
||||
#define CALC_TEST(vtype, type, N, idx) \
|
||||
do \
|
||||
{ \
|
||||
int i,val = idx * idx - idx * 3 + 16; \
|
||||
type res[N],exp[N]; \
|
||||
vtype resv; \
|
||||
for (i = 0; i < N; i++) \
|
||||
{ \
|
||||
res[i] = i * i - i * 3 + 15; \
|
||||
exp[i] = res[i]; \
|
||||
} \
|
||||
exp[idx] = val; \
|
||||
resv = foo_##vtype (*(vtype *)&res[0], val, idx); \
|
||||
for (i = 0; i < N; i++) \
|
||||
{ \
|
||||
if (resv[i] != exp[i]) \
|
||||
abort (); \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
CALC_TEST (v4qi, char, 4, 2);
|
||||
CALC_TEST (v2hi, short, 2, 1);
|
||||
}
|
Loading…
Add table
Reference in a new issue