i386: Add missing vector extend patterns [PR92658]
Add the missing insn pattern for the v2qi -> v2si vector extend, and add named expanders to activate generation of vector extends to 8-byte and 4-byte vectors.

gcc/ChangeLog:

PR target/92658
* config/i386/mmx.md (sse4_1_<code>v2qiv2si2): New insn pattern.
(<insn>v4qiv4hi2): New expander.
(<insn>v2hiv2si2): Ditto.
(<insn>v2qiv2si2): Ditto.
(<insn>v2qiv2hi2): Ditto.

gcc/testsuite/ChangeLog:

PR target/92658
* gcc.target/i386/pr92658-sse4-4b.c: New test.
* gcc.target/i386/pr92658-sse4-8b.c: New test.
This commit is contained in:
parent
bdc10c2bfa
commit
608e7f3ab4
3 changed files with 159 additions and 0 deletions
|
@ -3543,6 +3543,18 @@
|
|||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Named expander for an any_extend from V4QI to V4HI.
;; Enabled under TARGET_SSE4_1 && TARGET_MMX_WITH_SSE.
;; The preparation code forces operand 1 into a register, views it as the
;; low part of a V8QImode value via lowpart_subreg, and emits
;; sse4_1_<code>v4qiv4hi2 with that wider operand.
;; NOTE(review): the sse4_1_<code>v4qiv4hi2 insn is not fully visible in
;; this hunk; presumably it takes a V8QI source -- confirm.
(define_expand "<insn>v4qiv4hi2"
|
||||
[(set (match_operand:V4HI 0 "register_operand")
|
||||
(any_extend:V4HI
|
||||
(match_operand:V4QI 1 "register_operand")))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
rtx op1 = force_reg (V4QImode, operands[1]);
|
||||
op1 = lowpart_subreg (V8QImode, op1, V4QImode);
|
||||
emit_insn (gen_sse4_1_<code>v4qiv4hi2 (operands[0], op1));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "sse4_1_<code>v2hiv2si2"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
|
||||
(any_extend:V2SI
|
||||
|
@ -3557,6 +3569,44 @@
|
|||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Named expander for an any_extend from V2HI to V2SI.
;; Enabled under TARGET_SSE4_1 && TARGET_MMX_WITH_SSE.
;; The preparation code forces operand 1 into a register, views it as the
;; low part of a V4HImode value via lowpart_subreg, and emits
;; sse4_1_<code>v2hiv2si2 with that wider operand.
;; NOTE(review): the source operand of sse4_1_<code>v2hiv2si2 is cut off in
;; this hunk; presumably it is V4HI -- confirm.
(define_expand "<insn>v2hiv2si2"
|
||||
[(set (match_operand:V2SI 0 "register_operand")
|
||||
(any_extend:V2SI
|
||||
(match_operand:V2HI 1 "register_operand")))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
rtx op1 = force_reg (V2HImode, operands[1]);
|
||||
op1 = lowpart_subreg (V4HImode, op1, V2HImode);
|
||||
emit_insn (gen_sse4_1_<code>v2hiv2si2 (operands[0], op1));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Insn pattern: any_extend to V2SI of elements 0 and 1 selected (vec_select)
;; from a V4QI register operand.
;; Enabled under TARGET_SSE4_1 && TARGET_MMX_WITH_SSE.
;; Output template is "%vpmov<extsuffix>bd"; the three constraint
;; alternatives are tagged noavx, noavx and avx by the "isa" attribute.
(define_insn "sse4_1_<code>v2qiv2si2"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
|
||||
(any_extend:V2SI
|
||||
(vec_select:V2QI
|
||||
(match_operand:V4QI 1 "register_operand" "Yr,*x,v")
|
||||
(parallel [(const_int 0) (const_int 1)]))))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
"%vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
|
||||
[(set_attr "isa" "noavx,noavx,avx")
|
||||
(set_attr "type" "ssemov")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Named expander for an any_extend from V2QI to V2SI.
;; Enabled under TARGET_SSE4_1 && TARGET_MMX_WITH_SSE.
;; The preparation code forces operand 1 into a register, views it as the
;; low part of a V4QImode value via lowpart_subreg, and emits
;; sse4_1_<code>v2qiv2si2, whose source operand is V4QI with elements 0 and 1
;; selected.
(define_expand "<insn>v2qiv2si2"
|
||||
[(set (match_operand:V2SI 0 "register_operand")
|
||||
(any_extend:V2SI
|
||||
(match_operand:V2QI 1 "register_operand")))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
rtx op1 = force_reg (V2QImode, operands[1]);
|
||||
op1 = lowpart_subreg (V4QImode, op1, V2QImode);
|
||||
emit_insn (gen_sse4_1_<code>v2qiv2si2 (operands[0], op1));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "sse4_1_<code>v2qiv2hi2"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yw")
|
||||
(any_extend:V2HI
|
||||
|
@ -3571,6 +3621,18 @@
|
|||
(set_attr "prefix" "orig,orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Named expander for an any_extend from V2QI to V2HI.
;; Enabled under TARGET_SSE4_1 only.
;; NOTE(review): unlike the 8-byte-result expanders in this change, the
;; condition has no TARGET_MMX_WITH_SSE term; presumably intentional because
;; the result is a 4-byte vector -- confirm against the
;; sse4_1_<code>v2qiv2hi2 insn condition, which is not visible in this hunk.
;; The preparation code forces operand 1 into a register, views it as the
;; low part of a V4QImode value via lowpart_subreg, and emits
;; sse4_1_<code>v2qiv2hi2 with that wider operand.
(define_expand "<insn>v2qiv2hi2"
|
||||
[(set (match_operand:V2HI 0 "register_operand")
|
||||
(any_extend:V2HI
|
||||
(match_operand:V2QI 1 "register_operand")))]
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
rtx op1 = force_reg (V2QImode, operands[1]);
|
||||
op1 = lowpart_subreg (V4QImode, op1, V2QImode);
|
||||
emit_insn (gen_sse4_1_<code>v2qiv2hi2 (operands[0], op1));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Pack/unpack vector modes
|
||||
;; mmxpackmode maps V4HI to "V8QI" and V2SI to "V4HI".
(define_mode_attr mmxpackmode
|
||||
[(V4HI "V8QI") (V2SI "V4HI")])
|
||||
|
|
26
gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
Normal file
26
gcc/testsuite/gcc.target/i386/pr92658-sse4-4b.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* PR target/92658 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
|
||||
|
||||
/* 4-byte GNU vector types: v4qi holds four unsigned chars, v2hi holds two
   unsigned shorts.  */
typedef unsigned char v4qi __attribute__((vector_size (4)));
|
||||
typedef unsigned short v2hi __attribute__((vector_size (4)));
|
||||
|
||||
/* Widen elements 0 and 1 of the v4qi pointed to by SRC to unsigned short
   through a two-element temporary, then store the temporary to dst[0]
   reinterpreted as a v2hi.  The dg-final directive in this file scans the
   assembly for "pmovzxbw".  */
void
|
||||
foo_u8_u16 (v2hi * dst, v4qi * __restrict src)
|
||||
{
|
||||
unsigned short tem[2];
|
||||
tem[0] = (*src)[0];
|
||||
tem[1] = (*src)[1];
|
||||
dst[0] = *(v2hi *) tem;
|
||||
}
|
||||
|
||||
/* Same widening as foo_u8_u16, but SRC is passed by value: elements 0 and 1
   of SRC are widened to unsigned short and stored to dst[0] as a v2hi.
   NOTE(review): tem has four elements, but only tem[0] and tem[1] are
   written, and the v2hi load covers just those two -- the extra elements
   look unnecessary; left as is because this is a codegen test.  */
void
|
||||
bar_u8_u16 (v2hi * dst, v4qi src)
|
||||
{
|
||||
unsigned short tem[4];
|
||||
tem[0] = src[0];
|
||||
tem[1] = src[1];
|
||||
dst[0] = *(v2hi *) tem;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
|
71
gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
Normal file
71
gcc/testsuite/gcc.target/i386/pr92658-sse4-8b.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* PR target/92658 */
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -mtune=icelake-server -ftree-vectorize -msse4.1" } */
|
||||
|
||||
/* 8-byte GNU vector types: v8qi holds eight unsigned chars, v4hi holds four
   unsigned shorts, v2si holds two unsigned ints (assuming 4-byte int).  */
typedef unsigned char v8qi __attribute__((vector_size (8)));
|
||||
typedef unsigned short v4hi __attribute__((vector_size (8)));
|
||||
typedef unsigned int v2si __attribute__((vector_size (8)));
|
||||
|
||||
/* Widen elements 0..3 of the v8qi pointed to by SRC to unsigned short
   through a four-element temporary, then store the temporary to dst[0]
   reinterpreted as a v4hi.  Covered by the "pmovzxbw" scan that follows
   bar_u8_u16.  */
void
|
||||
foo_u8_u16 (v4hi * dst, v8qi * __restrict src)
|
||||
{
|
||||
unsigned short tem[4];
|
||||
tem[0] = (*src)[0];
|
||||
tem[1] = (*src)[1];
|
||||
tem[2] = (*src)[2];
|
||||
tem[3] = (*src)[3];
|
||||
dst[0] = *(v4hi *) tem;
|
||||
}
|
||||
|
||||
/* By-value variant of foo_u8_u16: widen elements 0..3 of SRC to unsigned
   short and store them to dst[0] as a v4hi.  */
void
|
||||
bar_u8_u16 (v4hi * dst, v8qi src)
|
||||
{
|
||||
unsigned short tem[4];
|
||||
tem[0] = src[0];
|
||||
tem[1] = src[1];
|
||||
tem[2] = src[2];
|
||||
tem[3] = src[3];
|
||||
dst[0] = *(v4hi *) tem;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "pmovzxbw" 2 } } */
|
||||
|
||||
/* Widen elements 0 and 1 of the v8qi pointed to by SRC to unsigned int
   through a two-element temporary, then store the temporary to dst[0]
   reinterpreted as a v2si.  Covered by the "pmovzxbd" scan that follows
   bar_u8_u32.  */
void
|
||||
foo_u8_u32 (v2si * dst, v8qi * __restrict src)
|
||||
{
|
||||
unsigned int tem[2];
|
||||
tem[0] = (*src)[0];
|
||||
tem[1] = (*src)[1];
|
||||
dst[0] = *(v2si *) tem;
|
||||
}
|
||||
|
||||
/* By-value variant of foo_u8_u32: widen elements 0 and 1 of SRC to unsigned
   int and store them to dst[0] as a v2si.  */
void
|
||||
bar_u8_u32 (v2si * dst, v8qi src)
|
||||
{
|
||||
unsigned int tem[2];
|
||||
tem[0] = src[0];
|
||||
tem[1] = src[1];
|
||||
dst[0] = *(v2si *) tem;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "pmovzxbd" 2 } } */
|
||||
|
||||
/* Widen elements 0 and 1 of the v4hi pointed to by SRC to unsigned int
   through a two-element temporary, then store the temporary to dst[0]
   reinterpreted as a v2si.  Covered by the "pmovzxwd" scan that follows
   bar_u16_u32.  */
void
|
||||
foo_u16_u32 (v2si * dst, v4hi * __restrict src)
|
||||
{
|
||||
unsigned int tem[2];
|
||||
tem[0] = (*src)[0];
|
||||
tem[1] = (*src)[1];
|
||||
dst[0] = *(v2si *) tem;
|
||||
}
|
||||
|
||||
/* By-value variant of foo_u16_u32: widen elements 0 and 1 of SRC to
   unsigned int and store them to dst[0] as a v2si.  */
void
|
||||
bar_u16_u32 (v2si * dst, v4hi src)
|
||||
{
|
||||
unsigned int tem[2];
|
||||
tem[0] = src[0];
|
||||
tem[1] = src[1];
|
||||
dst[0] = *(v2si *) tem;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "pmovzxwd" 2 } } */
|
Loading…
Add table
Reference in a new issue