i386: Add insert and extract patterns for 4-byte vectors [PR100637]
The patch introduces insert and extract patterns for 4-byte vectors. It emits PINSR and PEXTR instructions only when they are available, and otherwise falls back to generic code that emulates these instructions via inserts, extracts, logic operations and shifts in integer registers. Please note that the generic fallback produces better code than the current approach of constructing a new vector in memory (which suffers a store forwarding stall), so QImode 8-byte vector inserts are now also enabled only with TARGET_SSE4_1. 2021-06-03 Uroš Bizjak <ubizjak@gmail.com> gcc/ PR target/100637 * config/i386/i386-expand.c (ix86_expand_vector_set): Handle V2HI and V4QI modes. (ix86_expand_vector_extract): Ditto. * config/i386/mmx.md (*pinsrw): New insn pattern. (*pinsrb): Ditto. (*pextrw): Ditto. (*pextrw_zext): Ditto. (*pextrb): Ditto. (*pextrb_zext): Ditto. (vec_setv2hi): New expander. (vec_extractv2hihi): Ditto. (vec_setv4qi): Ditto. (vec_extractv4qiqi): Ditto. (vec_setv8qi): Enable only for TARGET_SSE4_1. (vec_extractv8qiqi): Ditto. gcc/testsuite/ PR target/100637 * gcc.target/i386/vperm-v2hi.c: New test. * gcc.target/i386/vperm-v4qi.c: Ditto.
This commit is contained in:
parent
52e130652a
commit
5883e56756
4 changed files with 268 additions and 2 deletions
|
@ -14968,6 +14968,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
|||
return;
|
||||
|
||||
case E_V8HImode:
|
||||
case E_V2HImode:
|
||||
use_vec_merge = TARGET_SSE2;
|
||||
break;
|
||||
case E_V4HImode:
|
||||
|
@ -14975,6 +14976,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
|||
break;
|
||||
|
||||
case E_V16QImode:
|
||||
case E_V4QImode:
|
||||
use_vec_merge = TARGET_SSE4_1;
|
||||
break;
|
||||
|
||||
|
@ -15274,6 +15276,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|||
break;
|
||||
|
||||
case E_V8HImode:
|
||||
case E_V2HImode:
|
||||
use_vec_extr = TARGET_SSE2;
|
||||
break;
|
||||
case E_V4HImode:
|
||||
|
@ -15294,6 +15297,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|||
return;
|
||||
}
|
||||
break;
|
||||
case E_V4QImode:
|
||||
use_vec_extr = TARGET_SSE4_1;
|
||||
break;
|
||||
|
||||
case E_V8SFmode:
|
||||
if (TARGET_AVX)
|
||||
|
|
|
@ -3092,7 +3092,7 @@
|
|||
[(match_operand:V8QI 0 "register_operand")
|
||||
(match_operand:QI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_MMX || TARGET_MMX_WITH_SSE"
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
|
@ -3103,7 +3103,7 @@
|
|||
[(match_operand:QI 0 "register_operand")
|
||||
(match_operand:V8QI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_MMX || TARGET_MMX_WITH_SSE"
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
|
||||
operands[1], INTVAL (operands[2]));
|
||||
|
@ -3120,6 +3120,178 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; Insert a 16-bit scalar (operand 2, register or memory) into one lane of
;; a 4-byte V2HI vector (operand 1), producing operand 0.  The lane is
;; chosen by the vec_merge mask in operand 3.  The insn condition accepts
;; the mask only when TARGET_SSE2 holds and exact_log2 of the mask, compared
;; as unsigned, is below the number of V2HI elements.
;; The output code replaces the mask with its exact_log2 (the lane index)
;; before printing.  Alternative 0 (isa "noavx") prints the two-operand
;; pinsrw form, with operand 1 tied to the destination by the "0"
;; constraint; alternative 1 (isa "avx") prints the three-operand vpinsrw
;; form.  A register source is printed with the %k modifier, a memory
;; source without it.
;; NOTE(review): unlike *pinsrb, no "prefix" attribute is set here --
;; confirm that this is intended.
(define_insn "*pinsrw"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=x,YW")
|
||||
(vec_merge:V2HI
|
||||
(vec_duplicate:V2HI
|
||||
(match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
|
||||
(match_operand:V2HI 1 "register_operand" "0,YW")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"TARGET_SSE2
|
||||
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
|
||||
< GET_MODE_NUNITS (V2HImode))"
|
||||
{
|
||||
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 1:
|
||||
if (MEM_P (operands[2]))
|
||||
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
else
|
||||
return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
|
||||
case 0:
|
||||
if (MEM_P (operands[2]))
|
||||
return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
|
||||
else
|
||||
return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Insert an 8-bit scalar (operand 2, register or memory) into one lane of
;; a 4-byte V4QI vector (operand 1), producing operand 0.  The lane is
;; chosen by the vec_merge mask in operand 3.  The insn condition accepts
;; the mask only when TARGET_SSE4_1 holds and exact_log2 of the mask,
;; compared as unsigned, is below the number of V4QI elements.
;; The output code replaces the mask with its exact_log2 (the lane index)
;; before printing.  Alternative 0 (isa "noavx") prints the two-operand
;; pinsrb form, with operand 1 tied to the destination by the "0"
;; constraint; alternative 1 (isa "avx") prints the three-operand vpinsrb
;; form.  A register source is printed with the %k modifier, a memory
;; source without it.
(define_insn "*pinsrb"
|
||||
[(set (match_operand:V4QI 0 "register_operand" "=x,YW")
|
||||
(vec_merge:V4QI
|
||||
(vec_duplicate:V4QI
|
||||
(match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
|
||||
(match_operand:V4QI 1 "register_operand" "0,YW")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"TARGET_SSE4_1
|
||||
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
|
||||
< GET_MODE_NUNITS (V4QImode))"
|
||||
{
|
||||
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 1:
|
||||
if (MEM_P (operands[2]))
|
||||
return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
else
|
||||
return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
|
||||
case 0:
|
||||
if (MEM_P (operands[2]))
|
||||
return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
|
||||
else
|
||||
return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "orig,vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Extract one 16-bit element of a V2HI vector (operand 1) into operand 0.
;; Operand 2 is the element index and must satisfy const_0_to_1_operand.
;; Requires TARGET_SSE2.  Alternative 0 writes a general register (printed
;; with the %k modifier); alternative 1 stores to memory and is limited by
;; its isa attribute to "sse4".
;; NOTE(review): the %v prefix presumably selects the VEX "v" mnemonic
;; under AVX, in line with the "maybe_vex" prefix attribute -- confirm.
(define_insn "*pextrw"
|
||||
[(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m")
|
||||
(vec_select:HI
|
||||
(match_operand:V2HI 1 "register_operand" "YW,YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n,n")])))]
|
||||
"TARGET_SSE2"
|
||||
"@
|
||||
%vpextrw\t{%2, %1, %k0|%k0, %1, %2}
|
||||
%vpextrw\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "isa" "*,sse4")
|
||||
(set_attr "type" "sselog1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Zero-extending variant of *pextrw: extract the 16-bit element of a V2HI
;; vector (operand 1) selected by operand 2 (const_0_to_1_operand) and
;; zero-extend it into a general register of mode SWI48.  Requires
;; TARGET_SSE2; only a register destination is allowed.
;; NOTE(review): SWI48 is a mode iterator defined outside this hunk
;; (presumably SImode and DImode) -- confirm against i386.md.
(define_insn "*pextrw_zext"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(zero_extend:SWI48
|
||||
(vec_select:HI
|
||||
(match_operand:V2HI 1 "register_operand" "YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")]))))]
|
||||
"TARGET_SSE2"
|
||||
"%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Extract one 8-bit element of a V4QI vector (operand 1) into operand 0.
;; Operand 2 is the element index and must satisfy const_0_to_3_operand.
;; Requires TARGET_SSE4_1.  Alternative 0 writes a general register
;; (printed with the %k modifier); alternative 1 stores to memory.
;; NOTE(review): the %v prefix presumably selects the VEX "v" mnemonic
;; under AVX, in line with the "maybe_vex" prefix attribute -- confirm.
(define_insn "*pextrb"
|
||||
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
|
||||
(vec_select:QI
|
||||
(match_operand:V4QI 1 "register_operand" "YW,YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")])))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
%vpextrb\t{%2, %1, %k0|%k0, %1, %2}
|
||||
%vpextrb\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Zero-extending variant of *pextrb: extract the 8-bit element of a V4QI
;; vector (operand 1) selected by operand 2 (const_0_to_3_operand) and
;; zero-extend it into a general register of mode SWI248.  Requires
;; TARGET_SSE4_1; only a register destination is allowed.
;; NOTE(review): SWI248 is a mode iterator defined outside this hunk
;; (presumably HImode, SImode and DImode) -- confirm against i386.md.
(define_insn "*pextrb_zext"
|
||||
[(set (match_operand:SWI248 0 "register_operand" "=r")
|
||||
(zero_extend:SWI248
|
||||
(vec_select:QI
|
||||
(match_operand:V4QI 1 "register_operand" "YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
|
||||
"TARGET_SSE4_1"
|
||||
"%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "prefix_data16" "1")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; Expander for setting one element of a V2HI vector.
;; Operand 0 is the vector register updated in place, operand 1 the HImode
;; value to store, operand 2 the constant element index.  Enabled under
;; TARGET_SSE2.  All work is delegated to ix86_expand_vector_set, called
;; with its first argument (mmx_ok) false.
(define_expand "vec_setv2hi"
|
||||
[(match_operand:V2HI 0 "register_operand")
|
||||
(match_operand:HI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for extracting one element of a V2HI vector.
;; Operand 0 is the HImode destination register, operand 1 the source
;; vector register, operand 2 the constant element index.  Enabled under
;; TARGET_SSE2.  All work is delegated to ix86_expand_vector_extract,
;; called with its first argument (mmx_ok) false.
(define_expand "vec_extractv2hihi"
|
||||
[(match_operand:HI 0 "register_operand")
|
||||
(match_operand:V2HI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_vector_extract (false, operands[0],
|
||||
operands[1], INTVAL (operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for setting one element of a V4QI vector.
;; Operand 0 is the vector register updated in place, operand 1 the QImode
;; value to store, operand 2 the constant element index.  Enabled under
;; TARGET_SSE4_1.  All work is delegated to ix86_expand_vector_set, called
;; with its first argument (mmx_ok) false.
(define_expand "vec_setv4qi"
|
||||
[(match_operand:V4QI 0 "register_operand")
|
||||
(match_operand:QI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
ix86_expand_vector_set (false, operands[0], operands[1],
|
||||
INTVAL (operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expander for extracting one element of a V4QI vector.
;; Operand 0 is the QImode destination register, operand 1 the source
;; vector register, operand 2 the constant element index.  Enabled under
;; TARGET_SSE4_1.  All work is delegated to ix86_expand_vector_extract,
;; called with its first argument (mmx_ok) false.
(define_expand "vec_extractv4qiqi"
|
||||
[(match_operand:QI 0 "register_operand")
|
||||
(match_operand:V4QI 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_SSE4_1"
|
||||
{
|
||||
ix86_expand_vector_extract (false, operands[0],
|
||||
operands[1], INTVAL (operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Miscellaneous
|
||||
|
|
41
gcc/testsuite/gcc.target/i386/vperm-v2hi.c
Normal file
41
gcc/testsuite/gcc.target/i386/vperm-v2hi.c
Normal file
|
@ -0,0 +1,41 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O -msse2" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
|
||||
#include "isa-check.h"
|
||||
#include "sse-os-support.h"
|
||||
|
||||
/* S is the element type, V the 4-byte vector of two shorts under test and
   IV the index-vector type used as the __builtin_shuffle selector in TEST.
   U overlays one vector with a plain array of its two elements.  */
typedef short S;
|
||||
typedef short V __attribute__((vector_size(4)));
|
||||
typedef short IV __attribute__((vector_size(4)));
|
||||
typedef union { S s[2]; V v; } U;
|
||||
|
||||
/* i holds the two shuffle inputs; b receives the shuffle result and c the
   element-by-element reference result that TEST compares it against.  */
static U i[2], b, c;
|
||||
|
||||
/* memcmp is declared by hand and assert is a local macro that traps via
   __builtin_trap when its argument is false.  */
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
|
||||
#define assert(T) ((T) || (__builtin_trap (), 0))
|
||||
|
||||
/* Check one two-input shuffle with selector {E0, E1}: compute it with
   __builtin_shuffle into b, build the reference in c by indexing the
   input elements directly, then trap if the two differ.  An empty asm
   with a "memory" clobber sits between the stores and the comparison
   (presumably to stop the compiler from folding the check away).
   NOTE(review): the reference reads i[0].s[E], but U declares s[2]; any
   selector value of 2 or 3 therefore indexes past the array and relies on
   i[0] and i[1] being laid out contiguously -- formally out of bounds;
   confirm this is acceptable for the testsuite.  */
#define TEST(E0, E1) \
|
||||
b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
|
||||
c.s[0] = i[0].s[E0]; \
|
||||
c.s[1] = i[0].s[E1]; \
|
||||
__asm__("" : : : "memory"); \
|
||||
assert (memcmp (&b, &c, sizeof(c)) == 0);
|
||||
|
||||
#include "vperm-2-2.inc"
|
||||
|
||||
/* Test driver: run the ISA check, exit successfully when
   sse_os_support () reports no support, fill the shuffle inputs with the
   values 0..3 and run check ().
   NOTE(review): check_isa, sse_os_support, exit and check are not declared
   in this file; presumably they come from the included headers and
   vperm-2-2.inc -- confirm.  */
int main()
|
||||
{
|
||||
check_isa ();
|
||||
|
||||
if (!sse_os_support ())
|
||||
exit (0);
|
||||
|
||||
i[0].s[0] = 0;
|
||||
i[0].s[1] = 1;
|
||||
/* NOTE(review): U declares s[2], so the next two stores index past the
   array; they reach i[1] only if i[0] and i[1] are contiguous.  */
i[0].s[2] = 2;
|
||||
i[0].s[3] = 3;
|
||||
|
||||
check();
|
||||
return 0;
|
||||
}
|
47
gcc/testsuite/gcc.target/i386/vperm-v4qi.c
Normal file
47
gcc/testsuite/gcc.target/i386/vperm-v4qi.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O -msse2" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
|
||||
#include "isa-check.h"
|
||||
#include "sse-os-support.h"
|
||||
|
||||
/* S is the element type, V the 4-byte vector of four chars under test and
   IV the index-vector type used as the __builtin_shuffle selector in TEST.
   U overlays one vector with a plain array of its four elements.  */
typedef char S;
|
||||
typedef char V __attribute__((vector_size(4)));
|
||||
typedef char IV __attribute__((vector_size(4)));
|
||||
typedef union { S s[4]; V v; } U;
|
||||
|
||||
/* i holds the two shuffle inputs; b receives the shuffle result and c the
   element-by-element reference result that TEST compares it against.  */
static U i[2], b, c;
|
||||
|
||||
/* memcmp is declared by hand and assert is a local macro that traps via
   __builtin_trap when its argument is false.  */
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
|
||||
#define assert(T) ((T) || (__builtin_trap (), 0))
|
||||
|
||||
/* Check one two-input shuffle with selector {E0, E1, E2, E3}: compute it
   with __builtin_shuffle into b, build the reference in c by indexing the
   input elements directly, then trap if the two differ.  An empty asm
   with a "memory" clobber sits between the stores and the comparison
   (presumably to stop the compiler from folding the check away).
   NOTE(review): the reference reads i[0].s[E], but U declares s[4]; any
   selector value of 4..7 therefore indexes past the array and relies on
   i[0] and i[1] being laid out contiguously -- formally out of bounds;
   confirm this is acceptable for the testsuite.  */
#define TEST(E0, E1, E2, E3) \
|
||||
b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
|
||||
c.s[0] = i[0].s[E0]; \
|
||||
c.s[1] = i[0].s[E1]; \
|
||||
c.s[2] = i[0].s[E2]; \
|
||||
c.s[3] = i[0].s[E3]; \
|
||||
__asm__("" : : : "memory"); \
|
||||
assert (memcmp (&b, &c, sizeof(c)) == 0);
|
||||
|
||||
#include "vperm-4-2.inc"
|
||||
|
||||
/* Test driver: run the ISA check, exit successfully when
   sse_os_support () reports no support, fill the shuffle inputs with the
   values 0..7 and run check ().
   NOTE(review): check_isa, sse_os_support, exit and check are not declared
   in this file; presumably they come from the included headers and
   vperm-4-2.inc -- confirm.  */
int main()
|
||||
{
|
||||
check_isa ();
|
||||
|
||||
if (!sse_os_support ())
|
||||
exit (0);
|
||||
|
||||
i[0].s[0] = 0;
|
||||
i[0].s[1] = 1;
|
||||
i[0].s[2] = 2;
|
||||
i[0].s[3] = 3;
|
||||
/* NOTE(review): U declares s[4], so the next four stores index past the
   array; they reach i[1] only if i[0] and i[1] are contiguous.  */
i[0].s[4] = 4;
|
||||
i[0].s[5] = 5;
|
||||
i[0].s[6] = 6;
|
||||
i[0].s[7] = 7;
|
||||
|
||||
check();
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue