Support vec_set/vec_extract/vec_init for V4HF/V2HF.
gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_vector_init_duplicate): Handle V4HF/V4BF and V2HF/V2BF. (ix86_expand_vector_init_one_nonzero): Ditto. (ix86_expand_vector_init_one_var): Ditto. (ix86_expand_vector_init_general): Ditto. (ix86_expand_vector_set_var): Ditto. (ix86_expand_vector_set): Ditto. (ix86_expand_vector_extract): Ditto. * config/i386/mmx.md (mmxdoublevecmode): Extend to V4HF/V4BF/V2HF/V2BF. (*mmx_pinsrw): Extend to V4FI_64, add a new alternative (&x, x, x), add a new define_split after the pattern. (*mmx_pextrw<mode>): New define_insn. (mmx_pshufw_1): Rename to .. (mmx_pshufw<mode>_1): .. this, extend to V4FI_64. (*mmx_pblendw64): Extend to V4FI_64. (*vec_dup<mode>): New define_insn. (vec_setv4hi): Rename to .. (vec_set<mode>): .. this, and extend to V4FI_64 (vec_extractv4hihi): Rename to .. (vec_extract<mode><mmxscalarmodelower>): .. this, and extend to V4FI_64. (vec_init<mode><mmxscalarmodelower>): New define_insn. (*pinsrw): Extend to V2FI_32, add a new alternative (&x, x, x), and add a new define_split after it. (*pextrw<mode>): New define_insn. (vec_setv2hi): Rename to .. (vec_set<mode>): .. this, extend to V2FI_32. (vec_extractv2hihi): Rename to .. (vec_extract<mode><mmxscalarmodelower>): .. this, extend to V2FI_32. (*punpckwd): Extend to V2FI_32. (*pshufw_1): Rename to .. (*pshufw<mode>_1): .. this, extend to V2FI_32. (vec_initv2hihi): Rename to .. (vec_init<mode><mmxscalarmodelower>): .. this, and extend to V2FI_32. (*vec_dup<mode>): New define_insn. * config/i386/sse.md (*vec_extract<mode>): Refine constraint from v to Yw. gcc/testsuite/ChangeLog: * gcc.target/i386/part-vect-vec_elem-1.c: New test. * gcc.target/i386/part-vect-vec_elem-2.c: New test.
This commit is contained in:
parent
b51bfee1be
commit
2794d510b9
5 changed files with 541 additions and 64 deletions
|
@ -15592,6 +15592,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
|
|||
}
|
||||
goto widen;
|
||||
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
if (TARGET_MMX_WITH_SSE)
|
||||
{
|
||||
val = force_reg (GET_MODE_INNER (mode), val);
|
||||
rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
|
||||
emit_insn (gen_rtx_SET (target, x));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case E_V2HImode:
|
||||
if (TARGET_SSE2)
|
||||
{
|
||||
|
@ -15605,6 +15616,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
|
|||
}
|
||||
return false;
|
||||
|
||||
case E_V2HFmode:
|
||||
case E_V2BFmode:
|
||||
if (TARGET_SSE2)
|
||||
{
|
||||
val = force_reg (GET_MODE_INNER (mode), val);
|
||||
rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
|
||||
emit_insn (gen_rtx_SET (target, x));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case E_V8QImode:
|
||||
case E_V4QImode:
|
||||
if (!mmx_ok)
|
||||
|
@ -15815,6 +15837,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
|
|||
use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
|
||||
break;
|
||||
case E_V4HImode:
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
|
||||
break;
|
||||
case E_V4QImode:
|
||||
|
@ -16051,6 +16075,8 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
|
|||
case E_V4SImode:
|
||||
case E_V8HImode:
|
||||
case E_V4HImode:
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
break;
|
||||
|
||||
case E_V16QImode:
|
||||
|
@ -16438,6 +16464,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
|
|||
rtx ops[64], op0, op1, op2, op3, op4, op5;
|
||||
machine_mode half_mode = VOIDmode;
|
||||
machine_mode quarter_mode = VOIDmode;
|
||||
machine_mode int_inner_mode = VOIDmode;
|
||||
int n, i;
|
||||
|
||||
switch (mode)
|
||||
|
@ -16582,6 +16609,13 @@ quarter:
|
|||
ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
|
||||
return;
|
||||
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
case E_V2HFmode:
|
||||
case E_V2BFmode:
|
||||
int_inner_mode = HImode;
|
||||
break;
|
||||
|
||||
case E_V4HImode:
|
||||
case E_V8QImode:
|
||||
|
||||
|
@ -16613,6 +16647,16 @@ quarter:
|
|||
for (j = 0; j < n_elt_per_word; ++j)
|
||||
{
|
||||
rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
|
||||
if (int_inner_mode != E_VOIDmode)
|
||||
{
|
||||
gcc_assert (TARGET_SSE2 && int_inner_mode == HImode);
|
||||
rtx tmp = gen_reg_rtx (int_inner_mode);
|
||||
elt = lowpart_subreg (int_inner_mode,
|
||||
force_reg (inner_mode, elt),
|
||||
inner_mode);
|
||||
emit_move_insn (tmp, elt);
|
||||
elt = tmp;
|
||||
}
|
||||
elt = convert_modes (tmp_mode, inner_mode, elt, true);
|
||||
|
||||
if (j == 0)
|
||||
|
@ -16839,6 +16883,14 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
|
|||
case E_V16SFmode:
|
||||
cmp_mode = V16SImode;
|
||||
break;
|
||||
case E_V2HFmode:
|
||||
case E_V2BFmode:
|
||||
cmp_mode = V2HImode;
|
||||
break;
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
cmp_mode = V4HImode;
|
||||
break;
|
||||
case E_V8HFmode:
|
||||
cmp_mode = V8HImode;
|
||||
break;
|
||||
|
@ -17085,9 +17137,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
|||
case E_V8HFmode:
|
||||
case E_V8BFmode:
|
||||
case E_V2HImode:
|
||||
case E_V2HFmode:
|
||||
case E_V2BFmode:
|
||||
use_vec_merge = TARGET_SSE2;
|
||||
break;
|
||||
case E_V4HImode:
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
|
||||
break;
|
||||
|
||||
|
@ -17428,9 +17484,13 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|||
case E_V8HFmode:
|
||||
case E_V8BFmode:
|
||||
case E_V2HImode:
|
||||
case E_V2HFmode:
|
||||
case E_V2BFmode:
|
||||
use_vec_extr = TARGET_SSE2;
|
||||
break;
|
||||
case E_V4HImode:
|
||||
case E_V4HFmode:
|
||||
case E_V4BFmode:
|
||||
use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
|
||||
break;
|
||||
|
||||
|
|
|
@ -112,11 +112,21 @@
|
|||
|
||||
;; Mapping of vector modes to a vector mode of double size
|
||||
(define_mode_attr mmxdoublevecmode
|
||||
[(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")])
|
||||
[(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")
|
||||
(V2HI "V4HI") (V2HF "V4HF") (V2BF "V4BF")])
|
||||
|
||||
;; Mapping of vector modes back to the scalar modes
|
||||
(define_mode_attr mmxscalarmode
|
||||
[(V2SI "SI") (V2SF "SF")])
|
||||
[(V2SI "SI") (V2SF "SF")
|
||||
(V4HF "HF") (V4BF "BF")
|
||||
(V2HF "HF") (V2BF "BF")
|
||||
(V4HI "HI") (V2HI "HI")])
|
||||
|
||||
(define_mode_attr mmxscalarmodelower
|
||||
[(V2SI "si") (V2SF "sf")
|
||||
(V4HF "hf") (V4BF "bf")
|
||||
(V2HF "hf") (V2BF "bf")
|
||||
(V4HI "hi") (V2HI "hi")])
|
||||
|
||||
(define_mode_attr Yv_Yw
|
||||
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
|
||||
|
@ -4882,11 +4892,11 @@
|
|||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*mmx_pinsrw"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
|
||||
(vec_merge:V4HI
|
||||
(vec_duplicate:V4HI
|
||||
(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
|
||||
(match_operand:V4HI 1 "register_operand" "0,0,YW")
|
||||
[(set (match_operand:V4FI_64 0 "register_operand" "=y,x,YW,&x")
|
||||
(vec_merge:V4FI_64
|
||||
(vec_duplicate:V4FI_64
|
||||
(match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,rm,x"))
|
||||
(match_operand:V4FI_64 1 "register_operand" "0,0,YW,x")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
|
||||
&& (TARGET_SSE || TARGET_3DNOW_A)
|
||||
|
@ -4896,6 +4906,8 @@
|
|||
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 3:
|
||||
return "#";
|
||||
case 2:
|
||||
if (MEM_P (operands[2]))
|
||||
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
|
@ -4911,11 +4923,28 @@
|
|||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "*,sse2_noavx,avx")
|
||||
(set_attr "mmx_isa" "native,*,*")
|
||||
(set_attr "type" "mmxcvt,sselog,sselog")
|
||||
[(set_attr "isa" "*,sse2_noavx,avx,sse4")
|
||||
(set_attr "mmx_isa" "native,*,*,*")
|
||||
(set_attr "type" "mmxcvt,sselog,sselog,sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "DI,TI,TI")])
|
||||
(set_attr "mode" "DI,TI,TI,TI")])
|
||||
|
||||
;; For TARGET_SSE2, implement insert from XMM reg with PSHULFW + PBLENDW.
|
||||
(define_split
|
||||
[(set (match_operand:V4FI_64 0 "sse_reg_operand")
|
||||
(vec_merge:V4FI_64
|
||||
(vec_duplicate:V4FI_64
|
||||
(match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
|
||||
(match_operand:V4FI_64 1 "sse_reg_operand")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && reload_completed
|
||||
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
|
||||
< GET_MODE_NUNITS (<MODE>mode))"
|
||||
[(set (match_dup 0)
|
||||
(vec_duplicate:V4FI_64 (match_dup 2)))
|
||||
(set (match_dup 0)
|
||||
(vec_merge:V4FI_64 (match_dup 1) (match_dup 0) (match_dup 3)))]
|
||||
"operands[3] = GEN_INT (~INTVAL (operands[3]) & 0xf);")
|
||||
|
||||
(define_insn "*mmx_pinsrb"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=x,YW")
|
||||
|
@ -4973,6 +5002,41 @@
|
|||
(set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex")
|
||||
(set_attr "mode" "DI,TI,TI,TI")])
|
||||
|
||||
(define_insn "*mmx_pextrw<mode>"
|
||||
[(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,?r,jm,m,x,Yw")
|
||||
(vec_select:<mmxscalarmode>
|
||||
(match_operand:V4F_64 1 "register_operand" "y,YW,YW,YW,0,YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
|
||||
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
|
||||
&& (TARGET_SSE || TARGET_3DNOW_A)"
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
case 1:
|
||||
return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
|
||||
case 2:
|
||||
case 3:
|
||||
return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
|
||||
case 4:
|
||||
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
|
||||
return "psrldq\t{%2, %0|%0, %2}";
|
||||
case 5:
|
||||
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
|
||||
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "*,sse2,sse4_noavx,avx,noavx,avx")
|
||||
(set_attr "addr" "*,*,gpr16,*,*,*")
|
||||
(set_attr "mmx_isa" "native,*,*,*,*,*")
|
||||
(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,sseishft1,sseishft1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex,orig,maybe_evex")
|
||||
(set_attr "mode" "DI,TI,TI,TI,TI,TI")])
|
||||
|
||||
(define_insn "*mmx_pextrw_zext"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
|
||||
(zero_extend:SWI48
|
||||
|
@ -5069,18 +5133,18 @@
|
|||
&& (TARGET_SSE || TARGET_3DNOW_A)"
|
||||
{
|
||||
int mask = INTVAL (operands[2]);
|
||||
emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
|
||||
GEN_INT ((mask >> 0) & 3),
|
||||
GEN_INT ((mask >> 2) & 3),
|
||||
GEN_INT ((mask >> 4) & 3),
|
||||
GEN_INT ((mask >> 6) & 3)));
|
||||
emit_insn (gen_mmx_pshufwv4hi_1 (operands[0], operands[1],
|
||||
GEN_INT ((mask >> 0) & 3),
|
||||
GEN_INT ((mask >> 2) & 3),
|
||||
GEN_INT ((mask >> 4) & 3),
|
||||
GEN_INT ((mask >> 6) & 3)));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "mmx_pshufw_1"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
|
||||
(vec_select:V4HI
|
||||
(match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
|
||||
(define_insn "mmx_pshufw<mode>_1"
|
||||
[(set (match_operand:V4FI_64 0 "register_operand" "=y,Yw")
|
||||
(vec_select:V4FI_64
|
||||
(match_operand:V4FI_64 1 "register_mmxmem_operand" "ym,Yw")
|
||||
(parallel [(match_operand 2 "const_0_to_3_operand")
|
||||
(match_operand 3 "const_0_to_3_operand")
|
||||
(match_operand 4 "const_0_to_3_operand")
|
||||
|
@ -5134,10 +5198,10 @@
|
|||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*mmx_pblendw64"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,x")
|
||||
(vec_merge:V4HI
|
||||
(match_operand:V4HI 2 "register_operand" "Yr,*x,x")
|
||||
(match_operand:V4HI 1 "register_operand" "0,0,x")
|
||||
[(set (match_operand:V4FI_64 0 "register_operand" "=Yr,*x,x")
|
||||
(vec_merge:V4FI_64
|
||||
(match_operand:V4FI_64 2 "register_operand" "Yr,*x,x")
|
||||
(match_operand:V4FI_64 1 "register_operand" "0,0,x")
|
||||
(match_operand:SI 3 "const_0_to_15_operand")))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
"@
|
||||
|
@ -5152,10 +5216,10 @@
|
|||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*mmx_pblendw32"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,x")
|
||||
(vec_merge:V2HI
|
||||
(match_operand:V2HI 2 "register_operand" "Yr,*x,x")
|
||||
(match_operand:V2HI 1 "register_operand" "0,0,x")
|
||||
[(set (match_operand:V2FI_32 0 "register_operand" "=Yr,*x,x")
|
||||
(vec_merge:V2FI_32
|
||||
(match_operand:V2FI_32 2 "register_operand" "Yr,*x,x")
|
||||
(match_operand:V2FI_32 1 "register_operand" "0,0,x")
|
||||
(match_operand:SI 3 "const_0_to_7_operand")))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
|
@ -5212,6 +5276,16 @@
|
|||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "DI,TI")])
|
||||
|
||||
(define_insn "*vec_dup<mode>"
|
||||
[(set (match_operand:V4F_64 0 "register_operand" "=Yw")
|
||||
(vec_duplicate:V4F_64
|
||||
(match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
|
||||
"TARGET_MMX_WITH_SSE"
|
||||
"%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
|
||||
[(set_attr "isa" "sse2")
|
||||
(set_attr "type" "sselog1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*vec_dupv2si"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
|
||||
|
@ -5405,9 +5479,9 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_setv4hi"
|
||||
[(match_operand:V4HI 0 "register_operand")
|
||||
(match_operand:HI 1 "register_operand")
|
||||
(define_expand "vec_set<mode>"
|
||||
[(match_operand:V4FI_64 0 "register_operand")
|
||||
(match_operand:<mmxscalarmode> 1 "register_operand")
|
||||
(match_operand 2 "vec_setm_mmx_operand")]
|
||||
"TARGET_MMX || TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
|
@ -5419,9 +5493,9 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extractv4hihi"
|
||||
[(match_operand:HI 0 "register_operand")
|
||||
(match_operand:V4HI 1 "register_operand")
|
||||
(define_expand "vec_extract<mode><mmxscalarmodelower>"
|
||||
[(match_operand:<mmxscalarmode> 0 "register_operand")
|
||||
(match_operand:V4FI_64 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_MMX || TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
|
@ -5440,6 +5514,16 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_init<mode><mmxscalarmodelower>"
|
||||
[(match_operand:V4F_64 0 "register_operand")
|
||||
(match_operand 1)]
|
||||
"TARGET_MMX_WITH_SSE"
|
||||
{
|
||||
ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
|
||||
operands[1]);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_setv8qi"
|
||||
[(match_operand:V8QI 0 "register_operand")
|
||||
(match_operand:QI 1 "register_operand")
|
||||
|
@ -5476,11 +5560,11 @@
|
|||
})
|
||||
|
||||
(define_insn "*pinsrw"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=x,YW")
|
||||
(vec_merge:V2HI
|
||||
(vec_duplicate:V2HI
|
||||
(match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
|
||||
(match_operand:V2HI 1 "register_operand" "0,YW")
|
||||
[(set (match_operand:V2FI_32 0 "register_operand" "=x,YW,&x")
|
||||
(vec_merge:V2FI_32
|
||||
(vec_duplicate:V2FI_32
|
||||
(match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,x"))
|
||||
(match_operand:V2FI_32 1 "register_operand" "0,YW,x")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"TARGET_SSE2
|
||||
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
|
||||
|
@ -5489,6 +5573,8 @@
|
|||
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 2:
|
||||
return "#";
|
||||
case 1:
|
||||
if (MEM_P (operands[2]))
|
||||
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
|
||||
|
@ -5503,11 +5589,29 @@
|
|||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
[(set_attr "isa" "noavx,avx,sse4")
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; For TARGET_SSE2, implement insert from XMM reg with PSHULFW + PBLENDW.
|
||||
(define_split
|
||||
[(set (match_operand:V2FI_32 0 "sse_reg_operand")
|
||||
(vec_merge:V2FI_32
|
||||
(vec_duplicate:V2FI_32
|
||||
(match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
|
||||
(match_operand:V2FI_32 1 "sse_reg_operand")
|
||||
(match_operand:SI 3 "const_int_operand")))]
|
||||
"TARGET_SSE4_1 && reload_completed
|
||||
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
|
||||
< GET_MODE_NUNITS (<MODE>mode))"
|
||||
[(set (match_dup 0)
|
||||
(vec_duplicate:V2FI_32 (match_dup 2)))
|
||||
(set (match_dup 0)
|
||||
(vec_merge:V2FI_32 (match_dup 1) (match_dup 0) (match_dup 3)))]
|
||||
"operands[3] = GEN_INT (~INTVAL (operands[3]) & 0x3);")
|
||||
|
||||
|
||||
(define_insn "*pinsrb"
|
||||
[(set (match_operand:V4QI 0 "register_operand" "=x,YW")
|
||||
(vec_merge:V4QI
|
||||
|
@ -5561,6 +5665,39 @@
|
|||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*pextrw<mode>"
|
||||
[(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
|
||||
(vec_select:<mmxscalarmode>
|
||||
(match_operand:V2F_32 1 "register_operand" "YW,YW,YW,0,YW")
|
||||
(parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
switch (which_alternative)
|
||||
{
|
||||
case 0:
|
||||
return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
|
||||
case 1:
|
||||
return "pextrw\t{%2, %1, %0|%0, %1, %2}";
|
||||
case 2:
|
||||
return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
|
||||
case 3:
|
||||
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
|
||||
return "psrldq\t{%2, %0|%0, %2}";
|
||||
case 4:
|
||||
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
|
||||
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
[(set_attr "isa" "*,sse4_noavx,avx,noavx,avx")
|
||||
(set_attr "addr" "*,gpr16,*,*,*")
|
||||
(set_attr "type" "sselog1,sselog1,sselog1,sseishft1,sseishft1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*pextrw_zext"
|
||||
[(set (match_operand:SWI48 0 "register_operand" "=r")
|
||||
(zero_extend:SWI48
|
||||
|
@ -5608,9 +5745,9 @@
|
|||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "vec_setv2hi"
|
||||
[(match_operand:V2HI 0 "register_operand")
|
||||
(match_operand:HI 1 "register_operand")
|
||||
(define_expand "vec_set<mode>"
|
||||
[(match_operand:V2FI_32 0 "register_operand")
|
||||
(match_operand:<mmxscalarmode> 1 "register_operand")
|
||||
(match_operand 2 "vec_setm_sse41_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
|
@ -5622,9 +5759,9 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_extractv2hihi"
|
||||
[(match_operand:HI 0 "register_operand")
|
||||
(match_operand:V2HI 1 "register_operand")
|
||||
(define_expand "vec_extract<mode><mmxscalarmodelower>"
|
||||
[(match_operand:<mmxscalarmode> 0 "register_operand")
|
||||
(match_operand:V2FI_32 1 "register_operand")
|
||||
(match_operand 2 "const_int_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
|
@ -5659,29 +5796,29 @@
|
|||
})
|
||||
|
||||
(define_insn_and_split "*punpckwd"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
|
||||
(vec_select:V2HI
|
||||
(vec_concat:V4HI
|
||||
(match_operand:V2HI 1 "register_operand" "0,Yw")
|
||||
(match_operand:V2HI 2 "register_operand" "x,Yw"))
|
||||
[(set (match_operand:V2FI_32 0 "register_operand" "=x,Yw")
|
||||
(vec_select:V2FI_32
|
||||
(vec_concat:<mmxdoublevecmode>
|
||||
(match_operand:V2FI_32 1 "register_operand" "0,Yw")
|
||||
(match_operand:V2FI_32 2 "register_operand" "x,Yw"))
|
||||
(parallel [(match_operand 3 "const_0_to_3_operand")
|
||||
(match_operand 4 "const_0_to_3_operand")])))]
|
||||
"TARGET_SSE2"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(set (match_dup 5)
|
||||
(vec_select:V8HI
|
||||
(vec_select:<mmxxmmmode>
|
||||
(match_dup 5)
|
||||
(parallel [(match_dup 3) (match_dup 4)
|
||||
(const_int 2) (const_int 3)
|
||||
(const_int 4) (const_int 5)
|
||||
(const_int 6) (const_int 7)])))]
|
||||
{
|
||||
rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
|
||||
rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
|
||||
rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode);
|
||||
rtx dest = lowpart_subreg (<mmxxmmmode>mode, operands[0], <MODE>mode);
|
||||
rtx op1 = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
|
||||
rtx op2 = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
|
||||
|
||||
emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2));
|
||||
emit_insn (gen_vec_interleave_low<mmxxmmmodelower> (dest, op1, op2));
|
||||
|
||||
static const int map[4] = { 0, 2, 1, 3 };
|
||||
|
||||
|
@ -5699,10 +5836,10 @@
|
|||
(set_attr "type" "sselog")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "*pshufw_1"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=Yw")
|
||||
(vec_select:V2HI
|
||||
(match_operand:V2HI 1 "register_operand" "Yw")
|
||||
(define_insn "*pshufw<mode>_1"
|
||||
[(set (match_operand:V2FI_32 0 "register_operand" "=Yw")
|
||||
(vec_select:V2FI_32
|
||||
(match_operand:V2FI_32 1 "register_operand" "Yw")
|
||||
(parallel [(match_operand 2 "const_0_to_1_operand")
|
||||
(match_operand 3 "const_0_to_1_operand")])))]
|
||||
"TARGET_SSE2"
|
||||
|
@ -5731,8 +5868,18 @@
|
|||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "vec_initv2hihi"
|
||||
[(match_operand:V2HI 0 "register_operand")
|
||||
(define_insn "*vec_dup<mode>"
|
||||
[(set (match_operand:V2F_32 0 "register_operand" "=Yw")
|
||||
(vec_duplicate:V2F_32
|
||||
(match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
|
||||
"TARGET_SSE2"
|
||||
"%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
|
||||
[(set_attr "type" "sselog1")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_expand "vec_init<mode><mmxscalarmodelower>"
|
||||
[(match_operand:V2FI_32 0 "register_operand")
|
||||
(match_operand 1)]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
|
|
|
@ -12372,9 +12372,9 @@
|
|||
"operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);")
|
||||
|
||||
(define_insn "*vec_extract<mode>"
|
||||
[(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,v")
|
||||
[(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
|
||||
(vec_select:HFBF
|
||||
(match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,v")
|
||||
(match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,YW")
|
||||
(parallel
|
||||
[(match_operand:SI 2 "const_0_to_7_operand")])))]
|
||||
"TARGET_SSE2"
|
||||
|
|
135
gcc/testsuite/gcc.target/i386/part-vect-vec_elem-1.c
Normal file
135
gcc/testsuite/gcc.target/i386/part-vect-vec_elem-1.c
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O1 -msse4.1" } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
typedef _Float16 v4hf __attribute__((vector_size(8)));
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vector_init_dupv4hf (_Float16 a)
|
||||
{
|
||||
return __extension__(v4hf){a, a, a, a};
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vector_init_allzero (_Float16 a)
|
||||
{
|
||||
return __extension__(v4hf){0, 0, 0, 0};
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vector_init_one_nonzero (_Float16 a)
|
||||
{
|
||||
return __extension__(v4hf){0, 0, a, 0};
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vector_init_one_var (_Float16 a)
|
||||
{
|
||||
return __extension__(v4hf){1, 2, a, 4};
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vector_init_general (_Float16 a, _Float16 a1, _Float16 a2, _Float16 a3)
|
||||
{
|
||||
return __extension__(v4hf){a3, a2, a1, a};
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vec_set (_Float16 a, v4hf b)
|
||||
{
|
||||
b[1] = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
v4hf
|
||||
__attribute__((noipa))
|
||||
vec_set_var (_Float16 a, v4hf b, int c)
|
||||
{
|
||||
b[c] = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
_Float16
|
||||
__attribute__((noipa))
|
||||
vec_extract (v4hf b)
|
||||
{
|
||||
return b[2];
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test ()
|
||||
{
|
||||
typedef union {
|
||||
_Float16 a[4];
|
||||
v4hf x;}union64hf;
|
||||
union64hf res, exp, src;
|
||||
|
||||
res.x = vector_init_dupv4hf (1.0f16);
|
||||
for (int i = 0; i != 4; i++)
|
||||
exp.a[i] = 1.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_allzero (1.0f16);
|
||||
for (int i = 0; i != 4; i++)
|
||||
exp.a[i] = 0.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_one_nonzero (1.0f16);
|
||||
for (int i = 0; i != 4; i++)
|
||||
exp.a[i] = 0.0f16;
|
||||
exp.a[2] = 1.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_one_var (3.0f16);
|
||||
for (int i = 0; i != 4; i++)
|
||||
exp.a[i] = i + 1;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_general (4.0, 3.0f, 2.0f, 1.0);
|
||||
for (int i = 0; i != 4; i++)
|
||||
exp.a[i] = 1 + i;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 4; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
res.x = vec_set (3.0f, src.x);
|
||||
exp.a[1] = 3.0f;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 4; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
res.x = vec_set_var (3.0f, src.x, 1);
|
||||
exp.a[1] = 3.0f;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 8) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 4; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
_Float16 res_scalar = vec_extract (src.x);
|
||||
if (res_scalar != 2.0f)
|
||||
__builtin_abort ();
|
||||
return ;
|
||||
}
|
135
gcc/testsuite/gcc.target/i386/part-vect-vec_elem-2.c
Normal file
135
gcc/testsuite/gcc.target/i386/part-vect-vec_elem-2.c
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-options "-O1 -msse4.1" } */
|
||||
/* { dg-require-effective-target sse4 } */
|
||||
|
||||
#include "sse4_1-check.h"
|
||||
|
||||
typedef _Float16 v2hf __attribute__((vector_size(4)));
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vector_init_dupv2hf (_Float16 a)
|
||||
{
|
||||
return __extension__(v2hf){a, a};
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vector_init_allzero (_Float16 a)
|
||||
{
|
||||
return __extension__(v2hf){0, 0};
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vector_init_one_nonzero (_Float16 a)
|
||||
{
|
||||
return __extension__(v2hf){0, a};
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vector_init_one_var (_Float16 a)
|
||||
{
|
||||
return __extension__(v2hf){1, a};
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vector_init_general (_Float16 a1, _Float16 a2)
|
||||
{
|
||||
return __extension__(v2hf){a2, a1};
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vec_set (_Float16 a, v2hf b)
|
||||
{
|
||||
b[1] = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
v2hf
|
||||
__attribute__((noipa))
|
||||
vec_set_var (_Float16 a, v2hf b, int c)
|
||||
{
|
||||
b[c] = a;
|
||||
return b;
|
||||
}
|
||||
|
||||
_Float16
|
||||
__attribute__((noipa))
|
||||
vec_extract (v2hf b)
|
||||
{
|
||||
return b[1];
|
||||
}
|
||||
|
||||
static void
|
||||
sse4_1_test ()
|
||||
{
|
||||
typedef union {
|
||||
_Float16 a[2];
|
||||
v2hf x;}union64hf;
|
||||
union64hf res, exp, src;
|
||||
|
||||
res.x = vector_init_dupv2hf (1.0f16);
|
||||
for (int i = 0; i != 2; i++)
|
||||
exp.a[i] = 1.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_allzero (1.0f16);
|
||||
for (int i = 0; i != 2; i++)
|
||||
exp.a[i] = 0.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_one_nonzero (1.0f16);
|
||||
for (int i = 0; i != 2; i++)
|
||||
exp.a[i] = 0.0f16;
|
||||
exp.a[1] = 1.0f16;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_one_var (3.0f16);
|
||||
exp.a[0] = 1;
|
||||
exp.a[1] = 3;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
res.x = vector_init_general (2.0f, 1.0);
|
||||
for (int i = 0; i != 2; i++)
|
||||
exp.a[i] = 1 + i;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 2; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
res.x = vec_set (3.0f, src.x);
|
||||
exp.a[1] = 3.0f;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 2; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
res.x = vec_set_var (3.0f, src.x, 1);
|
||||
exp.a[1] = 3.0f;
|
||||
if (__builtin_memcmp (&res.a[0], &exp.a[0], 4) != 0)
|
||||
__builtin_abort ();
|
||||
|
||||
for (int i = 0; i != 2; i++)
|
||||
{
|
||||
src.a[i] = i;
|
||||
exp.a[i] = i;
|
||||
}
|
||||
_Float16 res_scalar = vec_extract (src.x);
|
||||
if (res_scalar != 1.0f)
|
||||
__builtin_abort ();
|
||||
return ;
|
||||
}
|
Loading…
Add table
Reference in a new issue