Support vec_set/vec_extract/vec_init for V4HF/V2HF.

gcc/ChangeLog:

	* config/i386/i386-expand.cc
	(ix86_expand_vector_init_duplicate): Handle V4HF/V4BF and
	V2HF/V2BF.
	(ix86_expand_vector_init_one_nonzero): Ditto.
	(ix86_expand_vector_init_one_var): Ditto.
	(ix86_expand_vector_init_general): Ditto.
	(ix86_expand_vector_set_var): Ditto.
	(ix86_expand_vector_set): Ditto.
	(ix86_expand_vector_extract): Ditto.
	* config/i386/mmx.md
	(mmxdoublevecmode): Extend to V4HF/V4BF/V2HF/V2BF.
	(*mmx_pinsrw): Extend to V4FI_64, add a new alternative (&x,
	x, x), add a new define_split after the pattern.
	(*mmx_pextrw<mode>): New define_insn.
	(mmx_pshufw_1): Rename to ..
	(mmx_pshufw<mode>_1): .. this, extend to V4FI_64.
	(*mmx_pblendw64): Extend to V4FI_64.
	(*vec_dup<mode>): New define_insn.
	(vec_setv4hi): Rename to ..
	(vec_set<mode>): .. this, and extend to V4FI_64.
	(vec_extractv4hihi): Rename to ..
	(vec_extract<mode><mmxscalarmodelower>): .. this, and extend
	to V4FI_64.
	(vec_init<mode><mmxscalarmodelower>): New define_expand.
	(*pinsrw): Extend to V2FI_32, add a new alternative (&x,
	x, x), and add a new define_split after it.
	(*pextrw<mode>): New define_insn.
	(vec_setv2hi): Rename to ..
	(vec_set<mode>): .. this, extend to V2FI_32.
	(vec_extractv2hihi): Rename to ..
	(vec_extract<mode><mmxscalarmodelower>): .. this, extend to
	V2FI_32.
	(*punpckwd): Extend to V2FI_32.
	(*pshufw_1): Rename to ..
	(*pshufw<mode>_1): .. this, extend to V2FI_32.
	(vec_initv2hihi): Rename to ..
	(vec_init<mode><mmxscalarmodelower>): .. this, and extend to
	V2FI_32.
	(*vec_dup<mode>): New define_insn.
	* config/i386/sse.md (*vec_extract<mode>): Refine constraint
	from v to Yw.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/part-vect-vec_elem-1.c: New test.
	* gcc.target/i386/part-vect-vec_elem-2.c: New test.
This commit is contained in:
liuhongt 2023-11-08 14:52:01 +08:00
parent b51bfee1be
commit 2794d510b9
5 changed files with 541 additions and 64 deletions

View file

@ -15592,6 +15592,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
goto widen;
case E_V4HFmode:
case E_V4BFmode:
if (TARGET_MMX_WITH_SSE)
{
val = force_reg (GET_MODE_INNER (mode), val);
rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
emit_insn (gen_rtx_SET (target, x));
return true;
}
return false;
case E_V2HImode:
if (TARGET_SSE2)
{
@ -15605,6 +15616,17 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
}
return false;
case E_V2HFmode:
case E_V2BFmode:
if (TARGET_SSE2)
{
val = force_reg (GET_MODE_INNER (mode), val);
rtx x = gen_rtx_VEC_DUPLICATE (mode, val);
emit_insn (gen_rtx_SET (target, x));
return true;
}
return false;
case E_V8QImode:
case E_V4QImode:
if (!mmx_ok)
@ -15815,6 +15837,8 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
use_vector_set = TARGET_MMX_WITH_SSE && TARGET_SSE4_1;
break;
case E_V4HImode:
case E_V4HFmode:
case E_V4BFmode:
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
break;
case E_V4QImode:
@ -16051,6 +16075,8 @@ ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
case E_V4SImode:
case E_V8HImode:
case E_V4HImode:
case E_V4HFmode:
case E_V4BFmode:
break;
case E_V16QImode:
@ -16438,6 +16464,7 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
rtx ops[64], op0, op1, op2, op3, op4, op5;
machine_mode half_mode = VOIDmode;
machine_mode quarter_mode = VOIDmode;
machine_mode int_inner_mode = VOIDmode;
int n, i;
switch (mode)
@ -16582,6 +16609,13 @@ quarter:
ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
return;
case E_V4HFmode:
case E_V4BFmode:
case E_V2HFmode:
case E_V2BFmode:
int_inner_mode = HImode;
break;
case E_V4HImode:
case E_V8QImode:
@ -16613,6 +16647,16 @@ quarter:
for (j = 0; j < n_elt_per_word; ++j)
{
rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
if (int_inner_mode != E_VOIDmode)
{
gcc_assert (TARGET_SSE2 && int_inner_mode == HImode);
rtx tmp = gen_reg_rtx (int_inner_mode);
elt = lowpart_subreg (int_inner_mode,
force_reg (inner_mode, elt),
inner_mode);
emit_move_insn (tmp, elt);
elt = tmp;
}
elt = convert_modes (tmp_mode, inner_mode, elt, true);
if (j == 0)
@ -16839,6 +16883,14 @@ ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
case E_V16SFmode:
cmp_mode = V16SImode;
break;
case E_V2HFmode:
case E_V2BFmode:
cmp_mode = V2HImode;
break;
case E_V4HFmode:
case E_V4BFmode:
cmp_mode = V4HImode;
break;
case E_V8HFmode:
cmp_mode = V8HImode;
break;
@ -17085,9 +17137,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
case E_V8HFmode:
case E_V8BFmode:
case E_V2HImode:
case E_V2HFmode:
case E_V2BFmode:
use_vec_merge = TARGET_SSE2;
break;
case E_V4HImode:
case E_V4HFmode:
case E_V4BFmode:
use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
break;
@ -17428,9 +17484,13 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case E_V8HFmode:
case E_V8BFmode:
case E_V2HImode:
case E_V2HFmode:
case E_V2BFmode:
use_vec_extr = TARGET_SSE2;
break;
case E_V4HImode:
case E_V4HFmode:
case E_V4BFmode:
use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
break;

View file

@ -112,11 +112,21 @@
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr mmxdoublevecmode
[(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")])
[(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF") (V4HI "V8HI")
(V2HI "V4HI") (V2HF "V4HF") (V2BF "V4BF")])
;; Mapping of vector modes back to the scalar modes
(define_mode_attr mmxscalarmode
[(V2SI "SI") (V2SF "SF")])
[(V2SI "SI") (V2SF "SF")
(V4HF "HF") (V4BF "BF")
(V2HF "HF") (V2BF "BF")
(V4HI "HI") (V2HI "HI")])
;; Lower-case spelling of the scalar element mode of each vector mode,
;; used to build pattern names such as vec_extract<mode><mmxscalarmodelower>
;; and vec_init<mode><mmxscalarmodelower>.
(define_mode_attr mmxscalarmodelower
[(V2SI "si") (V2SF "sf")
(V4HF "hf") (V4BF "bf")
(V2HF "hf") (V2BF "bf")
(V4HI "hi") (V2HI "hi")])
(define_mode_attr Yv_Yw
[(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
@ -4882,11 +4892,11 @@
(set_attr "mode" "TI")])
(define_insn "*mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand" "=y,x,YW")
(vec_merge:V4HI
(vec_duplicate:V4HI
(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
(match_operand:V4HI 1 "register_operand" "0,0,YW")
[(set (match_operand:V4FI_64 0 "register_operand" "=y,x,YW,&x")
(vec_merge:V4FI_64
(vec_duplicate:V4FI_64
(match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,rm,x"))
(match_operand:V4FI_64 1 "register_operand" "0,0,YW,x")
(match_operand:SI 3 "const_int_operand")))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)
@ -4896,6 +4906,8 @@
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
switch (which_alternative)
{
case 3:
return "#";
case 2:
if (MEM_P (operands[2]))
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
@ -4911,11 +4923,28 @@
gcc_unreachable ();
}
}
[(set_attr "isa" "*,sse2_noavx,avx")
(set_attr "mmx_isa" "native,*,*")
(set_attr "type" "mmxcvt,sselog,sselog")
[(set_attr "isa" "*,sse2_noavx,avx,sse4")
(set_attr "mmx_isa" "native,*,*,*")
(set_attr "type" "mmxcvt,sselog,sselog,sselog")
(set_attr "length_immediate" "1")
(set_attr "mode" "DI,TI,TI")])
(set_attr "mode" "DI,TI,TI,TI")])
;; For TARGET_SSE4_1, implement insert from an XMM reg with PSHUFLW + PBLENDW:
;; after reload, first broadcast the scalar (operand 2) into the destination,
;; then vec_merge the original vector (operand 1) with that broadcast.
;; Operand 0 is written before operand 1 is read, which is why the matching
;; *mmx_pinsrw alternative marks operand 0 earlyclobber ("&x").
;; The two vec_merge inputs are swapped relative to the matched pattern, so
;; the selection mask is inverted and truncated to the four lanes (0xf).
(define_split
[(set (match_operand:V4FI_64 0 "sse_reg_operand")
(vec_merge:V4FI_64
(vec_duplicate:V4FI_64
(match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
(match_operand:V4FI_64 1 "sse_reg_operand")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && reload_completed
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (<MODE>mode))"
[(set (match_dup 0)
(vec_duplicate:V4FI_64 (match_dup 2)))
(set (match_dup 0)
(vec_merge:V4FI_64 (match_dup 1) (match_dup 0) (match_dup 3)))]
"operands[3] = GEN_INT (~INTVAL (operands[3]) & 0xf);")
(define_insn "*mmx_pinsrb"
[(set (match_operand:V8QI 0 "register_operand" "=x,YW")
@ -4973,6 +5002,41 @@
(set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex")
(set_attr "mode" "DI,TI,TI,TI")])
;; Extract the element selected by the constant index in operand 2 from a
;; V4F_64 vector (presumably V4HF/V4BF -- the iterator is defined elsewhere).
;; Alternatives 0 and 1 use PEXTRW into a general register, printed with the
;; %k modifier; alternative 0 takes its source from an MMX register ("y"),
;; alternative 1 from an SSE register.
;; Alternatives 2 and 3 use PEXTRW with a memory destination.
;; Alternatives 4 and 5 keep the result in an SSE register: the lane index
;; is doubled to a byte count and the vector is shifted right with
;; PSRLDQ (destructive, operand 1 tied to operand 0) or VPSRLDQ.
(define_insn "*mmx_pextrw<mode>"
[(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,?r,jm,m,x,Yw")
(vec_select:<mmxscalarmode>
(match_operand:V4F_64 1 "register_operand" "y,YW,YW,YW,0,YW")
(parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
{
switch (which_alternative)
{
case 0:
case 1:
return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
case 2:
case 3:
return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
case 4:
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
return "psrldq\t{%2, %0|%0, %2}";
case 5:
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
default:
gcc_unreachable ();
}
}
[(set_attr "isa" "*,sse2,sse4_noavx,avx,noavx,avx")
(set_attr "addr" "*,*,gpr16,*,*,*")
(set_attr "mmx_isa" "native,*,*,*,*,*")
(set_attr "type" "mmxcvt,sselog1,sselog1,sselog1,sseishft1,sseishft1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,maybe_vex,maybe_vex,maybe_evex,orig,maybe_evex")
(set_attr "mode" "DI,TI,TI,TI,TI,TI")])
(define_insn "*mmx_pextrw_zext"
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
(zero_extend:SWI48
@ -5069,18 +5133,18 @@
&& (TARGET_SSE || TARGET_3DNOW_A)"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
GEN_INT ((mask >> 6) & 3)));
emit_insn (gen_mmx_pshufwv4hi_1 (operands[0], operands[1],
GEN_INT ((mask >> 0) & 3),
GEN_INT ((mask >> 2) & 3),
GEN_INT ((mask >> 4) & 3),
GEN_INT ((mask >> 6) & 3)));
DONE;
})
(define_insn "mmx_pshufw_1"
[(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_select:V4HI
(match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
(define_insn "mmx_pshufw<mode>_1"
[(set (match_operand:V4FI_64 0 "register_operand" "=y,Yw")
(vec_select:V4FI_64
(match_operand:V4FI_64 1 "register_mmxmem_operand" "ym,Yw")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@ -5134,10 +5198,10 @@
(set_attr "mode" "TI")])
(define_insn "*mmx_pblendw64"
[(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,x")
(vec_merge:V4HI
(match_operand:V4HI 2 "register_operand" "Yr,*x,x")
(match_operand:V4HI 1 "register_operand" "0,0,x")
[(set (match_operand:V4FI_64 0 "register_operand" "=Yr,*x,x")
(vec_merge:V4FI_64
(match_operand:V4FI_64 2 "register_operand" "Yr,*x,x")
(match_operand:V4FI_64 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_15_operand")))]
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
"@
@ -5152,10 +5216,10 @@
(set_attr "mode" "TI")])
(define_insn "*mmx_pblendw32"
[(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,x")
(vec_merge:V2HI
(match_operand:V2HI 2 "register_operand" "Yr,*x,x")
(match_operand:V2HI 1 "register_operand" "0,0,x")
[(set (match_operand:V2FI_32 0 "register_operand" "=Yr,*x,x")
(vec_merge:V2FI_32
(match_operand:V2FI_32 2 "register_operand" "Yr,*x,x")
(match_operand:V2FI_32 1 "register_operand" "0,0,x")
(match_operand:SI 3 "const_0_to_7_operand")))]
"TARGET_SSE4_1"
"@
@ -5212,6 +5276,16 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI,TI")])
;; Broadcast a scalar held in an SSE register to every element of a V4F_64
;; vector with a single (V)PSHUFLW whose immediate is 0, i.e. every selected
;; word is word 0 of the source.  Only enabled for TARGET_MMX_WITH_SSE.
(define_insn "*vec_dup<mode>"
[(set (match_operand:V4F_64 0 "register_operand" "=Yw")
(vec_duplicate:V4F_64
(match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
"TARGET_MMX_WITH_SSE"
"%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
[(set_attr "isa" "sse2")
(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
(define_insn "*vec_dupv2si"
[(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
@ -5405,9 +5479,9 @@
DONE;
})
(define_expand "vec_setv4hi"
[(match_operand:V4HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
(define_expand "vec_set<mode>"
[(match_operand:V4FI_64 0 "register_operand")
(match_operand:<mmxscalarmode> 1 "register_operand")
(match_operand 2 "vec_setm_mmx_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
@ -5419,9 +5493,9 @@
DONE;
})
(define_expand "vec_extractv4hihi"
[(match_operand:HI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(define_expand "vec_extract<mode><mmxscalarmodelower>"
[(match_operand:<mmxscalarmode> 0 "register_operand")
(match_operand:V4FI_64 1 "register_operand")
(match_operand 2 "const_int_operand")]
"TARGET_MMX || TARGET_MMX_WITH_SSE"
{
@ -5440,6 +5514,16 @@
DONE;
})
;; Expander for initializing a V4F_64 vector register (operand 0) from
;; operand 1.  All of the work is delegated to ix86_expand_vector_init;
;; the pattern is only enabled for TARGET_MMX_WITH_SSE.
(define_expand "vec_init<mode><mmxscalarmodelower>"
[(match_operand:V4F_64 0 "register_operand")
(match_operand 1)]
"TARGET_MMX_WITH_SSE"
{
ix86_expand_vector_init (TARGET_MMX_WITH_SSE, operands[0],
operands[1]);
DONE;
})
(define_expand "vec_setv8qi"
[(match_operand:V8QI 0 "register_operand")
(match_operand:QI 1 "register_operand")
@ -5476,11 +5560,11 @@
})
(define_insn "*pinsrw"
[(set (match_operand:V2HI 0 "register_operand" "=x,YW")
(vec_merge:V2HI
(vec_duplicate:V2HI
(match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
(match_operand:V2HI 1 "register_operand" "0,YW")
[(set (match_operand:V2FI_32 0 "register_operand" "=x,YW,&x")
(vec_merge:V2FI_32
(vec_duplicate:V2FI_32
(match_operand:<mmxscalarmode> 2 "nonimmediate_operand" "rm,rm,x"))
(match_operand:V2FI_32 1 "register_operand" "0,YW,x")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE2
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
@ -5489,6 +5573,8 @@
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
switch (which_alternative)
{
case 2:
return "#";
case 1:
if (MEM_P (operands[2]))
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
@ -5503,11 +5589,29 @@
gcc_unreachable ();
}
}
[(set_attr "isa" "noavx,avx")
[(set_attr "isa" "noavx,avx,sse4")
(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
;; For TARGET_SSE4_1, implement insert from an XMM reg with PSHUFLW + PBLENDW:
;; after reload, first broadcast the scalar (operand 2) into the destination,
;; then vec_merge the original vector (operand 1) with that broadcast.
;; Operand 0 is written before operand 1 is read, which is why the matching
;; *pinsrw alternative marks operand 0 earlyclobber ("&x").
;; The two vec_merge inputs are swapped relative to the matched pattern, so
;; the selection mask is inverted and truncated to the two lanes (0x3).
(define_split
[(set (match_operand:V2FI_32 0 "sse_reg_operand")
(vec_merge:V2FI_32
(vec_duplicate:V2FI_32
(match_operand:<mmxscalarmode> 2 "sse_reg_operand"))
(match_operand:V2FI_32 1 "sse_reg_operand")
(match_operand:SI 3 "const_int_operand")))]
"TARGET_SSE4_1 && reload_completed
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (<MODE>mode))"
[(set (match_dup 0)
(vec_duplicate:V2FI_32 (match_dup 2)))
(set (match_dup 0)
(vec_merge:V2FI_32 (match_dup 1) (match_dup 0) (match_dup 3)))]
"operands[3] = GEN_INT (~INTVAL (operands[3]) & 0x3);")
(define_insn "*pinsrb"
[(set (match_operand:V4QI 0 "register_operand" "=x,YW")
(vec_merge:V4QI
@ -5561,6 +5665,39 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
;; Extract the element selected by the constant index in operand 2 from a
;; V2F_32 vector (presumably V2HF/V2BF -- the iterator is defined elsewhere).
;; Alternative 0 uses PEXTRW into a general register, printed with the %k
;; modifier.
;; Alternatives 1 and 2 use PEXTRW / VPEXTRW with a memory destination.
;; Alternatives 3 and 4 keep the result in an SSE register: the lane index
;; is doubled to a byte count and the vector is shifted right with
;; PSRLDQ (destructive, operand 1 tied to operand 0) or VPSRLDQ.
(define_insn "*pextrw<mode>"
[(set (match_operand:<mmxscalarmode> 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
(vec_select:<mmxscalarmode>
(match_operand:V2F_32 1 "register_operand" "YW,YW,YW,0,YW")
(parallel [(match_operand:SI 2 "const_0_to_1_operand")])))]
"TARGET_SSE2"
{
switch (which_alternative)
{
case 0:
return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
case 1:
return "pextrw\t{%2, %1, %0|%0, %1, %2}";
case 2:
return "vpextrw\t{%2, %1, %0|%0, %1, %2}";
case 3:
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
return "psrldq\t{%2, %0|%0, %2}";
case 4:
operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
default:
gcc_unreachable ();
}
}
[(set_attr "isa" "*,sse4_noavx,avx,noavx,avx")
(set_attr "addr" "*,gpr16,*,*,*")
(set_attr "type" "sselog1,sselog1,sselog1,sseishft1,sseishft1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "maybe_vex,orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "TI")])
(define_insn "*pextrw_zext"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(zero_extend:SWI48
@ -5608,9 +5745,9 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_expand "vec_setv2hi"
[(match_operand:V2HI 0 "register_operand")
(match_operand:HI 1 "register_operand")
(define_expand "vec_set<mode>"
[(match_operand:V2FI_32 0 "register_operand")
(match_operand:<mmxscalarmode> 1 "register_operand")
(match_operand 2 "vec_setm_sse41_operand")]
"TARGET_SSE2"
{
@ -5622,9 +5759,9 @@
DONE;
})
(define_expand "vec_extractv2hihi"
[(match_operand:HI 0 "register_operand")
(match_operand:V2HI 1 "register_operand")
(define_expand "vec_extract<mode><mmxscalarmodelower>"
[(match_operand:<mmxscalarmode> 0 "register_operand")
(match_operand:V2FI_32 1 "register_operand")
(match_operand 2 "const_int_operand")]
"TARGET_SSE2"
{
@ -5659,29 +5796,29 @@
})
(define_insn_and_split "*punpckwd"
[(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
(vec_select:V2HI
(vec_concat:V4HI
(match_operand:V2HI 1 "register_operand" "0,Yw")
(match_operand:V2HI 2 "register_operand" "x,Yw"))
[(set (match_operand:V2FI_32 0 "register_operand" "=x,Yw")
(vec_select:V2FI_32
(vec_concat:<mmxdoublevecmode>
(match_operand:V2FI_32 1 "register_operand" "0,Yw")
(match_operand:V2FI_32 2 "register_operand" "x,Yw"))
(parallel [(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")])))]
"TARGET_SSE2"
"#"
"&& reload_completed"
[(set (match_dup 5)
(vec_select:V8HI
(vec_select:<mmxxmmmode>
(match_dup 5)
(parallel [(match_dup 3) (match_dup 4)
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
{
rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode);
rtx dest = lowpart_subreg (<mmxxmmmode>mode, operands[0], <MODE>mode);
rtx op1 = lowpart_subreg (<mmxxmmmode>mode, operands[1], <MODE>mode);
rtx op2 = lowpart_subreg (<mmxxmmmode>mode, operands[2], <MODE>mode);
emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2));
emit_insn (gen_vec_interleave_low<mmxxmmmodelower> (dest, op1, op2));
static const int map[4] = { 0, 2, 1, 3 };
@ -5699,10 +5836,10 @@
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
(define_insn "*pshufw_1"
[(set (match_operand:V2HI 0 "register_operand" "=Yw")
(vec_select:V2HI
(match_operand:V2HI 1 "register_operand" "Yw")
(define_insn "*pshufw<mode>_1"
[(set (match_operand:V2FI_32 0 "register_operand" "=Yw")
(vec_select:V2FI_32
(match_operand:V2FI_32 1 "register_operand" "Yw")
(parallel [(match_operand 2 "const_0_to_1_operand")
(match_operand 3 "const_0_to_1_operand")])))]
"TARGET_SSE2"
@ -5731,8 +5868,18 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
(define_expand "vec_initv2hihi"
[(match_operand:V2HI 0 "register_operand")
;; Broadcast a scalar held in an SSE register to both elements of a V2F_32
;; vector with a single (V)PSHUFLW whose immediate is 0, i.e. every selected
;; word is word 0 of the source.
(define_insn "*vec_dup<mode>"
[(set (match_operand:V2F_32 0 "register_operand" "=Yw")
(vec_duplicate:V2F_32
(match_operand:<mmxscalarmode> 1 "register_operand" "Yw")))]
"TARGET_SSE2"
"%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI")])
(define_expand "vec_init<mode><mmxscalarmodelower>"
[(match_operand:V2FI_32 0 "register_operand")
(match_operand 1)]
"TARGET_SSE2"
{

View file

@ -12372,9 +12372,9 @@
"operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);")
(define_insn "*vec_extract<mode>"
[(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,v")
[(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,jm,m,x,Yw")
(vec_select:HFBF
(match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,v")
(match_operand:<ssevecmode> 1 "register_operand" "v,x,v,0,YW")
(parallel
[(match_operand:SI 2 "const_0_to_7_operand")])))]
"TARGET_SSE2"

View file

@ -0,0 +1,135 @@
/* { dg-do run { target { ! ia32 } } } */
/* { dg-options "-O1 -msse4.1" } */
/* { dg-require-effective-target sse4 } */
#include "sse4_1-check.h"
typedef _Float16 v4hf __attribute__((vector_size(8)));
/* Return a v4hf whose four elements are all A.  The noipa attribute keeps
   interprocedural optimization away, so the vector is built in this
   function from a run-time value.  */
v4hf
__attribute__((noipa))
vector_init_dupv4hf (_Float16 a)
{
return __extension__(v4hf){a, a, a, a};
}
/* Return an all-zero v4hf; the argument A is not used.  */
v4hf
__attribute__((noipa))
vector_init_allzero (_Float16 a)
{
return __extension__(v4hf){0, 0, 0, 0};
}
/* Return a v4hf that is zero everywhere except element 2, which is A.  */
v4hf
__attribute__((noipa))
vector_init_one_nonzero (_Float16 a)
{
return __extension__(v4hf){0, 0, a, 0};
}
/* Return { 1, 2, A, 4 }: three constant elements and a single variable
   element at index 2.  */
v4hf
__attribute__((noipa))
vector_init_one_var (_Float16 a)
{
return __extension__(v4hf){1, 2, a, 4};
}
/* Return a v4hf built from four independent run-time values.  The
   arguments are stored in reverse order: element 0 is A3 and element 3
   is A.  */
v4hf
__attribute__((noipa))
vector_init_general (_Float16 a, _Float16 a1, _Float16 a2, _Float16 a3)
{
return __extension__(v4hf){a3, a2, a1, a};
}
/* Return B with element 1 (a compile-time constant index) replaced by A.  */
v4hf
__attribute__((noipa))
vec_set (_Float16 a, v4hf b)
{
b[1] = a;
return b;
}
/* Return B with element C replaced by A, where C is a run-time index.
   C is not range-checked here; the caller must pass a valid lane.  */
v4hf
__attribute__((noipa))
vec_set_var (_Float16 a, v4hf b, int c)
{
b[c] = a;
return b;
}
/* Return element 2 of B.  */
_Float16
__attribute__((noipa))
vec_extract (v4hf b)
{
return b[2];
}
/* Run every V4HF helper above once and abort as soon as a result differs
   bytewise from the expected vector (or, for vec_extract, from the
   expected scalar).  */
static void
sse4_1_test ()
{
  typedef union
  {
    _Float16 a[4];
    v4hf x;
  } union64hf;
  union64hf got, want, in;

  /* Broadcast of one run-time scalar.  */
  got.x = vector_init_dupv4hf (1.0f16);
  for (int lane = 0; lane < 4; ++lane)
    want.a[lane] = 1.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* All-zero vector.  */
  got.x = vector_init_allzero (1.0f16);
  for (int lane = 0; lane < 4; ++lane)
    want.a[lane] = 0.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Zero vector with a single non-zero element at index 2.  */
  got.x = vector_init_one_nonzero (1.0f16);
  for (int lane = 0; lane < 4; ++lane)
    want.a[lane] = 0.0f16;
  want.a[2] = 1.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Constant vector with one variable element: { 1, 2, 3, 4 }.  */
  got.x = vector_init_one_var (3.0f16);
  for (int lane = 0; lane < 4; ++lane)
    want.a[lane] = lane + 1;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Four variable elements; the helper reverses its arguments, so the
     result is again { 1, 2, 3, 4 }.  */
  got.x = vector_init_general (4.0, 3.0f, 2.0f, 1.0);
  for (int lane = 0; lane < 4; ++lane)
    want.a[lane] = 1 + lane;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Insert at constant index 1 into { 0, 1, 2, 3 }.  */
  for (int lane = 0; lane < 4; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  got.x = vec_set (3.0f, in.x);
  want.a[1] = 3.0f;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Insert at run-time index 1 into { 0, 1, 2, 3 }.  */
  for (int lane = 0; lane < 4; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  got.x = vec_set_var (3.0f, in.x, 1);
  want.a[1] = 3.0f;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Extract element 2 of { 0, 1, 2, 3 }.  */
  for (int lane = 0; lane < 4; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  _Float16 picked = vec_extract (in.x);
  if (picked != 2.0f)
    __builtin_abort ();
}

View file

@ -0,0 +1,135 @@
/* { dg-do run { target { ! ia32 } } } */
/* { dg-options "-O1 -msse4.1" } */
/* { dg-require-effective-target sse4 } */
#include "sse4_1-check.h"
typedef _Float16 v2hf __attribute__((vector_size(4)));
/* Return a v2hf whose two elements are both A.  The noipa attribute keeps
   interprocedural optimization away, so the vector is built in this
   function from a run-time value.  */
v2hf
__attribute__((noipa))
vector_init_dupv2hf (_Float16 a)
{
return __extension__(v2hf){a, a};
}
/* Return an all-zero v2hf; the argument A is not used.  */
v2hf
__attribute__((noipa))
vector_init_allzero (_Float16 a)
{
return __extension__(v2hf){0, 0};
}
/* Return a v2hf whose element 0 is zero and whose element 1 is A.  */
v2hf
__attribute__((noipa))
vector_init_one_nonzero (_Float16 a)
{
return __extension__(v2hf){0, a};
}
/* Return { 1, A }: one constant element and one variable element
   (index 1).  */
v2hf
__attribute__((noipa))
vector_init_one_var (_Float16 a)
{
return __extension__(v2hf){1, a};
}
/* Return a v2hf built from two independent run-time values.  The
   arguments are stored in reverse order: element 0 is A2 and element 1
   is A1.  */
v2hf
__attribute__((noipa))
vector_init_general (_Float16 a1, _Float16 a2)
{
return __extension__(v2hf){a2, a1};
}
/* Return B with element 1 (a compile-time constant index) replaced by A.  */
v2hf
__attribute__((noipa))
vec_set (_Float16 a, v2hf b)
{
b[1] = a;
return b;
}
/* Return B with element C replaced by A, where C is a run-time index.
   C is not range-checked here; the caller must pass a valid lane.  */
v2hf
__attribute__((noipa))
vec_set_var (_Float16 a, v2hf b, int c)
{
b[c] = a;
return b;
}
/* Return element 1 of B.  */
_Float16
__attribute__((noipa))
vec_extract (v2hf b)
{
return b[1];
}
/* Run every V2HF helper above once and abort as soon as a result differs
   bytewise from the expected vector (or, for vec_extract, from the
   expected scalar).  */
static void
sse4_1_test ()
{
  /* Two _Float16 lanes overlaid on the 32-bit vector.  */
  typedef union
  {
    _Float16 a[2];
    v2hf x;
  } union32hf;
  union32hf got, want, in;

  /* Broadcast of one run-time scalar.  */
  got.x = vector_init_dupv2hf (1.0f16);
  for (int lane = 0; lane < 2; ++lane)
    want.a[lane] = 1.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* All-zero vector.  */
  got.x = vector_init_allzero (1.0f16);
  for (int lane = 0; lane < 2; ++lane)
    want.a[lane] = 0.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Zero vector with a single non-zero element at index 1.  */
  got.x = vector_init_one_nonzero (1.0f16);
  for (int lane = 0; lane < 2; ++lane)
    want.a[lane] = 0.0f16;
  want.a[1] = 1.0f16;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* One constant and one variable element: { 1, 3 }.  */
  got.x = vector_init_one_var (3.0f16);
  want.a[0] = 1;
  want.a[1] = 3;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Two variable elements; the helper reverses its arguments, so the
     result is { 1, 2 }.  */
  got.x = vector_init_general (2.0f, 1.0);
  for (int lane = 0; lane < 2; ++lane)
    want.a[lane] = 1 + lane;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Insert at constant index 1 into { 0, 1 }.  */
  for (int lane = 0; lane < 2; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  got.x = vec_set (3.0f, in.x);
  want.a[1] = 3.0f;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Insert at run-time index 1 into { 0, 1 }.  */
  for (int lane = 0; lane < 2; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  got.x = vec_set_var (3.0f, in.x, 1);
  want.a[1] = 3.0f;
  if (__builtin_memcmp (got.a, want.a, sizeof got.a))
    __builtin_abort ();

  /* Extract element 1 of { 0, 1 }.  */
  for (int lane = 0; lane < 2; ++lane)
    {
      in.a[lane] = lane;
      want.a[lane] = lane;
    }
  _Float16 picked = vec_extract (in.x);
  if (picked != 1.0f)
    __builtin_abort ();
}