ii386: Add missing two element 64bit vector permutations [PR89021]
In addition to V8QI permutations, several other missing permutations are added for 64bit vector modes for TARGET_SSSE3 and TARGET_SSE4_1 targets. 2021-06-16 Uroš Bizjak <ubizjak@gmail.com> gcc/ PR target/89021 * config/i386/i386-expand.c (expand_vec_perm_2perm_pblendv): Handle 64bit modes for TARGET_SSE4_1. (expand_vec_perm_pshufb2): Handle 64bit modes for TARGET_SSSE3. (expand_vec_perm_even_odd_pack): Handle V4HI mode. (expand_vec_perm_even_odd_1) <case E_V4HImode>: Expand via expand_vec_perm_pshufb2 for TARGET_SSSE3 and via expand_vec_perm_even_odd_pack for TARGET_SSE4_1. * config/i386/mmx.md (mmx_packusdw): New insn pattern.
This commit is contained in:
parent
c25e3bf879
commit
dd835ec24b
2 changed files with 82 additions and 25 deletions
|
@ -17633,8 +17633,10 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
|||
|
||||
if (vmode == V8QImode)
|
||||
{
|
||||
rtx m128 = GEN_INT (-128);
|
||||
|
||||
for (i = nelt; i < 16; ++i)
|
||||
rperm[i] = constm1_rtx;
|
||||
rperm[i] = m128;
|
||||
vpmode = V16QImode;
|
||||
}
|
||||
|
||||
|
@ -18972,7 +18974,8 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
|
|||
;
|
||||
else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
|
||||
;
|
||||
else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
|
||||
else if (TARGET_SSE4_1 && (GET_MODE_SIZE (vmode) == 16
|
||||
|| GET_MODE_SIZE (vmode) == 8))
|
||||
;
|
||||
else
|
||||
return false;
|
||||
|
@ -19229,14 +19232,31 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
|||
{
|
||||
rtx rperm[2][16], vperm, l, h, op, m128;
|
||||
unsigned int i, nelt, eltsz;
|
||||
machine_mode mode;
|
||||
rtx (*gen) (rtx, rtx, rtx);
|
||||
|
||||
if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
||||
if (!TARGET_SSSE3 || (GET_MODE_SIZE (d->vmode) != 16
|
||||
&& GET_MODE_SIZE (d->vmode) != 8))
|
||||
return false;
|
||||
gcc_assert (!d->one_operand_p);
|
||||
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
switch (GET_MODE_SIZE (d->vmode))
|
||||
{
|
||||
case 8:
|
||||
mode = V8QImode;
|
||||
gen = gen_mmx_pshufbv8qi3;
|
||||
break;
|
||||
case 16:
|
||||
mode = V16QImode;
|
||||
gen = gen_ssse3_pshufbv16qi3;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
nelt = d->nelt;
|
||||
eltsz = GET_MODE_UNIT_SIZE (d->vmode);
|
||||
|
||||
|
@ -19247,7 +19267,7 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
|||
m128 = GEN_INT (-128);
|
||||
for (i = 0; i < nelt; ++i)
|
||||
{
|
||||
unsigned j, e = d->perm[i];
|
||||
unsigned j, k, e = d->perm[i];
|
||||
unsigned which = (e >= nelt);
|
||||
if (e >= nelt)
|
||||
e -= nelt;
|
||||
|
@ -19257,26 +19277,29 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
|||
rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
|
||||
rperm[1-which][i*eltsz + j] = m128;
|
||||
}
|
||||
|
||||
for (k = i*eltsz + j; k < 16; ++k)
|
||||
rperm[0][k] = rperm[1][k] = m128;
|
||||
}
|
||||
|
||||
vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
|
||||
vperm = force_reg (V16QImode, vperm);
|
||||
|
||||
l = gen_reg_rtx (V16QImode);
|
||||
op = gen_lowpart (V16QImode, d->op0);
|
||||
emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
|
||||
l = gen_reg_rtx (mode);
|
||||
op = gen_lowpart (mode, d->op0);
|
||||
emit_insn (gen (l, op, vperm));
|
||||
|
||||
vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
|
||||
vperm = force_reg (V16QImode, vperm);
|
||||
|
||||
h = gen_reg_rtx (V16QImode);
|
||||
op = gen_lowpart (V16QImode, d->op1);
|
||||
emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
|
||||
h = gen_reg_rtx (mode);
|
||||
op = gen_lowpart (mode, d->op1);
|
||||
emit_insn (gen (h, op, vperm));
|
||||
|
||||
op = d->target;
|
||||
if (d->vmode != V16QImode)
|
||||
op = gen_reg_rtx (V16QImode);
|
||||
emit_insn (gen_iorv16qi3 (op, l, h));
|
||||
if (d->vmode != mode)
|
||||
op = gen_reg_rtx (mode);
|
||||
emit_insn (gen_rtx_SET (op, gen_rtx_IOR (mode, l, h)));
|
||||
if (op != d->target)
|
||||
emit_move_insn (d->target, gen_lowpart (d->vmode, op));
|
||||
|
||||
|
@ -19455,6 +19478,17 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
|
|||
|
||||
switch (d->vmode)
|
||||
{
|
||||
case E_V4HImode:
|
||||
/* Required for "pack". */
|
||||
if (!TARGET_SSE4_1)
|
||||
return false;
|
||||
c = 0xffff;
|
||||
s = 16;
|
||||
half_mode = V2SImode;
|
||||
gen_and = gen_andv2si3;
|
||||
gen_pack = gen_mmx_packusdw;
|
||||
gen_shift = gen_lshrv2si3;
|
||||
break;
|
||||
case E_V8HImode:
|
||||
/* Required for "pack". */
|
||||
if (!TARGET_SSE4_1)
|
||||
|
@ -19507,7 +19541,7 @@ expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
|
|||
end_perm = true;
|
||||
break;
|
||||
default:
|
||||
/* Only V8QI, V8HI, V16QI, V16HI and V32QI modes
|
||||
/* Only V4HI, V8QI, V8HI, V16QI, V16HI and V32QI modes
|
||||
are more profitable than general shuffles. */
|
||||
return false;
|
||||
}
|
||||
|
@ -19698,18 +19732,25 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
|
|||
break;
|
||||
|
||||
case E_V4HImode:
|
||||
if (d->testing_p)
|
||||
break;
|
||||
/* We need 2*log2(N)-1 operations to achieve odd/even
|
||||
with interleave. */
|
||||
t1 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1));
|
||||
emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1));
|
||||
if (odd)
|
||||
t2 = gen_mmx_punpckhwd (d->target, d->target, t1);
|
||||
if (TARGET_SSE4_1)
|
||||
return expand_vec_perm_even_odd_pack (d);
|
||||
else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
|
||||
return expand_vec_perm_pshufb2 (d);
|
||||
else
|
||||
t2 = gen_mmx_punpcklwd (d->target, d->target, t1);
|
||||
emit_insn (t2);
|
||||
{
|
||||
if (d->testing_p)
|
||||
break;
|
||||
/* We need 2*log2(N)-1 operations to achieve odd/even
|
||||
with interleave. */
|
||||
t1 = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1));
|
||||
emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1));
|
||||
if (odd)
|
||||
t2 = gen_mmx_punpckhwd (d->target, d->target, t1);
|
||||
else
|
||||
t2 = gen_mmx_punpcklwd (d->target, d->target, t1);
|
||||
emit_insn (t2);
|
||||
}
|
||||
break;
|
||||
|
||||
case E_V8HImode:
|
||||
|
|
|
@ -2477,6 +2477,22 @@
|
|||
(set_attr "type" "mmxshft,sselog,sselog")
|
||||
(set_attr "mode" "DI,TI,TI")])
|
||||
|
||||
(define_insn_and_split "mmx_packusdw"
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
|
||||
(vec_concat:V4HI
|
||||
(us_truncate:V2HI
|
||||
(match_operand:V2SI 1 "register_operand" "0,0,Yw"))
|
||||
(us_truncate:V2HI
|
||||
(match_operand:V2SI 2 "register_operand" "Yr,*x,Yw"))))]
|
||||
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(const_int 0)]
|
||||
"ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
|
||||
[(set_attr "isa" "noavx,noavx,avx")
|
||||
(set_attr "type" "sselog")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn_and_split "mmx_punpckhbw"
|
||||
[(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
|
||||
(vec_select:V8QI
|
||||
|
|
Loading…
Add table
Reference in a new issue