i386.c (vselect_insn): New variable.

* config/i386/i386.c (vselect_insn): New variable.
	(init_vselect_insn): New function.
	(expand_vselect, expand_vselect_insn): Add testing_p argument.
	Call init_vselect_insn if vselect_insn is NULL.  Adjust
	PATTERN (vselect_insn), instead of creating a new insn each time,
	only emit a copy of it if not testing and recog has been successful.
	(expand_vec_perm_pshufb, expand_vec_perm_1,
	expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
	callers.

From-SVN: r185578
This commit is contained in:
Jakub Jelinek 2012-03-20 17:27:05 +01:00 committed by Jakub Jelinek
parent 6945a32ec3
commit e5069d626e
2 changed files with 86 additions and 26 deletions

View file

@ -1,5 +1,15 @@
2012-03-20 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (vselect_insn): New variable.
(init_vselect_insn): New function.
(expand_vselect, expand_vselect_insn): Add testing_p argument.
Call init_vselect_insn if vselect_insn is NULL. Adjust
PATTERN (vselect_insn), instead of creating a new insn each time,
only emit a copy of it if not testing and recog has been successful.
(expand_vec_perm_pshufb, expand_vec_perm_1,
expand_vec_perm_pshuflw_pshufhw, expand_vec_perm_broadcast_1): Adjust
callers.
PR target/52607
* config/i386/i386.md ("isa" attribute): Add avx2 and noavx2.
("enabled" attribute): Handle avx2 and noavx2 isas.

View file

@ -35535,43 +35535,88 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
insn every time. */
static GTY(()) rtx vselect_insn;
/* Initialize vselect_insn. */
static void
init_vselect_insn (void)
{
unsigned i;
rtx x;
x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
for (i = 0; i < MAX_VECT_LEN; ++i)
XVECEXP (x, 0, i) = const0_rtx;
x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
const0_rtx), x);
x = gen_rtx_SET (VOIDmode, const0_rtx, x);
start_sequence ();
vselect_insn = emit_insn (x);
end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
unsigned nelt, bool testing_p)
{
rtx rperm[MAX_VECT_LEN], x;
unsigned i;
unsigned int i;
rtx x, save_vconcat;
int icode;
if (vselect_insn == NULL_RTX)
init_vselect_insn ();
x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
PUT_NUM_ELEM (XVEC (x, 0), nelt);
for (i = 0; i < nelt; ++i)
rperm[i] = GEN_INT (perm[i]);
XVECEXP (x, 0, i) = GEN_INT (perm[i]);
save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
SET_DEST (PATTERN (vselect_insn)) = target;
icode = recog_memoized (vselect_insn);
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
x = gen_rtx_SET (VOIDmode, target, x);
if (icode >= 0 && !testing_p)
emit_insn (copy_rtx (PATTERN (vselect_insn)));
x = emit_insn (x);
if (recog_memoized (x) < 0)
{
remove_insn (x);
return false;
}
return true;
SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
INSN_CODE (vselect_insn) = -1;
return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well. */
static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
const unsigned char *perm, unsigned nelt)
const unsigned char *perm, unsigned nelt,
bool testing_p)
{
enum machine_mode v2mode;
rtx x;
bool ok;
if (vselect_insn == NULL_RTX)
init_vselect_insn ();
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
return expand_vselect (target, x, perm, nelt);
x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
PUT_MODE (x, v2mode);
XEXP (x, 0) = op0;
XEXP (x, 1) = op1;
ok = expand_vselect (target, x, perm, nelt, testing_p);
XEXP (x, 0) = const0_rtx;
XEXP (x, 1) = const0_rtx;
return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
@ -35903,7 +35948,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
return true;
return expand_vselect (gen_lowpart (V4DImode, d->target),
gen_lowpart (V4DImode, d->op0),
perm, 4);
perm, 4, false);
}
/* Next see if vpermd can be used. */
@ -36051,7 +36096,7 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
}
}
if (expand_vselect (d->target, d->op0, perm2, nelt))
if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
return true;
/* There are plenty of patterns in sse.md that are written for
@ -36065,7 +36110,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i] = d->perm[i] & mask;
perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
}
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
d->testing_p))
return true;
/* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
@ -36079,13 +36125,15 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
}
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
d->testing_p))
return true;
}
}
/* Finally, try the fully general two operand permute. */
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
d->testing_p))
return true;
/* Recognize interleave style patterns with reversed operands. */
@ -36101,7 +36149,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
perm2[i] = e;
}
if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
d->testing_p))
return true;
}
@ -36149,14 +36198,14 @@ expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
memcpy (perm2, d->perm, 4);
for (i = 4; i < 8; ++i)
perm2[i] = i;
ok = expand_vselect (d->target, d->op0, perm2, 8);
ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
gcc_assert (ok);
/* Emit the pshufhw. */
memcpy (perm2 + 4, d->perm + 4, 4);
for (i = 0; i < 4; ++i)
perm2[i] = i;
ok = expand_vselect (d->target, d->target, perm2, 8);
ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
gcc_assert (ok);
return true;
@ -37190,7 +37239,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
while (vmode != V4SImode);
memset (perm2, elt, 4);
ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
d->testing_p);
gcc_assert (ok);
return true;