Change vec_perm checking and expansion level.
The can_vec_perm_p interface changed to use a C integer array. This allows easy re-use from the rtl level and the gimple level within the vectorizer. It allows both to determine if a given permutation is (un-)supported without having to create tree/rtl garbage. The expand_vec_perm interface changed to use rtl. This allows easy re-use from the rtl level, so that expand_vec_perm can be used in the fallback implementation of other optabs. * target.def (vec_perm_const_ok): Change parameters to mode and array of indices. * doc/tm.texi: Rebuild. * config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change parameters to mode and array of indices. * expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Expand operands here. * optabs.c (can_vec_perm_p): Rename from can_vec_perm_expr_p. Change parameters to mode and array of indices. (expand_vec_perm_1): Rename from expand_vec_perm_expr_1. (expand_vec_perm): Rename from expand_vec_perm_expr. Change parameters to mode and rtx inputs. Try lowering to QImode vec_perm_const before trying fully variable permutation. * optabs.h: Update decls. * tree-vect-generic.c (lower_vec_perm): Extract array of indices from VECTOR_CST to pass to can_vec_perm_p. * tree-vect-slp.c (vect_get_mask_element): Change mask parameter type from int pointer to unsigned char pointer. (vect_transform_slp_perm_load): Update for change to can_vec_perm_p. * tree-vect-stmts.c (perm_mask_for_reverse): Likewise. From-SVN: r180449
This commit is contained in:
parent
c3962b13f7
commit
22e4dee74f
10 changed files with 202 additions and 152 deletions
|
@ -1,5 +1,25 @@
|
|||
2011-10-25 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* target.def (vec_perm_const_ok): Change parameters to mode and
|
||||
array of indices.
|
||||
* doc/tm.texi: Rebuild.
|
||||
* config/i386/i386.c (ix86_vectorize_vec_perm_const_ok): Change
|
||||
parameters to mode and array of indices.
|
||||
* expr.c (expand_expr_real_2) [VEC_PERM_EXPR]: Expand operands here.
|
||||
* optabs.c (can_vec_perm_p): Rename from can_vec_perm_expr_p.
|
||||
Change parameters to mode and array of indices.
|
||||
(expand_vec_perm_1): Rename from expand_vec_perm_expr_1.
|
||||
(expand_vec_perm): Rename from expand_vec_perm_expr. Change
|
||||
parameters to mode and rtx inputs. Try lowering to QImode
|
||||
vec_perm_const before trying fully variable permutation.
|
||||
* optabs.h: Update decls.
|
||||
* tree-vect-generic.c (lower_vec_perm): Extract array of indices from
|
||||
VECTOR_CST to pass to can_vec_perm_p.
|
||||
* tree-vect-slp.c (vect_get_mask_element): Change mask parameter type
|
||||
from int pointer to unsigned char pointer.
|
||||
(vect_transform_slp_perm_load): Update for change to can_vec_perm_p.
|
||||
* tree-vect-stmts.c (perm_mask_for_reverse): Likewise.
|
||||
|
||||
* tree.def (VEC_EXTRACT_EVEN_EXPR): Fix typo in text name.
|
||||
(VEC_EXTRACT_ODD_EXPR, VEC_INTERLEAVE_HIGH_EXPR,
|
||||
VEC_INTERLEAVE_LOW_EXPR): Likewise.
|
||||
|
|
|
@ -36477,14 +36477,14 @@ ix86_expand_vec_perm_const (rtx operands[4])
|
|||
/* Implement targetm.vectorize.vec_perm_const_ok. */
|
||||
|
||||
static bool
|
||||
ix86_vectorize_vec_perm_const_ok (tree vec_type, tree mask)
|
||||
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||
const unsigned char *sel)
|
||||
{
|
||||
struct expand_vec_perm_d d;
|
||||
unsigned int i, nelt, which;
|
||||
bool ret, one_vec;
|
||||
tree list;
|
||||
|
||||
d.vmode = TYPE_MODE (vec_type);
|
||||
d.vmode = vmode;
|
||||
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||
d.testing_p = true;
|
||||
|
||||
|
@ -36505,19 +36505,13 @@ ix86_vectorize_vec_perm_const_ok (tree vec_type, tree mask)
|
|||
|
||||
/* Extract the values from the vector CST into the permutation
|
||||
array in D. */
|
||||
list = TREE_VECTOR_CST_ELTS (mask);
|
||||
for (i = which = 0; i < nelt; ++i, list = TREE_CHAIN (list))
|
||||
memcpy (d.perm, sel, nelt);
|
||||
for (i = which = 0; i < nelt; ++i)
|
||||
{
|
||||
unsigned HOST_WIDE_INT e;
|
||||
|
||||
gcc_checking_assert (host_integerp (TREE_VALUE (list), 1));
|
||||
e = tree_low_cst (TREE_VALUE (list), 1);
|
||||
unsigned char e = d.perm[i];
|
||||
gcc_assert (e < 2 * nelt);
|
||||
|
||||
which |= (e < nelt ? 1 : 2);
|
||||
d.perm[i] = e;
|
||||
}
|
||||
gcc_assert (list == NULL);
|
||||
|
||||
/* For all elements from second vector, fold the elements to first. */
|
||||
if (which == 2)
|
||||
|
|
|
@ -5711,7 +5711,7 @@ misalignment value (@var{misalign}).
|
|||
Return true if vector alignment is reachable (by peeling N iterations) for the given type.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (tree @var{vec_type}, tree @var{mask})
|
||||
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST_OK (enum @var{machine_mode}, const unsigned char *@var{sel})
|
||||
Return true if a vector created for @code{vec_perm_const} is valid.
|
||||
@end deftypefn
|
||||
|
||||
|
|
|
@ -8752,9 +8752,11 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
|
|||
goto binop;
|
||||
|
||||
case VEC_PERM_EXPR:
|
||||
target = expand_vec_perm_expr (type, treeop0, treeop1, treeop2, target);
|
||||
gcc_assert (target);
|
||||
return target;
|
||||
expand_operands (treeop0, treeop1, target, &op0, &op1, EXPAND_NORMAL);
|
||||
op2 = expand_normal (treeop2);
|
||||
temp = expand_vec_perm (mode, op0, op1, op2, target);
|
||||
gcc_assert (temp);
|
||||
return temp;
|
||||
|
||||
case DOT_PROD_EXPR:
|
||||
{
|
||||
|
|
223
gcc/optabs.c
223
gcc/optabs.c
|
@ -6701,20 +6701,22 @@ vector_compare_rtx (tree cond, bool unsignedp, enum insn_code icode)
|
|||
of the CPU. SEL may be NULL, which stands for an unknown constant. */
|
||||
|
||||
bool
|
||||
can_vec_perm_expr_p (tree type, tree sel)
|
||||
can_vec_perm_p (enum machine_mode mode, bool variable,
|
||||
const unsigned char *sel)
|
||||
{
|
||||
enum machine_mode mode, qimode;
|
||||
mode = TYPE_MODE (type);
|
||||
enum machine_mode qimode;
|
||||
|
||||
/* If the target doesn't implement a vector mode for the vector type,
|
||||
then no operations are supported. */
|
||||
if (!VECTOR_MODE_P (mode))
|
||||
return false;
|
||||
|
||||
if (sel == NULL || TREE_CODE (sel) == VECTOR_CST)
|
||||
if (!variable)
|
||||
{
|
||||
if (direct_optab_handler (vec_perm_const_optab, mode) != CODE_FOR_nothing
|
||||
&& (sel == NULL || targetm.vectorize.vec_perm_const_ok (type, sel)))
|
||||
&& (sel == NULL
|
||||
|| targetm.vectorize.vec_perm_const_ok == NULL
|
||||
|| targetm.vectorize.vec_perm_const_ok (mode, sel)))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -6722,6 +6724,8 @@ can_vec_perm_expr_p (tree type, tree sel)
|
|||
return true;
|
||||
|
||||
/* We allow fallback to a QI vector mode, and adjust the mask. */
|
||||
if (GET_MODE_INNER (mode) == QImode)
|
||||
return false;
|
||||
qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode));
|
||||
if (!VECTOR_MODE_P (qimode))
|
||||
return false;
|
||||
|
@ -6732,9 +6736,9 @@ can_vec_perm_expr_p (tree type, tree sel)
|
|||
if (direct_optab_handler (vec_perm_optab, qimode) == CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
/* In order to support the lowering of non-constant permutations,
|
||||
/* In order to support the lowering of variable permutations,
|
||||
we need to support shifts and adds. */
|
||||
if (sel != NULL && TREE_CODE (sel) != VECTOR_CST)
|
||||
if (variable)
|
||||
{
|
||||
if (GET_MODE_UNIT_SIZE (mode) > 2
|
||||
&& optab_handler (ashl_optab, mode) == CODE_FOR_nothing
|
||||
|
@ -6747,11 +6751,11 @@ can_vec_perm_expr_p (tree type, tree sel)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of expand_vec_perm_expr for expanding one vec_perm insn. */
|
||||
/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */
|
||||
|
||||
static rtx
|
||||
expand_vec_perm_expr_1 (enum insn_code icode, rtx target,
|
||||
rtx v0, rtx v1, rtx sel)
|
||||
expand_vec_perm_1 (enum insn_code icode, rtx target,
|
||||
rtx v0, rtx v1, rtx sel)
|
||||
{
|
||||
enum machine_mode tmode = GET_MODE (target);
|
||||
enum machine_mode smode = GET_MODE (sel);
|
||||
|
@ -6783,119 +6787,130 @@ expand_vec_perm_expr_1 (enum insn_code icode, rtx target,
|
|||
return NULL_RTX;
|
||||
}
|
||||
|
||||
/* Generate instructions for VEC_PERM_EXPR given its type and three
|
||||
operands. */
|
||||
/* Generate instructions for vec_perm optab given its mode
|
||||
and three operands. */
|
||||
|
||||
rtx
|
||||
expand_vec_perm_expr (tree type, tree v0, tree v1, tree sel, rtx target)
|
||||
expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
|
||||
{
|
||||
enum insn_code icode;
|
||||
enum machine_mode mode = TYPE_MODE (type);
|
||||
enum machine_mode qimode;
|
||||
rtx v0_rtx, v1_rtx, sel_rtx, *vec, vt, tmp;
|
||||
unsigned int i, w, e, u;
|
||||
rtx tmp, sel_qi;
|
||||
rtvec vec;
|
||||
|
||||
if (!target)
|
||||
if (!target || GET_MODE (target) != mode)
|
||||
target = gen_reg_rtx (mode);
|
||||
v0_rtx = expand_normal (v0);
|
||||
if (operand_equal_p (v0, v1, 0))
|
||||
v1_rtx = v0_rtx;
|
||||
else
|
||||
v1_rtx = expand_normal (v1);
|
||||
sel_rtx = expand_normal (sel);
|
||||
|
||||
/* If the input is a constant, expand it specially. */
|
||||
if (CONSTANT_P (sel_rtx))
|
||||
{
|
||||
icode = direct_optab_handler (vec_perm_const_optab, mode);
|
||||
if (icode != CODE_FOR_nothing
|
||||
&& targetm.vectorize.vec_perm_const_ok (TREE_TYPE (v0), sel)
|
||||
&& (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx,
|
||||
v1_rtx, sel_rtx)) != NULL)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/* Otherwise fall back to a fully variable permutation. */
|
||||
icode = direct_optab_handler (vec_perm_optab, mode);
|
||||
if (icode != CODE_FOR_nothing
|
||||
&& (tmp = expand_vec_perm_expr_1 (icode, target, v0_rtx,
|
||||
v1_rtx, sel_rtx)) != NULL)
|
||||
return tmp;
|
||||
|
||||
/* As a special case to aid several targets, lower the element-based
|
||||
permutation to a byte-based permutation and try again. */
|
||||
qimode = mode_for_vector (QImode, GET_MODE_SIZE (mode));
|
||||
if (!VECTOR_MODE_P (qimode))
|
||||
return NULL_RTX;
|
||||
|
||||
/* ??? For completeness, we ought to check the QImode version of
|
||||
vec_perm_const_optab. But all users of this implicit lowering
|
||||
feature implement the variable vec_perm_optab. */
|
||||
icode = direct_optab_handler (vec_perm_optab, qimode);
|
||||
if (icode == CODE_FOR_nothing)
|
||||
return NULL_RTX;
|
||||
|
||||
w = GET_MODE_SIZE (mode);
|
||||
e = GET_MODE_NUNITS (mode);
|
||||
u = GET_MODE_UNIT_SIZE (mode);
|
||||
vec = XALLOCAVEC (rtx, w);
|
||||
|
||||
if (CONSTANT_P (sel_rtx))
|
||||
/* Set QIMODE to a different vector mode with byte elements.
|
||||
If no such mode, or if MODE already has byte elements, use VOIDmode. */
|
||||
qimode = VOIDmode;
|
||||
if (GET_MODE_INNER (mode) != QImode)
|
||||
{
|
||||
unsigned int j;
|
||||
for (i = 0; i < e; ++i)
|
||||
{
|
||||
unsigned int this_e = INTVAL (XVECEXP (sel_rtx, 0, i));
|
||||
this_e &= 2 * e - 1;
|
||||
this_e *= u;
|
||||
|
||||
for (j = 0; j < u; ++j)
|
||||
vec[i * u + j] = GEN_INT (this_e + j);
|
||||
}
|
||||
sel_rtx = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
|
||||
qimode = mode_for_vector (QImode, w);
|
||||
if (!VECTOR_MODE_P (qimode))
|
||||
qimode = VOIDmode;
|
||||
}
|
||||
|
||||
/* If the input is a constant, expand it specially. */
|
||||
if (CONSTANT_P (sel))
|
||||
{
|
||||
icode = direct_optab_handler (vec_perm_const_optab, mode);
|
||||
if (icode != CODE_FOR_nothing)
|
||||
{
|
||||
tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
|
||||
if (tmp)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/* Fall back to a constant byte-based permutation. */
|
||||
if (qimode != VOIDmode)
|
||||
{
|
||||
icode = direct_optab_handler (vec_perm_const_optab, qimode);
|
||||
if (icode != CODE_FOR_nothing)
|
||||
{
|
||||
vec = rtvec_alloc (w);
|
||||
for (i = 0; i < e; ++i)
|
||||
{
|
||||
unsigned int j, this_e;
|
||||
|
||||
this_e = INTVAL (XVECEXP (sel, 0, i));
|
||||
this_e &= 2 * e - 1;
|
||||
this_e *= u;
|
||||
|
||||
for (j = 0; j < u; ++j)
|
||||
RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
|
||||
}
|
||||
sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
|
||||
|
||||
tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
|
||||
gen_lowpart (qimode, v0),
|
||||
gen_lowpart (qimode, v1), sel_qi);
|
||||
if (tmp)
|
||||
return gen_lowpart (mode, tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise expand as a fully variable permutation. */
|
||||
icode = direct_optab_handler (vec_perm_optab, mode);
|
||||
if (icode != CODE_FOR_nothing)
|
||||
{
|
||||
tmp = expand_vec_perm_1 (icode, target, v0, v1, sel);
|
||||
if (tmp)
|
||||
return tmp;
|
||||
}
|
||||
|
||||
/* As a special case to aid several targets, lower the element-based
|
||||
permutation to a byte-based permutation and try again. */
|
||||
if (qimode == VOIDmode)
|
||||
return NULL_RTX;
|
||||
icode = direct_optab_handler (vec_perm_optab, qimode);
|
||||
if (icode == CODE_FOR_nothing)
|
||||
return NULL_RTX;
|
||||
|
||||
/* Multiply each element by its byte size. */
|
||||
if (u == 2)
|
||||
sel = expand_simple_binop (mode, PLUS, sel, sel, sel, 0, OPTAB_DIRECT);
|
||||
else
|
||||
sel = expand_simple_binop (mode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
|
||||
sel, 0, OPTAB_DIRECT);
|
||||
gcc_assert (sel != NULL);
|
||||
|
||||
/* Broadcast the low byte of each element into each of its bytes. */
|
||||
vec = rtvec_alloc (w);
|
||||
for (i = 0; i < w; ++i)
|
||||
{
|
||||
/* Multiply each element by its byte size. */
|
||||
if (u == 2)
|
||||
sel_rtx = expand_simple_binop (mode, PLUS, sel_rtx, sel_rtx,
|
||||
sel_rtx, 0, OPTAB_DIRECT);
|
||||
else
|
||||
sel_rtx = expand_simple_binop (mode, ASHIFT, sel_rtx,
|
||||
GEN_INT (exact_log2 (u)),
|
||||
sel_rtx, 0, OPTAB_DIRECT);
|
||||
gcc_assert (sel_rtx);
|
||||
|
||||
/* Broadcast the low byte of each element into each of its bytes. */
|
||||
for (i = 0; i < w; ++i)
|
||||
{
|
||||
int this_e = i / u * u;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
this_e += u - 1;
|
||||
vec[i] = GEN_INT (this_e);
|
||||
}
|
||||
vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
|
||||
sel_rtx = gen_lowpart (qimode, sel_rtx);
|
||||
sel_rtx = expand_vec_perm_expr_1 (icode, gen_reg_rtx (qimode),
|
||||
sel_rtx, sel_rtx, vt);
|
||||
gcc_assert (sel_rtx != NULL);
|
||||
|
||||
/* Add the byte offset to each byte element. */
|
||||
/* Note that the definition of the indices here is memory ordering,
|
||||
so there should be no difference between big and little endian. */
|
||||
for (i = 0; i < w; ++i)
|
||||
vec[i] = GEN_INT (i % u);
|
||||
vt = gen_rtx_CONST_VECTOR (qimode, gen_rtvec_v (w, vec));
|
||||
sel_rtx = expand_simple_binop (qimode, PLUS, sel_rtx, vt,
|
||||
NULL_RTX, 0, OPTAB_DIRECT);
|
||||
gcc_assert (sel_rtx);
|
||||
int this_e = i / u * u;
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
this_e += u - 1;
|
||||
RTVEC_ELT (vec, i) = GEN_INT (this_e);
|
||||
}
|
||||
tmp = gen_rtx_CONST_VECTOR (qimode, vec);
|
||||
sel = gen_lowpart (qimode, sel);
|
||||
sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
|
||||
gcc_assert (sel != NULL);
|
||||
|
||||
tmp = expand_vec_perm_expr_1 (icode, gen_lowpart (qimode, target),
|
||||
gen_lowpart (qimode, v0_rtx),
|
||||
gen_lowpart (qimode, v1_rtx), sel_rtx);
|
||||
gcc_assert (tmp != NULL);
|
||||
/* Add the byte offset to each byte element. */
|
||||
/* Note that the definition of the indices here is memory ordering,
|
||||
so there should be no difference between big and little endian. */
|
||||
vec = rtvec_alloc (w);
|
||||
for (i = 0; i < w; ++i)
|
||||
RTVEC_ELT (vec, i) = GEN_INT (i % u);
|
||||
tmp = gen_rtx_CONST_VECTOR (qimode, vec);
|
||||
sel = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT);
|
||||
gcc_assert (sel != NULL);
|
||||
|
||||
return gen_lowpart (mode, tmp);
|
||||
tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
|
||||
gen_lowpart (qimode, v0),
|
||||
gen_lowpart (qimode, v1), sel);
|
||||
if (tmp)
|
||||
tmp = gen_lowpart (mode, tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -901,10 +901,10 @@ extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
|
|||
extern rtx expand_vec_shift_expr (sepops, rtx);
|
||||
|
||||
/* Return true if target supports vector operations for VEC_PERM_EXPR. */
|
||||
extern bool can_vec_perm_expr_p (tree, tree);
|
||||
extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
|
||||
|
||||
/* Generate code for VEC_PERM_EXPR. */
|
||||
extern rtx expand_vec_perm_expr (tree, tree, tree, tree, rtx);
|
||||
extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
|
||||
|
||||
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
|
||||
if the target does not have such an insn. */
|
||||
|
|
|
@ -985,12 +985,13 @@ DEFHOOK
|
|||
bool, (const_tree type, bool is_packed),
|
||||
default_builtin_vector_alignment_reachable)
|
||||
|
||||
/* Return true if a vector created for vec_perm_const is valid. */
|
||||
/* Return true if a vector created for vec_perm_const is valid.
|
||||
A NULL indicates that all constants are valid permutations. */
|
||||
DEFHOOK
|
||||
(vec_perm_const_ok,
|
||||
"",
|
||||
bool, (tree vec_type, tree mask),
|
||||
hook_bool_tree_tree_true)
|
||||
bool, (enum machine_mode, const unsigned char *sel),
|
||||
NULL)
|
||||
|
||||
/* Return true if the target supports misaligned store/load of a
|
||||
specific factor denoted in the third parameter. The last parameter
|
||||
|
|
|
@ -641,13 +641,23 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
|
|||
location_t loc = gimple_location (gsi_stmt (*gsi));
|
||||
unsigned i;
|
||||
|
||||
if (can_vec_perm_expr_p (vect_type, mask))
|
||||
if (TREE_CODE (mask) == VECTOR_CST)
|
||||
{
|
||||
unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);
|
||||
tree vals = TREE_VECTOR_CST_ELTS (mask);
|
||||
|
||||
for (i = 0; i < elements; ++i, vals = TREE_CHAIN (vals))
|
||||
sel_int[i] = TREE_INT_CST_LOW (TREE_VALUE (vals));
|
||||
|
||||
if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
|
||||
return;
|
||||
}
|
||||
else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
|
||||
return;
|
||||
|
||||
warning_at (loc, OPT_Wvector_operation_performance,
|
||||
"vector shuffling operation will be expanded piecewise");
|
||||
|
||||
|
||||
v = VEC_alloc (constructor_elt, gc, elements);
|
||||
for (i = 0; i < elements; i++)
|
||||
{
|
||||
|
|
|
@ -2356,7 +2356,7 @@ vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
|
|||
static bool
|
||||
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
|
||||
int mask_nunits, bool only_one_vec, int index,
|
||||
int *mask, int *current_mask_element,
|
||||
unsigned char *mask, int *current_mask_element,
|
||||
bool *need_next_vector, int *number_of_mask_fixes,
|
||||
bool *mask_fixed, bool *needs_first_vector)
|
||||
{
|
||||
|
@ -2443,14 +2443,18 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
|||
gimple next_scalar_stmt;
|
||||
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
||||
int first_mask_element;
|
||||
int index, unroll_factor, *mask, current_mask_element, ncopies;
|
||||
int index, unroll_factor, current_mask_element, ncopies;
|
||||
unsigned char *mask;
|
||||
bool only_one_vec = false, need_next_vector = false;
|
||||
int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
|
||||
int number_of_mask_fixes = 1;
|
||||
bool mask_fixed = false;
|
||||
bool needs_first_vector = false;
|
||||
enum machine_mode mode;
|
||||
|
||||
if (!can_vec_perm_expr_p (vectype, NULL_TREE))
|
||||
mode = TYPE_MODE (vectype);
|
||||
|
||||
if (!can_vec_perm_p (mode, false, NULL))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
|
@ -2467,7 +2471,7 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
|||
(TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
|
||||
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
mask = (int *) xmalloc (sizeof (int) * nunits);
|
||||
mask = XALLOCAVEC (unsigned char, nunits);
|
||||
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
||||
|
||||
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
|
||||
|
@ -2529,6 +2533,18 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
|||
{
|
||||
tree mask_vec = NULL;
|
||||
|
||||
if (!can_vec_perm_p (mode, false, mask))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "unsupported vect permute { ");
|
||||
for (i = 0; i < nunits; ++i)
|
||||
fprintf (vect_dump, "%d ", mask[i]);
|
||||
fprintf (vect_dump, "}\n");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
while (--index >= 0)
|
||||
{
|
||||
tree t = build_int_cst (mask_element_type, mask[index]);
|
||||
|
@ -2537,17 +2553,6 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
|||
mask_vec = build_vector (mask_type, mask_vec);
|
||||
index = 0;
|
||||
|
||||
if (!can_vec_perm_expr_p (vectype, mask_vec))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "unsupported vect permute ");
|
||||
print_generic_expr (vect_dump, mask_vec, 0);
|
||||
}
|
||||
free (mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!analyze_only)
|
||||
{
|
||||
if (need_next_vector)
|
||||
|
@ -2569,7 +2574,6 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
|||
}
|
||||
}
|
||||
|
||||
free (mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -4090,25 +4090,29 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
|||
static tree
|
||||
perm_mask_for_reverse (tree vectype)
|
||||
{
|
||||
tree mask_element_type, mask_type, mask_vec = NULL;
|
||||
tree mask_elt_type, mask_type, mask_vec;
|
||||
int i, nunits;
|
||||
unsigned char *sel;
|
||||
|
||||
if (!can_vec_perm_expr_p (vectype, NULL_TREE))
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
sel = XALLOCAVEC (unsigned char, nunits);
|
||||
|
||||
for (i = 0; i < nunits; ++i)
|
||||
sel[i] = nunits - 1 - i;
|
||||
|
||||
if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
|
||||
return NULL;
|
||||
|
||||
mask_element_type
|
||||
mask_elt_type
|
||||
= lang_hooks.types.type_for_size
|
||||
(TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
|
||||
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
mask_type = get_vectype_for_scalar_type (mask_elt_type);
|
||||
|
||||
mask_vec = NULL;
|
||||
for (i = 0; i < nunits; i++)
|
||||
mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
|
||||
mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
|
||||
mask_vec = build_vector (mask_type, mask_vec);
|
||||
|
||||
if (!can_vec_perm_expr_p (vectype, mask_vec))
|
||||
return NULL;
|
||||
|
||||
return mask_vec;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue