targhooks.c (default_builtin_vectorization_cost): Handle vec_construct, using vectype to base cost on subparts.
2012-06-13 Bill Schmidt <wschmidt@linux.ibm.com> * targhooks.c (default_builtin_vectorization_cost): Handle vec_construct, using vectype to base cost on subparts. * target.h (enum vect_cost_for_stmt): Add vec_construct. * tree-vect-stmts.c (vect_model_load_cost): Use vec_construct instead of scalar_to_vec. * config/spu/spu.c (spu_builtin_vectorization_cost): Handle vec_construct in same way as default for now. * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Handle vec_construct, including special case for 32-bit loads. From-SVN: r188508
This commit is contained in:
parent
2be13164c1
commit
a21892add3
7 changed files with 55 additions and 8 deletions
|
@ -1,3 +1,16 @@
|
||||||
|
2012-06-13 Bill Schmidt <wschmidt@linux.ibm.com>
|
||||||
|
|
||||||
|
* targhooks.c (default_builtin_vectorization_cost): Handle
|
||||||
|
vec_construct, using vectype to base cost on subparts.
|
||||||
|
* target.h (enum vect_cost_for_stmt): Add vec_construct.
|
||||||
|
* tree-vect-stmts.c (vect_model_load_cost): Use vec_construct
|
||||||
|
instead of scalar_to_vec.
|
||||||
|
* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
|
||||||
|
vec_construct in same way as default for now.
|
||||||
|
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
|
||||||
|
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost):
|
||||||
|
Handle vec_construct, including special case for 32-bit loads.
|
||||||
|
|
||||||
2012-06-13 Xinyu Qi <xyqi@marvell.com>
|
2012-06-13 Xinyu Qi <xyqi@marvell.com>
|
||||||
|
|
||||||
* config/arm/arm.c (FL_IWMMXT2): New define.
|
* config/arm/arm.c (FL_IWMMXT2): New define.
|
||||||
|
|
|
@ -36072,9 +36072,11 @@ static const struct attribute_spec ix86_attribute_table[] =
|
||||||
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
||||||
static int
|
static int
|
||||||
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
tree vectype ATTRIBUTE_UNUSED,
|
tree vectype,
|
||||||
int misalign ATTRIBUTE_UNUSED)
|
int misalign ATTRIBUTE_UNUSED)
|
||||||
{
|
{
|
||||||
|
unsigned elements;
|
||||||
|
|
||||||
switch (type_of_cost)
|
switch (type_of_cost)
|
||||||
{
|
{
|
||||||
case scalar_stmt:
|
case scalar_stmt:
|
||||||
|
@ -36115,6 +36117,10 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
case vec_promote_demote:
|
case vec_promote_demote:
|
||||||
return ix86_cost->vec_stmt_cost;
|
return ix86_cost->vec_stmt_cost;
|
||||||
|
|
||||||
|
case vec_construct:
|
||||||
|
elements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
return elements / 2 + 1;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
|
@ -3405,6 +3405,7 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
tree vectype, int misalign)
|
tree vectype, int misalign)
|
||||||
{
|
{
|
||||||
unsigned elements;
|
unsigned elements;
|
||||||
|
tree elem_type;
|
||||||
|
|
||||||
switch (type_of_cost)
|
switch (type_of_cost)
|
||||||
{
|
{
|
||||||
|
@ -3504,6 +3505,18 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
|
|
||||||
return 2;
|
return 2;
|
||||||
|
|
||||||
|
case vec_construct:
|
||||||
|
elements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
elem_type = TREE_TYPE (vectype);
|
||||||
|
/* 32-bit floats loaded into registers are stored as double
|
||||||
|
precision, so we need n/2 converts in addition to the usual
|
||||||
|
n/2 merges to construct a vector of short floats from them. */
|
||||||
|
if (SCALAR_FLOAT_TYPE_P (elem_type)
|
||||||
|
&& TYPE_PRECISION (elem_type) == 32)
|
||||||
|
return elements + 1;
|
||||||
|
else
|
||||||
|
return elements / 2 + 1;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
|
@ -6908,9 +6908,11 @@ spu_builtin_mask_for_load (void)
|
||||||
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
||||||
static int
|
static int
|
||||||
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
tree vectype ATTRIBUTE_UNUSED,
|
tree vectype,
|
||||||
int misalign ATTRIBUTE_UNUSED)
|
int misalign ATTRIBUTE_UNUSED)
|
||||||
{
|
{
|
||||||
|
unsigned elements;
|
||||||
|
|
||||||
switch (type_of_cost)
|
switch (type_of_cost)
|
||||||
{
|
{
|
||||||
case scalar_stmt:
|
case scalar_stmt:
|
||||||
|
@ -6937,6 +6939,10 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
case cond_branch_taken:
|
case cond_branch_taken:
|
||||||
return 6;
|
return 6;
|
||||||
|
|
||||||
|
case vec_construct:
|
||||||
|
elements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
return elements / 2 + 1;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
|
@ -146,7 +146,8 @@ enum vect_cost_for_stmt
|
||||||
cond_branch_not_taken,
|
cond_branch_not_taken,
|
||||||
cond_branch_taken,
|
cond_branch_taken,
|
||||||
vec_perm,
|
vec_perm,
|
||||||
vec_promote_demote
|
vec_promote_demote,
|
||||||
|
vec_construct
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The target structure. This holds all the backend hooks. */
|
/* The target structure. This holds all the backend hooks. */
|
||||||
|
|
|
@ -499,9 +499,11 @@ default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED,
|
||||||
|
|
||||||
int
|
int
|
||||||
default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
tree vectype ATTRIBUTE_UNUSED,
|
tree vectype,
|
||||||
int misalign ATTRIBUTE_UNUSED)
|
int misalign ATTRIBUTE_UNUSED)
|
||||||
{
|
{
|
||||||
|
unsigned elements;
|
||||||
|
|
||||||
switch (type_of_cost)
|
switch (type_of_cost)
|
||||||
{
|
{
|
||||||
case scalar_stmt:
|
case scalar_stmt:
|
||||||
|
@ -524,6 +526,10 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
case cond_branch_taken:
|
case cond_branch_taken:
|
||||||
return 3;
|
return 3;
|
||||||
|
|
||||||
|
case vec_construct:
|
||||||
|
elements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
return elements / 2 + 1;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
gcc_unreachable ();
|
gcc_unreachable ();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1031,11 +1031,13 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
|
||||||
/* The loads themselves. */
|
/* The loads themselves. */
|
||||||
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
|
||||||
{
|
{
|
||||||
/* N scalar loads plus gathering them into a vector.
|
/* N scalar loads plus gathering them into a vector. */
|
||||||
??? scalar_to_vec isn't the cost for that. */
|
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||||
inside_cost += (vect_get_stmt_cost (scalar_load) * ncopies
|
inside_cost += (vect_get_stmt_cost (scalar_load) * ncopies
|
||||||
* TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)));
|
* TYPE_VECTOR_SUBPARTS (vectype));
|
||||||
inside_cost += ncopies * vect_get_stmt_cost (scalar_to_vec);
|
inside_cost += ncopies
|
||||||
|
* targetm.vectorize.builtin_vectorization_cost (vec_construct,
|
||||||
|
vectype, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
vect_get_load_cost (first_dr, ncopies,
|
vect_get_load_cost (first_dr, ncopies,
|
||||||
|
|
Loading…
Add table
Reference in a new issue