re PR tree-optimization/58135 ([x86] Missed opportunities for partial SLP)
Fix PR58135. 2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com> PR tree-optimization/58135 * tree-vect-slp.c: When group size is not a multiple of vector size, allow splitting of store group at vector boundary. 2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com> * gcc.dg/vect/bb-slp-19.c: Remove XFAIL. * gcc.dg/vect/pr58135.c: Add new. * gfortran.dg/pr46519-1.f: Adjust test case. From-SVN: r236582
This commit is contained in:
parent
e4b7111409
commit
e569db5fb5
6 changed files with 56 additions and 30 deletions
|
@ -1,3 +1,10 @@
|
|||
2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com>
|
||||
|
||||
PR tree-optimization/58135
|
||||
* tree-vect-slp.c: When group size is not a multiple
|
||||
of vector size, allow splitting of store group at
|
||||
vector boundary.
|
||||
|
||||
2016-05-23 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* config/arm/arm_neon.h (vtst_p16, vtstq_p16): New.
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com>
|
||||
|
||||
* gcc.dg/vect/bb-slp-19.c: Remove XFAIL.
|
||||
* gcc.dg/vect/pr58135.c: Add new.
|
||||
* gfortran.dg/pr46519-1.f: Adjust test case.
|
||||
|
||||
2016-05-23 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/53401
|
||||
|
|
|
@ -53,5 +53,5 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
||||
|
||||
|
|
10
gcc/testsuite/gcc.dg/vect/pr58135.c
Normal file
10
gcc/testsuite/gcc.dg/vect/pr58135.c
Normal file
|
@ -0,0 +1,10 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
int a[100];
|
||||
void foo ()
|
||||
{
|
||||
a[0] = a[1] = a[2] = a[3] = a[4]= 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
|
@ -1,5 +1,5 @@
|
|||
! { dg-do compile { target i?86-*-* x86_64-*-* } }
|
||||
! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
|
||||
! { dg-options "-O3 -mavx -mvzeroupper -fno-tree-slp-vectorize -mtune=generic -dp" }
|
||||
|
||||
PROGRAM MG3XDEMO
|
||||
INTEGER LM, NM, NV, NR, NIT
|
||||
|
|
|
@ -1757,18 +1757,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
}
|
||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
/* Calculate the unrolling factor. */
|
||||
unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
|
||||
if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: unrolling required in basic"
|
||||
" block SLP\n");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Create a node (a root of the SLP tree) for the packed grouped stores. */
|
||||
scalar_stmts.create (group_size);
|
||||
next = stmt;
|
||||
|
@ -1804,26 +1792,36 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
/* Build the tree for the SLP instance. */
|
||||
bool *matches = XALLOCAVEC (bool, group_size);
|
||||
unsigned npermutes = 0;
|
||||
if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
|
||||
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
|
||||
&max_nunits, &loads, matches, &npermutes,
|
||||
NULL, max_tree_size)) != NULL)
|
||||
NULL, max_tree_size);
|
||||
if (node != NULL)
|
||||
{
|
||||
/* Calculate the unrolling factor based on the smallest type. */
|
||||
if (max_nunits > nunits)
|
||||
unrolling_factor = least_common_multiple (max_nunits, group_size)
|
||||
/ group_size;
|
||||
unrolling_factor
|
||||
= least_common_multiple (max_nunits, group_size) / group_size;
|
||||
|
||||
if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
|
||||
if (unrolling_factor != 1
|
||||
&& is_a <bb_vec_info> (vinfo))
|
||||
{
|
||||
|
||||
if (max_nunits > group_size)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: unrolling required in basic"
|
||||
" block SLP\n");
|
||||
"Build SLP failed: store group "
|
||||
"size not a multiple of the vector size "
|
||||
"in basic block SLP\n");
|
||||
vect_free_slp_tree (node);
|
||||
loads.release ();
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Fatal mismatch. */
|
||||
matches[group_size/max_nunits * max_nunits] = false;
|
||||
vect_free_slp_tree (node);
|
||||
loads.release ();
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Create a new SLP instance. */
|
||||
new_instance = XNEW (struct _slp_instance);
|
||||
SLP_INSTANCE_TREE (new_instance) = node;
|
||||
|
@ -1845,8 +1843,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
|
||||
{
|
||||
int load_place
|
||||
= vect_get_place_in_interleaving_chain (load, first_stmt);
|
||||
int load_place = vect_get_place_in_interleaving_chain
|
||||
(load, first_stmt);
|
||||
gcc_assert (load_place != -1);
|
||||
if (load_place != j)
|
||||
this_load_permuted = true;
|
||||
|
@ -1876,7 +1874,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"Build SLP failed: unsupported load "
|
||||
"permutation ");
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
|
||||
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
|
||||
TDF_SLIM, stmt, 0);
|
||||
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
|
||||
}
|
||||
vect_free_slp_instance (new_instance);
|
||||
|
@ -1884,7 +1883,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
}
|
||||
}
|
||||
|
||||
/* If the loads and stores can be handled with load/store-lane
|
||||
/* If the loads and stores can be handled with load/store-lane
|
||||
instructions do not generate this SLP instance. */
|
||||
if (is_a <loop_vec_info> (vinfo)
|
||||
&& loads_permuted
|
||||
|
@ -1896,7 +1895,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
gimple *first_stmt = GROUP_FIRST_ELEMENT
|
||||
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
|
||||
/* Use SLP for strided accesses (or if we can't load-lanes). */
|
||||
/* Use SLP for strided accesses (or if we
|
||||
can't load-lanes). */
|
||||
if (STMT_VINFO_STRIDED_P (stmt_vinfo)
|
||||
|| ! vect_load_lanes_supported
|
||||
(STMT_VINFO_VECTYPE (stmt_vinfo),
|
||||
|
@ -1925,11 +1925,14 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Failed to SLP. */
|
||||
/* Free the allocated memory. */
|
||||
scalar_stmts.release ();
|
||||
loads.release ();
|
||||
}
|
||||
|
||||
/* For basic block SLP, try to break the group up into multiples of the
|
||||
vector size. */
|
||||
|
|
Loading…
Add table
Reference in a new issue