re PR tree-optimization/58135 ([x86] Missed opportunities for partial SLP)

Fix PR58135. 2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com> PR tree-optimization/58135 * tree-vect-slp.c: When the group size is not a multiple of the vector size, allow splitting of the store group at a vector boundary. 2016-05-23 Venkataramanan Kumar <venkataramanan.kumar@amd.com> * gcc.dg/vect/bb-slp-19.c: Remove XFAIL. * gcc.dg/vect/pr58135.c: Add new. * gfortran.dg/pr46519-1.f: Adjust test case. From-SVN: r236582
2016-05-23 09:48:54 +00:00 · 2016-05-23 09:48:54 +00:00 · e569db5fb5
commit e569db5fb5
parent e4b7111409
6 changed files with 56 additions and 30 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,10 @@
+2016-05-23  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
+
+	PR tree-optimization/58135
+	* tree-vect-slp.c (vect_analyze_slp_instance): When the group size
+	is not a multiple of the vector size, allow splitting of the store
+	group at a vector boundary.
+
 2016-05-23  Christophe Lyon  <christophe.lyon@linaro.org>

 	* config/arm/arm_neon.h (vtst_p16, vtstq_p16): New.
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,9 @@
+2016-05-23  Venkataramanan Kumar  <venkataramanan.kumar@amd.com>
+
+	* gcc.dg/vect/bb-slp-19.c: Remove XFAIL.
+	* gcc.dg/vect/pr58135.c: New test.
+	* gfortran.dg/pr46519-1.f: Adjust test case.
+
 2016-05-23  Paolo Carlini  <paolo.carlini@oracle.com>

 	PR c++/53401
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-19.c
@ -53,5 +53,5 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2"  { xfail *-*-* }  } } */
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
  
--- a/gcc/testsuite/gcc.dg/vect/pr58135.c
+++ b/gcc/testsuite/gcc.dg/vect/pr58135.c
@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int a[100];
+void foo ()
+{
+  a[0] = a[1] = a[2] = a[3] = a[4]= 0;
+}
+
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
--- a/gcc/testsuite/gfortran.dg/pr46519-1.f
+++ b/gcc/testsuite/gfortran.dg/pr46519-1.f
@ -1,5 +1,5 @@
 ! { dg-do compile { target i?86-*-* x86_64-*-* } }
-! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
+! { dg-options "-O3 -mavx -mvzeroupper -fno-tree-slp-vectorize -mtune=generic -dp" }

      PROGRAM MG3XDEMO 
      INTEGER LM, NM, NV, NR, NIT
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@ -1757,18 +1757,6 @@ vect_analyze_slp_instance (vec_info *vinfo,
    }
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

-  /* Calculate the unrolling factor.  */
-  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
-  if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "Build SLP failed: unrolling required in basic"
-			 " block SLP\n");
-
-      return false;
-    }
-
  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
@ -1804,26 +1792,36 @@ vect_analyze_slp_instance (vec_info *vinfo,
  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
-  if ((node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
+  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
 				   &max_nunits, &loads, matches, &npermutes,
-				   NULL, max_tree_size)) != NULL)
+			      NULL, max_tree_size);
+  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
-      if (max_nunits > nunits)
-        unrolling_factor = least_common_multiple (max_nunits, group_size)
-                           / group_size;
+      unrolling_factor
+	= least_common_multiple (max_nunits, group_size) / group_size;

-      if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
+      if (unrolling_factor != 1
+	  && is_a <bb_vec_info> (vinfo))
+	{
+
+	  if (max_nunits > group_size)
        {
-          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "Build SLP failed: unrolling required in basic"
-			     " block SLP\n");
+			       "Build SLP failed: store group "
+			       "size not a multiple of the vector size "
+			       "in basic block SLP\n");
 	  vect_free_slp_tree (node);
 	  loads.release ();
          return false;
        }
-
+	  /* Fatal mismatch.  */
+	  matches[group_size/max_nunits * max_nunits] = false;
+	  vect_free_slp_tree (node);
+	  loads.release ();
+	}
+      else
+	{
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
@ -1845,8 +1843,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
 	      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
 	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
 	    {
-	      int load_place
-		= vect_get_place_in_interleaving_chain (load, first_stmt);
+		  int load_place = vect_get_place_in_interleaving_chain
+				     (load, first_stmt);
 	      gcc_assert (load_place != -1);
 	      if (load_place != j)
 		this_load_permuted = true;
@ -1876,7 +1874,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 				   "Build SLP failed: unsupported load "
 				   "permutation ");
-                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
+					TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              vect_free_slp_instance (new_instance);
@ -1884,7 +1883,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
            }
        }

-      /* If the loads and stores can be handled with load/store-lane
+	  /* If the loads and stores can be handled with load/store-lane
 	 instructions do not generate this SLP instance.  */
      if (is_a <loop_vec_info> (vinfo)
 	  && loads_permuted
@ -1896,7 +1895,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
-	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
+		  /* Use SLP for strided accesses (or if we
+		     can't load-lanes).  */
 	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
 		  || ! vect_load_lanes_supported
 			(STMT_VINFO_VECTYPE (stmt_vinfo),
@ -1925,11 +1925,14 @@ vect_analyze_slp_instance (vec_info *vinfo,

      return true;
    }
-
+    }
+  else
+    {
  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  scalar_stmts.release ();
  loads.release ();
+    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */