Commit b266b968 authored by Richard Biener, committed by Richard Biener

tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split out from ...

2015-06-10  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
	out from ...
	(vect_supported_load_permutation_p): ... here.  Handle
	supportable permutations in reductions.
	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
	for vectorizing strided group loads.

From-SVN: r224324
tree-vect-slp.c

@@ -1299,55 +1299,18 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
 }
 
-/* Check if the required load permutations in the SLP instance
-   SLP_INSTN are supported.  */
+/* Attempt to reorder stmts in a reduction chain so that we don't
+   require any load permutation.  Return true if that was possible,
+   otherwise return false.  */
 
 static bool
-vect_supported_load_permutation_p (slp_instance slp_instn)
+vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
 {
   unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
-  unsigned int i, j, k, next;
+  unsigned int i, j;
   sbitmap load_index;
-  slp_tree node;
-  gimple stmt, load, next_load, first_load;
-  struct data_reference *dr;
-
-  if (dump_enabled_p ())
-    {
-      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
-      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
-        if (node->load_permutation.exists ())
-          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
-            dump_printf (MSG_NOTE, "%d ", next);
-        else
-          for (k = 0; k < group_size; ++k)
-            dump_printf (MSG_NOTE, "%d ", k);
-      dump_printf (MSG_NOTE, "\n");
-    }
-
-  /* In case of reduction every load permutation is allowed, since the order
-     of the reduction statements is not important (as opposed to the case of
-     grouped stores).  The only condition we need to check is that all the
-     load nodes are of the same size and have the same permutation (and then
-     rearrange all the nodes of the SLP instance according to this
-     permutation).  */
-
-  /* Check that all the load nodes are of the same size.  */
-  /* ???  Can't we assert this? */
-  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
-    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
-      return false;
-
-  node = SLP_INSTANCE_TREE (slp_instn);
-  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
-
-  /* Reduction (there are no data-refs in the root).
-     In reduction chain the order of the loads is important.  */
-  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
-      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-    {
-      slp_tree load;
-      unsigned int lidx;
+  unsigned int lidx;
+  slp_tree node, load;
 
       /* Compare all the permutation sequences to the first one.  We know
          that at least one load is permuted.  */
@@ -1395,6 +1358,58 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
       FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
         SLP_TREE_LOAD_PERMUTATION (node).release ();
       return true;
+}
+
+/* Check if the required load permutations in the SLP instance
+   SLP_INSTN are supported.  */
+
+static bool
+vect_supported_load_permutation_p (slp_instance slp_instn)
+{
+  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+  unsigned int i, j, k, next;
+  slp_tree node;
+  gimple stmt, load, next_load, first_load;
+  struct data_reference *dr;
+
+  if (dump_enabled_p ())
+    {
+      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
+      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+        if (node->load_permutation.exists ())
+          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
+            dump_printf (MSG_NOTE, "%d ", next);
+        else
+          for (k = 0; k < group_size; ++k)
+            dump_printf (MSG_NOTE, "%d ", k);
+      dump_printf (MSG_NOTE, "\n");
+    }
+
+  /* In case of reduction every load permutation is allowed, since the order
+     of the reduction statements is not important (as opposed to the case of
+     grouped stores).  The only condition we need to check is that all the
+     load nodes are of the same size and have the same permutation (and then
+     rearrange all the nodes of the SLP instance according to this
+     permutation).  */
+
+  /* Check that all the load nodes are of the same size.  */
+  /* ???  Can't we assert this? */
+  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
+      return false;
+
+  node = SLP_INSTANCE_TREE (slp_instn);
+  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+
+  /* Reduction (there are no data-refs in the root).
+     In reduction chain the order of the loads is not important.  */
+  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
+      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
+    {
+      if (vect_attempt_slp_rearrange_stmts (slp_instn))
+        return true;
+      /* Fallthru to general load permutation handling.  */
+    }
 
   /* In basic block vectorization we allow any subchain of an interleaving
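As an aside (not part of the commit), here is a minimal sketch of the kind of loop the tree-vect-slp.c change is aimed at. The function name and constants are hypothetical; the point is only that both lanes feed reductions, so when the lanes read the interleaved group elements in swapped order the scalar statements can be rearranged instead of emitting a load permutation, which is what vect_attempt_slp_rearrange_stmts now tries before falling through to the general load permutation handling.

/* Hypothetical illustration: lane 0 reads group element 1 and lane 1 reads
   group element 0.  For a reduction the scalar statement order does not
   matter, so the SLP tree can be rearranged instead of permuting loads.  */
double
sum_swapped_lanes (const double *a, int n)
{
  double s0 = 0.0, s1 = 0.0;
  for (int i = 0; i < n; ++i)
    {
      s0 += a[2 * i + 1];
      s1 += a[2 * i];
    }
  return s0 + s1;
}

Whether a given target actually vectorizes this depends on the usual cost checks; the sketch only shows the permutation pattern the rearrangement removes.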
tree-vect-stmts.c

@@ -5995,9 +5995,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if ((grouped_load
        && (slp || PURE_SLP_STMT (stmt_info)))
       && (group_size > nunits
-          || nunits % group_size != 0
-          /* We don't support load permutations.  */
-          || slp_perm))
+          || nunits % group_size != 0))
     {
       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                        "unhandled strided group load\n");
@@ -6294,6 +6292,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
       int nloads = nunits;
       tree ltype = TREE_TYPE (vectype);
+      auto_vec<tree> dr_chain;
       if (slp)
         {
           nloads = nunits / group_size;
@@ -6303,7 +6302,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             ltype = vectype;
           ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
           ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-          gcc_assert (!slp_perm);
+          if (slp_perm)
+            dr_chain.create (ncopies);
         }
       for (j = 0; j < ncopies; j++)
         {
@@ -6350,13 +6350,20 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             }
           if (slp)
-            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+            {
+              SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+              if (slp_perm)
+                dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+            }
           if (j == 0)
             STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
           else
             STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
           prev_stmt_info = vinfo_for_stmt (new_stmt);
         }
+      if (slp_perm)
+        vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+                                      slp_node_instance, false);
       return true;
     }
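Similarly, a hypothetical sketch (not taken from the patch or the testsuite) of a strided group load that needs an SLP permutation. With three elements per load group and the two uses in swapped order, vectorizable_load previously gave up via the removed "We don't support load permutations" check and the gcc_assert (!slp_perm); with this change the per-copy vector results are collected in dr_chain and handed to vect_transform_slp_perm_load.

/* Hypothetical illustration: the loads form a strided group of size 3 and
   are used in permuted order (element 1 before element 0) by the SLP group
   of stores, so the strided group load needs an SLP load permutation.  */
void
deinterleave_swapped (double *restrict out, const double *restrict in, int n)
{
  for (int i = 0; i < n; ++i)
    {
      out[2 * i] = in[3 * i + 1];
      out[2 * i + 1] = in[3 * i];
    }
}

As before, whether this is profitable (or vectorized at all) depends on the target's vector modes and cost model; the example only shows the shape of loop the relaxed check now lets through to the permutation code.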