Commit 64812d33 by Robin Dapp, committed by Andreas Krebbel

Vector peeling cost model 4/6

gcc/ChangeLog:

2017-05-30  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* tree-vect-data-refs.c (vect_get_data_access_cost):
	Workaround for SLP handling.
	(vect_enhance_data_refs_alignment):
	Compute costs for doing no peeling at all, compare to the best
	peeling costs so far and avoid peeling if cheaper.

From-SVN: r248678
Parent: 1e69cc8f
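In short, the patch builds a third candidate, the cost of doing no peeling at all, and only peels when the best peeling candidate (for known or unknown alignment) beats it. The following standalone sketch models that decision; the struct and function names are illustrative stand-ins, not the vectorizer's real _vect_peel_extended_info API, and the sentinel check replaces the dr != NULL test used in the actual code.

#include <climits>
#include <cstdio>

/* Illustrative stand-in for GCC's _vect_peel_extended_info; not the
   real vectorizer type.  */
struct peel_candidate
{
  unsigned inside_cost;   /* cost of the vectorized loop body */
  unsigned outside_cost;  /* prologue/epilogue cost */
  int npeel;              /* iterations to peel (0 = none) */
};

/* Pick the cheaper of peeling for unknown vs. known alignment, then
   compare against doing no peeling at all, mirroring the logic added
   to vect_enhance_data_refs_alignment.  Returns true if peeling pays off.  */
static bool
choose_peeling (peel_candidate unknown, peel_candidate known,
                unsigned nopeel_inside_cost, peel_candidate *best)
{
  *best = unknown;
  if (known.inside_cost != UINT_MAX
      && unknown.inside_cost >= known.inside_cost)
    *best = known;

  /* If leaving everything unaligned is no more expensive than the best
     peeling found so far, do not peel.  */
  return nopeel_inside_cost > best->inside_cost;
}

int
main ()
{
  peel_candidate unknown = { 40, 10, 4 };  /* peel vf/2 iterations */
  peel_candidate known = { 30, 8, 2 };     /* peel npeel iterations */
  peel_candidate best;
  bool do_peeling = choose_peeling (unknown, known, 28, &best);
  printf ("peel %d iterations? %s\n", best.npeel,
          do_peeling ? "yes" : "no");
  return 0;
}

Built and run (g++ sketch.cc && ./a.out), this prints "peel 2 iterations? no", matching the new rule that peeling is skipped whenever nopeel_inside_cost <= best_peel.inside_cost.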
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2017-05-30  Robin Dapp  <rdapp@linux.vnet.ibm.com>
+
+	* tree-vect-data-refs.c (vect_get_data_access_cost):
+	Workaround for SLP handling.
+	(vect_enhance_data_refs_alignment):
+	Compute costs for doing no peeling at all, compare to the best
+	peeling costs so far and avoid peeling if cheaper.
+
 2017-05-30  Robin Dapp  <rdapp@linux.vnet.ibm.com>
 
 	* tree-vect-data-refs.c (vect_peeling_hash_choose_best_peeling):
 	Return peeling info and set costs to zero for unlimited cost
 	model.
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1134,7 +1134,7 @@ vect_get_data_access_cost (struct data_reference *dr,
   int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  int ncopies = vf / nunits;
+  int ncopies = MAX (1, vf / nunits); /* TODO: Handle SLP properly */
 
   if (DR_IS_READ (dr))
     vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
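A note on the ncopies hunk above: the ChangeLog calls it a workaround for SLP handling, and the MAX (1, ...) presumably guards the case where nunits exceeds the vectorization factor (as can happen with SLP groups), so that the integer division no longer truncates to zero copies. A minimal illustration of that assumption:

#include <algorithm>
#include <cstdio>

int
main ()
{
  /* Hypothetical SLP-ish situation: 4 elements per vector but a
     vectorization factor of only 2.  */
  int vf = 2, nunits = 4;
  int naive = vf / nunits;                  /* truncates to 0 */
  int clamped = std::max (1, vf / nunits);  /* the workaround: at least 1 copy */
  printf ("naive=%d clamped=%d\n", naive, clamped);  /* naive=0 clamped=1 */
  return 0;
}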
@@ -1517,10 +1517,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 {
   vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  enum dr_alignment_support supportable_dr_alignment;
   struct data_reference *dr0 = NULL, *first_store = NULL;
   struct data_reference *dr;
-  struct data_reference *dr0_known_align = NULL;
   unsigned int i, j;
   bool do_peeling = false;
   bool do_versioning = false;
@@ -1601,7 +1599,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
           && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
         continue;
 
-      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
       do_peeling = vector_alignment_reachable_p (dr);
       if (do_peeling)
         {
@@ -1690,16 +1687,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               if (!first_store && DR_IS_WRITE (dr))
                 first_store = dr;
 
-              /* If there are both known and unknown misaligned accesses in the
-                 loop, we choose peeling amount according to the known
-                 accesses.  */
-              if (!supportable_dr_alignment)
-                {
-                  dr0 = dr;
-                  if (!first_store && DR_IS_WRITE (dr))
-                    first_store = dr;
-                }
-
               one_misalignment_unknown = true;
             }
         }
@@ -1721,81 +1708,85 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       || loop->inner)
     do_peeling = false;
 
-  unsigned int unknown_align_inside_cost = INT_MAX;
-  unsigned int unknown_align_outside_cost = INT_MAX;
+  struct _vect_peel_extended_info peel_for_known_alignment;
+  struct _vect_peel_extended_info peel_for_unknown_alignment;
+  struct _vect_peel_extended_info best_peel;
+
+  peel_for_unknown_alignment.inside_cost = INT_MAX;
+  peel_for_unknown_alignment.outside_cost = INT_MAX;
+  peel_for_unknown_alignment.peel_info.count = 0;
 
   if (do_peeling
-      && one_misalignment_unknown
-      && vect_supportable_dr_alignment (dr0, false))
+      && one_misalignment_unknown)
     {
       /* Check if the target requires to prefer stores over loads, i.e., if
          misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
-      if (first_store && DR_IS_READ (dr0))
-        {
-          unsigned int load_inside_cost = 0;
-          unsigned int load_outside_cost = 0;
-          unsigned int store_inside_cost = 0;
-          unsigned int store_outside_cost = 0;
-
-          stmt_vector_for_cost dummy;
-          dummy.create (2);
-          vect_get_peeling_costs_all_drs (dr0,
-                                          &load_inside_cost,
-                                          &load_outside_cost,
-                                          &dummy, vf / 2, vf);
-          dummy.release ();
-
+      unsigned int load_inside_cost = 0;
+      unsigned int load_outside_cost = 0;
+      unsigned int store_inside_cost = 0;
+      unsigned int store_outside_cost = 0;
+
+      stmt_vector_for_cost dummy;
+      dummy.create (2);
+      vect_get_peeling_costs_all_drs (dr0,
+                                      &load_inside_cost,
+                                      &load_outside_cost,
+                                      &dummy, vf / 2, vf);
+      dummy.release ();
+
+      if (first_store)
+        {
           dummy.create (2);
           vect_get_peeling_costs_all_drs (first_store,
                                           &store_inside_cost,
                                           &store_outside_cost,
                                           &dummy, vf / 2, vf);
           dummy.release ();
+        }
+      else
+        {
+          store_inside_cost = INT_MAX;
+          store_outside_cost = INT_MAX;
+        }
 
       if (load_inside_cost > store_inside_cost
           || (load_inside_cost == store_inside_cost
               && load_outside_cost > store_outside_cost))
         {
           dr0 = first_store;
-          unknown_align_inside_cost = store_inside_cost;
-          unknown_align_outside_cost = store_outside_cost;
+          peel_for_unknown_alignment.inside_cost = store_inside_cost;
+          peel_for_unknown_alignment.outside_cost = store_outside_cost;
         }
       else
        {
-          unknown_align_inside_cost = load_inside_cost;
-          unknown_align_outside_cost = load_outside_cost;
+          peel_for_unknown_alignment.inside_cost = load_inside_cost;
+          peel_for_unknown_alignment.outside_cost = load_outside_cost;
         }
 
       stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
       prologue_cost_vec.create (2);
       epilogue_cost_vec.create (2);
 
       int dummy2;
-      unknown_align_outside_cost += vect_get_known_peeling_cost
+      peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
         (loop_vinfo, vf / 2, &dummy2,
          &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
          &prologue_cost_vec, &epilogue_cost_vec);
 
       prologue_cost_vec.release ();
       epilogue_cost_vec.release ();
-        }
 
-      /* Use peeling only if it may help to align other accesses in the loop or
-         if it may help improving load bandwith when we'd end up using
-         unaligned loads.  */
-      tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
-      if (STMT_VINFO_SAME_ALIGN_REFS
-            (vinfo_for_stmt (DR_STMT (dr0))).length () == 0
-          && (vect_supportable_dr_alignment (dr0, false)
-              != dr_unaligned_supported
-              || (DR_IS_READ (dr0)
-                  && (builtin_vectorization_cost (vector_load, dr0_vt, 0)
-                      == builtin_vectorization_cost (unaligned_load,
-                                                     dr0_vt, -1)))))
-        do_peeling = false;
+      peel_for_unknown_alignment.peel_info.count = 1
+        + STMT_VINFO_SAME_ALIGN_REFS
+        (vinfo_for_stmt (DR_STMT (dr0))).length ();
     }
 
-  struct _vect_peel_extended_info peel_for_known_alignment;
+  peel_for_unknown_alignment.peel_info.npeel = 0;
+  peel_for_unknown_alignment.peel_info.dr = dr0;
+
+  best_peel = peel_for_unknown_alignment;
+
   peel_for_known_alignment.inside_cost = INT_MAX;
   peel_for_known_alignment.outside_cost = INT_MAX;
   peel_for_known_alignment.peel_info.count = 0;
@@ -1804,24 +1795,52 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   if (do_peeling && one_misalignment_known)
     {
       /* Peeling is possible, but there is no data access that is not supported
-         unless aligned.  So we try to choose the best possible peeling.  */
-
-      /* Choose the best peeling from the hash table.  */
+         unless aligned.  So we try to choose the best possible peeling from
+         the hash table.  */
       peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
         (&peeling_htab, loop_vinfo, &npeel, &body_cost_vec);
-      dr0_known_align = peel_for_known_alignment.peel_info.dr;
     }
 
   /* Compare costs of peeling for known and unknown alignment. */
-  if (dr0_known_align != NULL
-      && unknown_align_inside_cost >= peel_for_known_alignment.inside_cost)
-    {
-      dr0 = dr0_known_align;
-      if (!npeel)
-        do_peeling = false;
-    }
+  if (peel_for_known_alignment.peel_info.dr != NULL
+      && peel_for_unknown_alignment.inside_cost
+      >= peel_for_known_alignment.inside_cost)
+    best_peel = peel_for_known_alignment;
 
-  if (dr0 == NULL)
+  /* Calculate the penalty for no peeling, i.e. leaving everything
+     unaligned.
+     TODO: use something like an adapted vect_get_peeling_costs_all_drs.  */
+  unsigned nopeel_inside_cost = 0;
+  unsigned nopeel_outside_cost = 0;
+
+  stmt_vector_for_cost dummy;
+  dummy.create (2);
+  FOR_EACH_VEC_ELT (datarefs, i, dr)
+    vect_get_data_access_cost (dr, &nopeel_inside_cost,
+                               &nopeel_outside_cost, &dummy);
+  dummy.release ();
+
+  /* Add epilogue costs.  As we do not peel for alignment here, no prologue
+     costs will be recorded.  */
+  stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
+  prologue_cost_vec.create (2);
+  epilogue_cost_vec.create (2);
+
+  int dummy2;
+  nopeel_outside_cost += vect_get_known_peeling_cost
+    (loop_vinfo, 0, &dummy2,
+     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
+     &prologue_cost_vec, &epilogue_cost_vec);
+
+  prologue_cost_vec.release ();
+  epilogue_cost_vec.release ();
+
+  npeel = best_peel.peel_info.npeel;
+  dr0 = best_peel.peel_info.dr;
+
+  /* If no peeling is not more expensive than the best peeling we
+     have so far, don't perform any peeling.  */
+  if (nopeel_inside_cost <= best_peel.inside_cost)
     do_peeling = false;
 
   if (do_peeling)
@@ -2000,7 +2019,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               break;
             }
 
-          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
+          enum dr_alignment_support supportable_dr_alignment =
+            vect_supportable_dr_alignment (dr, false);
 
           if (!supportable_dr_alignment)
             {
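For the no-peeling penalty computed in the -1804 hunk, the inside cost is simply the sum of each data reference's access cost at its current (possibly unaligned) state, and only epilogue costs are added outside the loop since nothing is peeled. A small standalone sketch of that accounting, with made-up per-access costs in place of the target cost hooks:

#include <cstdio>
#include <vector>

/* Made-up per-access costs standing in for vect_get_data_access_cost;
   the real values come from the target's cost model.  */
struct access { bool aligned; };

static unsigned
access_cost (const access &a)
{
  return a.aligned ? 1 : 3;  /* unaligned accesses assumed pricier */
}

int
main ()
{
  std::vector<access> datarefs = { { false }, { true }, { false } };

  /* Penalty for not peeling: every access keeps its current alignment.  */
  unsigned nopeel_inside_cost = 0;
  for (const access &dr : datarefs)
    nopeel_inside_cost += access_cost (dr);

  /* Suppose the best peeling found so far aligns the two unaligned
     accesses, leaving a body cost of 3 (plus peeling overhead outside).  */
  unsigned best_peel_inside_cost = 3;

  bool do_peeling = nopeel_inside_cost > best_peel_inside_cost;
  printf ("nopeel=%u best=%u -> %s\n", nopeel_inside_cost,
          best_peel_inside_cost, do_peeling ? "peel" : "do not peel");
  return 0;
}

With these numbers the unaligned body costs 7 against 3 for the best peeling, so peeling wins; if the target charged the same for aligned and unaligned accesses, the comparison would flip and peeling would be skipped.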