Commit 1e69cc8f by Robin Dapp Committed by Andreas Krebbel

Vector peeling cost model 3/6

gcc/ChangeLog:

2017-05-30  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* tree-vect-data-refs.c (vect_peeling_hash_choose_best_peeling):
	Return peeling info and set costs to zero for unlimited cost
	model.
	(vect_enhance_data_refs_alignment): Also inspect all datarefs
	with unknown misalignment. Compute costs for unknown
	misalignment, compare them to the costs for known misalignment
	and choose the cheapest for peeling.

From-SVN: r248677
parent 71595748
2017-05-30 Robin Dapp <rdapp@linux.vnet.ibm.com> 2017-05-30 Robin Dapp <rdapp@linux.vnet.ibm.com>
* tree-vect-data-refs.c (vect_peeling_hash_choose_best_peeling):
Return peeling info and set costs to zero for unlimited cost
model.
(vect_enhance_data_refs_alignment): Also inspect all datarefs
with unknown misalignment. Compute costs for unknown
misalignment, compare them to the costs for known misalignment
and choose the cheapest for peeling.
2017-05-30 Robin Dapp <rdapp@linux.vnet.ibm.com>
* tree-vect-data-refs.c (vect_update_misalignment_for_peel): Rename. * tree-vect-data-refs.c (vect_update_misalignment_for_peel): Rename.
(vect_get_peeling_costs_all_drs): Create function. (vect_get_peeling_costs_all_drs): Create function.
(vect_peeling_hash_get_lowest_cost): (vect_peeling_hash_get_lowest_cost):
......
...@@ -1342,7 +1342,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, ...@@ -1342,7 +1342,7 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
choosing an option with the lowest cost (if cost model is enabled) or the choosing an option with the lowest cost (if cost model is enabled) or the
option that aligns as many accesses as possible. */ option that aligns as many accesses as possible. */
static struct data_reference * static struct _vect_peel_extended_info
vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab, vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
loop_vec_info loop_vinfo, loop_vec_info loop_vinfo,
unsigned int *npeel, unsigned int *npeel,
...@@ -1365,11 +1365,13 @@ vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_hta ...@@ -1365,11 +1365,13 @@ vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_hta
res.peel_info.count = 0; res.peel_info.count = 0;
peeling_htab->traverse <_vect_peel_extended_info *, peeling_htab->traverse <_vect_peel_extended_info *,
vect_peeling_hash_get_most_frequent> (&res); vect_peeling_hash_get_most_frequent> (&res);
res.inside_cost = 0;
res.outside_cost = 0;
} }
*npeel = res.peel_info.npeel; *npeel = res.peel_info.npeel;
*body_cost_vec = res.body_cost_vec; *body_cost_vec = res.body_cost_vec;
return res.peel_info.dr; return res;
} }
/* Return true if the new peeling NPEEL is supported. */ /* Return true if the new peeling NPEEL is supported. */
...@@ -1518,6 +1520,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1518,6 +1520,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
enum dr_alignment_support supportable_dr_alignment; enum dr_alignment_support supportable_dr_alignment;
struct data_reference *dr0 = NULL, *first_store = NULL; struct data_reference *dr0 = NULL, *first_store = NULL;
struct data_reference *dr; struct data_reference *dr;
struct data_reference *dr0_known_align = NULL;
unsigned int i, j; unsigned int i, j;
bool do_peeling = false; bool do_peeling = false;
bool do_versioning = false; bool do_versioning = false;
...@@ -1525,7 +1528,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1525,7 +1528,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
gimple *stmt; gimple *stmt;
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
unsigned int npeel = 0; unsigned int npeel = 0;
bool all_misalignments_unknown = true; bool one_misalignment_known = false;
bool one_misalignment_unknown = false;
unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned possible_npeel_number = 1; unsigned possible_npeel_number = 1;
tree vectype; tree vectype;
...@@ -1651,11 +1655,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1651,11 +1655,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
npeel_tmp += nelements; npeel_tmp += nelements;
} }
all_misalignments_unknown = false; one_misalignment_known = true;
/* Data-ref that was chosen for the case that all the
misalignments are unknown is not relevant anymore, since we
have a data-ref with known alignment. */
dr0 = NULL;
} }
else else
{ {
...@@ -1663,35 +1663,32 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1663,35 +1663,32 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
peeling for data-ref that has the maximum number of data-refs peeling for data-ref that has the maximum number of data-refs
with the same alignment, unless the target prefers to align with the same alignment, unless the target prefers to align
stores over load. */ stores over load. */
if (all_misalignments_unknown) unsigned same_align_drs
{ = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
unsigned same_align_drs if (!dr0
= STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length (); || same_align_drs_max < same_align_drs)
if (!dr0 {
|| same_align_drs_max < same_align_drs) same_align_drs_max = same_align_drs;
{ dr0 = dr;
same_align_drs_max = same_align_drs; }
dr0 = dr; /* For data-refs with the same number of related
} accesses prefer the one where the misalign
/* For data-refs with the same number of related computation will be invariant in the outermost loop. */
accesses prefer the one where the misalign else if (same_align_drs_max == same_align_drs)
computation will be invariant in the outermost loop. */ {
else if (same_align_drs_max == same_align_drs) struct loop *ivloop0, *ivloop;
{ ivloop0 = outermost_invariant_loop_for_expr
struct loop *ivloop0, *ivloop; (loop, DR_BASE_ADDRESS (dr0));
ivloop0 = outermost_invariant_loop_for_expr ivloop = outermost_invariant_loop_for_expr
(loop, DR_BASE_ADDRESS (dr0)); (loop, DR_BASE_ADDRESS (dr));
ivloop = outermost_invariant_loop_for_expr if ((ivloop && !ivloop0)
(loop, DR_BASE_ADDRESS (dr)); || (ivloop && ivloop0
if ((ivloop && !ivloop0) && flow_loop_nested_p (ivloop, ivloop0)))
|| (ivloop && ivloop0 dr0 = dr;
&& flow_loop_nested_p (ivloop, ivloop0))) }
dr0 = dr;
}
if (!first_store && DR_IS_WRITE (dr)) if (!first_store && DR_IS_WRITE (dr))
first_store = dr; first_store = dr;
}
/* If there are both known and unknown misaligned accesses in the /* If there are both known and unknown misaligned accesses in the
loop, we choose peeling amount according to the known loop, we choose peeling amount according to the known
...@@ -1702,6 +1699,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1702,6 +1699,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (!first_store && DR_IS_WRITE (dr)) if (!first_store && DR_IS_WRITE (dr))
first_store = dr; first_store = dr;
} }
one_misalignment_unknown = true;
} }
} }
else else
...@@ -1722,8 +1721,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1722,8 +1721,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|| loop->inner) || loop->inner)
do_peeling = false; do_peeling = false;
unsigned int unknown_align_inside_cost = INT_MAX;
unsigned int unknown_align_outside_cost = INT_MAX;
if (do_peeling if (do_peeling
&& all_misalignments_unknown && one_misalignment_unknown
&& vect_supportable_dr_alignment (dr0, false)) && vect_supportable_dr_alignment (dr0, false))
{ {
/* Check if the target requires to prefer stores over loads, i.e., if /* Check if the target requires to prefer stores over loads, i.e., if
...@@ -1731,62 +1733,51 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1731,62 +1733,51 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
drs with same alignment into account). */ drs with same alignment into account). */
if (first_store && DR_IS_READ (dr0)) if (first_store && DR_IS_READ (dr0))
{ {
unsigned int load_inside_cost = 0, load_outside_cost = 0; unsigned int load_inside_cost = 0;
unsigned int store_inside_cost = 0, store_outside_cost = 0; unsigned int load_outside_cost = 0;
unsigned int load_inside_penalty = 0, load_outside_penalty = 0; unsigned int store_inside_cost = 0;
unsigned int store_inside_penalty = 0, store_outside_penalty = 0; unsigned int store_outside_cost = 0;
stmt_vector_for_cost dummy; stmt_vector_for_cost dummy;
dummy.create (2); dummy.create (2);
vect_get_peeling_costs_all_drs (dr0,
&load_inside_cost,
&load_outside_cost,
&dummy, vf / 2, vf);
dummy.release ();
vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost, dummy.create (2);
&dummy); vect_get_peeling_costs_all_drs (first_store,
vect_get_data_access_cost (first_store, &store_inside_cost, &store_inside_cost,
&store_outside_cost, &dummy); &store_outside_cost,
&dummy, vf / 2, vf);
dummy.release (); dummy.release ();
/* Calculate the penalty for leaving FIRST_STORE unaligned (by if (load_inside_cost > store_inside_cost
aligning the load DR0). */ || (load_inside_cost == store_inside_cost
load_inside_penalty = store_inside_cost; && load_outside_cost > store_outside_cost))
load_outside_penalty = store_outside_cost; {
for (i = 0; dr0 = first_store;
STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt ( unknown_align_inside_cost = store_inside_cost;
DR_STMT (first_store))).iterate (i, &dr); unknown_align_outside_cost = store_outside_cost;
i++) }
if (DR_IS_READ (dr)) else
{ {
load_inside_penalty += load_inside_cost; unknown_align_inside_cost = load_inside_cost;
load_outside_penalty += load_outside_cost; unknown_align_outside_cost = load_outside_cost;
} }
else
{ stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
load_inside_penalty += store_inside_cost; prologue_cost_vec.create (2);
load_outside_penalty += store_outside_cost; epilogue_cost_vec.create (2);
}
int dummy2;
/* Calculate the penalty for leaving DR0 unaligned (by unknown_align_outside_cost += vect_get_known_peeling_cost
aligning the FIRST_STORE). */ (loop_vinfo, vf / 2, &dummy2,
store_inside_penalty = load_inside_cost; &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
store_outside_penalty = load_outside_cost; &prologue_cost_vec, &epilogue_cost_vec);
for (i = 0;
STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt ( prologue_cost_vec.release ();
DR_STMT (dr0))).iterate (i, &dr); epilogue_cost_vec.release ();
i++)
if (DR_IS_READ (dr))
{
store_inside_penalty += load_inside_cost;
store_outside_penalty += load_outside_cost;
}
else
{
store_inside_penalty += store_inside_cost;
store_outside_penalty += store_outside_cost;
}
if (load_inside_penalty > store_inside_penalty
|| (load_inside_penalty == store_inside_penalty
&& load_outside_penalty > store_outside_penalty))
dr0 = first_store;
} }
/* Use peeling only if it may help to align other accesses in the loop or /* Use peeling only if it may help to align other accesses in the loop or
...@@ -1804,22 +1795,35 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1804,22 +1795,35 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
do_peeling = false; do_peeling = false;
} }
if (do_peeling && !dr0) struct _vect_peel_extended_info peel_for_known_alignment;
peel_for_known_alignment.inside_cost = INT_MAX;
peel_for_known_alignment.outside_cost = INT_MAX;
peel_for_known_alignment.peel_info.count = 0;
peel_for_known_alignment.peel_info.dr = NULL;
if (do_peeling && one_misalignment_known)
{ {
/* Peeling is possible, but there is no data access that is not supported /* Peeling is possible, but there is no data access that is not supported
unless aligned. So we try to choose the best possible peeling. */ unless aligned. So we try to choose the best possible peeling. */
/* We should get here only if there are drs with known misalignment. */
gcc_assert (!all_misalignments_unknown);
/* Choose the best peeling from the hash table. */ /* Choose the best peeling from the hash table. */
dr0 = vect_peeling_hash_choose_best_peeling (&peeling_htab, peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
loop_vinfo, &npeel, (&peeling_htab, loop_vinfo, &npeel, &body_cost_vec);
&body_cost_vec); dr0_known_align = peel_for_known_alignment.peel_info.dr;
if (!dr0 || !npeel) }
do_peeling = false;
/* Compare costs of peeling for known and unknown alignment. */
if (dr0_known_align != NULL
&& unknown_align_inside_cost >= peel_for_known_alignment.inside_cost)
{
dr0 = dr0_known_align;
if (!npeel)
do_peeling = false;
} }
if (dr0 == NULL)
do_peeling = false;
if (do_peeling) if (do_peeling)
{ {
stmt = DR_STMT (dr0); stmt = DR_STMT (dr0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment