Commit ddb0b824 by Jan Hubicka Committed by Jan Hubicka

lto-partition.c: Include sreal.h


	* lto-partition.c: Include sreal.h
	(add_symbol_to_partition_1): Use size instead of self_size
	for size estimate.
	(account_reference_p): New.
	(lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup;
	fix accounting errors in boundary size; add debug output; combine cost
	as cost/size instead of cost/internal; reduce the partitioning error to
	+- 1/8 of the parttion size.

From-SVN: r259749
parent 8d70b61e
2018-04-19 Jan Hubicka <jh@suse.cz>
* lto-partition.c: Include sreal.h
(add_symbol_to_partition_1): Use size instead of self_size
for size estimate.
(account_reference_p): New.
(lto_balanced_map): Use 64bit arithmetics for size calculatoins; cleanup;
fix accounting errors in boundary size; add debug output; combine cost
as cost/size instead of cost/internal; reduce the partitioning error to
+- 1/8 of the parttion size.
2018-04-19 Martin Liska <mliska@suse.cz> 2018-04-19 Martin Liska <mliska@suse.cz>
* lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out * lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out
......
...@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-prop.h" #include "ipa-prop.h"
#include "ipa-fnsummary.h" #include "ipa-fnsummary.h"
#include "lto-partition.h" #include "lto-partition.h"
#include "sreal.h"
vec<ltrans_partition> ltrans_partitions; vec<ltrans_partition> ltrans_partitions;
...@@ -152,8 +153,8 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node) ...@@ -152,8 +153,8 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
{ {
struct cgraph_edge *e; struct cgraph_edge *e;
if (!node->alias) if (!node->alias && c == SYMBOL_PARTITION)
part->insns += ipa_fn_summaries->get (cnode)->self_size; part->insns += ipa_fn_summaries->get (cnode)->size;
/* Add all inline clones and callees that are duplicated. */ /* Add all inline clones and callees that are duplicated. */
for (e = cnode->callees; e; e = e->next_callee) for (e = cnode->callees; e; e = e->next_callee)
...@@ -276,8 +277,9 @@ undo_partition (ltrans_partition partition, unsigned int n_nodes) ...@@ -276,8 +277,9 @@ undo_partition (ltrans_partition partition, unsigned int n_nodes)
delete partition->initializers_visited; delete partition->initializers_visited;
partition->initializers_visited = NULL; partition->initializers_visited = NULL;
if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))) if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
partition->insns -= ipa_fn_summaries->get (cnode)->self_size; && node->get_partitioning_class () == SYMBOL_PARTITION)
partition->insns -= ipa_fn_summaries->get (cnode)->size;
lto_symtab_encoder_delete_node (partition->encoder, node); lto_symtab_encoder_delete_node (partition->encoder, node);
node->aux = (void *)((size_t)node->aux - 1); node->aux = (void *)((size_t)node->aux - 1);
} }
...@@ -408,6 +410,24 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition) ...@@ -408,6 +410,24 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
add_symbol_to_partition (partition, node); add_symbol_to_partition (partition, node);
} }
/* Return true if we should account reference from N1 to N2 in cost
of partition boundary. */
bool
account_reference_p (symtab_node *n1, symtab_node *n2)
{
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
n1 = cnode;
/* Do not account recursion - the code below will handle it incorrectly
otherwise. Also do not account references to external symbols.
They will never become local. */
if (n1 == n2
|| DECL_EXTERNAL (n2->decl)
|| !n2->definition)
return false;
return true;
}
/* Group cgraph nodes into equally-sized partitions. /* Group cgraph nodes into equally-sized partitions.
...@@ -457,14 +477,14 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -457,14 +477,14 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
auto_vec<varpool_node *> varpool_order; auto_vec<varpool_node *> varpool_order;
int i; int i;
struct cgraph_node *node; struct cgraph_node *node;
int original_total_size, total_size = 0, best_total_size = 0; int64_t original_total_size, total_size = 0;
int partition_size; int64_t partition_size;
ltrans_partition partition; ltrans_partition partition;
int last_visited_node = 0; int last_visited_node = 0;
varpool_node *vnode; varpool_node *vnode;
int cost = 0, internal = 0; int64_t cost = 0, internal = 0;
int best_n_nodes = 0, best_i = 0, best_cost = int best_n_nodes = 0, best_i = 0;
INT_MAX, best_internal = 0; int64_t best_cost = -1, best_internal = 0, best_size = 0;
int npartitions; int npartitions;
int current_order = -1; int current_order = -1;
int noreorder_pos = 0; int noreorder_pos = 0;
...@@ -513,7 +533,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -513,7 +533,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
/* Compute partition size and create the first partition. */ /* Compute partition size and create the first partition. */
if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size) if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size)
fatal_error (input_location, "min partition size cannot be greater than max partition size"); fatal_error (input_location, "min partition size cannot be greater "
"than max partition size");
partition_size = total_size / n_lto_partitions; partition_size = total_size / n_lto_partitions;
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
...@@ -521,7 +542,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -521,7 +542,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
npartitions = 1; npartitions = 1;
partition = new_partition (""); partition = new_partition ("");
if (symtab->dump_file) if (symtab->dump_file)
fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n", fprintf (symtab->dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
total_size, partition_size); total_size, partition_size);
auto_vec<symtab_node *> next_nodes; auto_vec<symtab_node *> next_nodes;
...@@ -540,17 +561,11 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -540,17 +561,11 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
next_nodes.safe_push (varpool_order[varpool_pos++]); next_nodes.safe_push (varpool_order[varpool_pos++]);
while (noreorder_pos < (int)noreorder.length () while (noreorder_pos < (int)noreorder.length ()
&& noreorder[noreorder_pos]->order < current_order) && noreorder[noreorder_pos]->order < current_order)
{ next_nodes.safe_push (noreorder[noreorder_pos++]);
if (!noreorder[noreorder_pos]->alias)
total_size -= ipa_fn_summaries->get (noreorder[noreorder_pos])->size;
next_nodes.safe_push (noreorder[noreorder_pos++]);
}
add_sorted_nodes (next_nodes, partition); add_sorted_nodes (next_nodes, partition);
if (!symbol_partitioned_p (order[i])) if (!symbol_partitioned_p (order[i]))
add_symbol_to_partition (partition, order[i]); add_symbol_to_partition (partition, order[i]);
if (!order[i]->alias)
total_size -= ipa_fn_summaries->get (order[i])->size;
/* Once we added a new node to the partition, we also want to add /* Once we added a new node to the partition, we also want to add
...@@ -567,7 +582,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -567,7 +582,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
it and thus we need to subtract it from COST. */ it and thus we need to subtract it from COST. */
while (last_visited_node < lto_symtab_encoder_size (partition->encoder)) while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
{ {
symtab_node *refs_node;
int j; int j;
struct ipa_ref *ref = NULL; struct ipa_ref *ref = NULL;
symtab_node *snode = lto_symtab_encoder_deref (partition->encoder, symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
...@@ -577,7 +591,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -577,7 +591,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
{ {
struct cgraph_edge *edge; struct cgraph_edge *edge;
refs_node = node;
last_visited_node++; last_visited_node++;
...@@ -585,7 +598,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -585,7 +598,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
/* Compute boundary cost of callgraph edges. */ /* Compute boundary cost of callgraph edges. */
for (edge = node->callees; edge; edge = edge->next_callee) for (edge = node->callees; edge; edge = edge->next_callee)
if (edge->callee->definition) /* Inline edges will always end up local. */
if (edge->inline_failed
&& account_reference_p (node, edge->callee))
{ {
int edge_cost = edge->frequency (); int edge_cost = edge->frequency ();
int index; int index;
...@@ -602,6 +617,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -602,6 +617,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
cost += edge_cost; cost += edge_cost;
} }
for (edge = node->callers; edge; edge = edge->next_caller) for (edge = node->callers; edge; edge = edge->next_caller)
if (edge->inline_failed
&& account_reference_p (edge->caller, node))
{ {
int edge_cost = edge->frequency (); int edge_cost = edge->frequency ();
int index; int index;
...@@ -614,27 +631,24 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -614,27 +631,24 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
edge->caller); edge->caller);
if (index != LCC_NOT_FOUND if (index != LCC_NOT_FOUND
&& index < last_visited_node - 1) && index < last_visited_node - 1)
cost -= edge_cost; cost -= edge_cost, internal += edge_cost;
else else
cost += edge_cost; cost += edge_cost;
} }
} }
else else
{ last_visited_node++;
refs_node = snode;
last_visited_node++;
}
/* Compute boundary cost of IPA REF edges and at the same time look into /* Compute boundary cost of IPA REF edges and at the same time look into
variables referenced from current partition and try to add them. */ variables referenced from current partition and try to add them. */
for (j = 0; refs_node->iterate_reference (j, ref); j++) for (j = 0; snode->iterate_reference (j, ref); j++)
if (is_a <varpool_node *> (ref->referred)) if (!account_reference_p (snode, ref->referred))
;
else if (is_a <varpool_node *> (ref->referred))
{ {
int index; int index;
vnode = dyn_cast <varpool_node *> (ref->referred); vnode = dyn_cast <varpool_node *> (ref->referred);
if (!vnode->definition)
continue;
if (!symbol_partitioned_p (vnode) if (!symbol_partitioned_p (vnode)
&& !vnode->no_reorder && !vnode->no_reorder
&& vnode->get_partitioning_class () == SYMBOL_PARTITION) && vnode->get_partitioning_class () == SYMBOL_PARTITION)
...@@ -652,8 +666,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -652,8 +666,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
int index; int index;
node = dyn_cast <cgraph_node *> (ref->referred); node = dyn_cast <cgraph_node *> (ref->referred);
if (!node->definition)
continue;
index = lto_symtab_encoder_lookup (partition->encoder, index = lto_symtab_encoder_lookup (partition->encoder,
node); node);
if (index != LCC_NOT_FOUND if (index != LCC_NOT_FOUND
...@@ -662,8 +674,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -662,8 +674,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
else else
cost++; cost++;
} }
for (j = 0; refs_node->iterate_referring (j, ref); j++) for (j = 0; snode->iterate_referring (j, ref); j++)
if (is_a <varpool_node *> (ref->referring)) if (!account_reference_p (ref->referring, snode))
;
else if (is_a <varpool_node *> (ref->referring))
{ {
int index; int index;
...@@ -682,7 +696,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -682,7 +696,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
vnode); vnode);
if (index != LCC_NOT_FOUND if (index != LCC_NOT_FOUND
&& index < last_visited_node - 1) && index < last_visited_node - 1)
cost--; cost--, internal++;
else else
cost++; cost++;
} }
...@@ -696,36 +710,41 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -696,36 +710,41 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
node); node);
if (index != LCC_NOT_FOUND if (index != LCC_NOT_FOUND
&& index < last_visited_node - 1) && index < last_visited_node - 1)
cost--; cost--, internal++;
else else
cost++; cost++;
} }
} }
/* If the partition is large enough, start looking for smallest boundary cost. */ gcc_assert (cost >= 0 && internal >= 0);
if (partition->insns < partition_size * 3 / 4
|| best_cost == INT_MAX /* If the partition is large enough, start looking for smallest boundary cost.
|| ((!cost If partition still seems too small (less than 7/8 of target weight) accept
|| (best_internal * (HOST_WIDE_INT) cost any cost. If partition has right size, optimize for highest internal/cost.
> (internal * (HOST_WIDE_INT)best_cost))) Later we stop building partition if its size is 9/8 of the target wight. */
&& partition->insns < partition_size * 5 / 4)) if (partition->insns < partition_size * 7 / 8
|| best_cost == -1
|| (!cost
|| ((sreal)best_internal * (sreal) cost
< ((sreal) internal * (sreal)best_cost))))
{ {
best_cost = cost; best_cost = cost;
best_internal = internal; best_internal = internal;
best_size = partition->insns;
best_i = i; best_i = i;
best_n_nodes = lto_symtab_encoder_size (partition->encoder); best_n_nodes = lto_symtab_encoder_size (partition->encoder);
best_total_size = total_size;
best_varpool_pos = varpool_pos; best_varpool_pos = varpool_pos;
} }
if (symtab->dump_file) if (symtab->dump_file)
fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, cost %i/%i " fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, "
"best %i/%i, step %i\n", i, "cost %" PRId64 "/%" PRId64 " "
"best %" PRId64 "/%" PRId64", step %i\n", i,
order[i]->name (), order[i]->order, order[i]->name (), order[i]->order,
partition->insns, cost, internal, partition->insns, cost, internal,
best_cost, best_internal, best_i); best_cost, best_internal, best_i);
/* Partition is too large, unwind into step when best cost was reached and /* Partition is too large, unwind into step when best cost was reached and
start new partition. */ start new partition. */
if (partition->insns > 2 * partition_size if (partition->insns > 9 * partition_size / 8
|| partition->insns > max_partition_size) || partition->insns > max_partition_size)
{ {
if (best_i != i) if (best_i != i)
...@@ -736,21 +755,26 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -736,21 +755,26 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
undo_partition (partition, best_n_nodes); undo_partition (partition, best_n_nodes);
varpool_pos = best_varpool_pos; varpool_pos = best_varpool_pos;
} }
gcc_assert (best_size == partition->insns);
i = best_i; i = best_i;
if (symtab->dump_file)
fprintf (symtab->dump_file,
"Partition insns: %i (want %" PRId64 ")\n",
partition->insns, partition_size);
/* When we are finished, avoid creating empty partition. */ /* When we are finished, avoid creating empty partition. */
while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1])) while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1]))
i++; i++;
if (i == n_nodes - 1) if (i == n_nodes - 1)
break; break;
total_size -= partition->insns;
partition = new_partition (""); partition = new_partition ("");
last_visited_node = 0; last_visited_node = 0;
total_size = best_total_size;
cost = 0; cost = 0;
if (symtab->dump_file) if (symtab->dump_file)
fprintf (symtab->dump_file, "New partition\n"); fprintf (symtab->dump_file, "New partition\n");
best_n_nodes = 0; best_n_nodes = 0;
best_cost = INT_MAX; best_cost = -1;
/* Since the size of partitions is just approximate, update the size after /* Since the size of partitions is just approximate, update the size after
we finished current one. */ we finished current one. */
...@@ -760,6 +784,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -760,6 +784,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
/* Watch for overflow. */ /* Watch for overflow. */
partition_size = INT_MAX / 16; partition_size = INT_MAX / 16;
if (symtab->dump_file)
fprintf (symtab->dump_file,
"Total size: %" PRId64 " partition_size: %" PRId64 "\n",
total_size, partition_size);
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
npartitions ++; npartitions ++;
...@@ -779,6 +807,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) ...@@ -779,6 +807,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
next_nodes.safe_push (varpool_order[varpool_pos++]); next_nodes.safe_push (varpool_order[varpool_pos++]);
while (noreorder_pos < (int)noreorder.length ()) while (noreorder_pos < (int)noreorder.length ())
next_nodes.safe_push (noreorder[noreorder_pos++]); next_nodes.safe_push (noreorder[noreorder_pos++]);
/* For one partition the cost of boundary should be 0 unless we added final
symbols here (these are not accounted) or we have accounting bug. */
gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost);
add_sorted_nodes (next_nodes, partition); add_sorted_nodes (next_nodes, partition);
free (order); free (order);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment