Commit 9696c529 by Steven Bosscher

opts.c (common_handle_option): Do not set flag_value_profile_transformations for…

opts.c (common_handle_option): Do not set flag_value_profile_transformations for -fprofile-generate.

	* opts.c (common_handle_option): Do not set 
	flag_value_profile_transformations for -fprofile-generate.
	* profile.c (instrument_values): Use COUNTER_FOR_HIST_TYPE.
	(BB_TO_GCOV_INDEX): Remove.
	(output_location): Don't use it.
	(branch_prob): Likewise.  Don't fiddle with the index of
	ENTRY_BLOCK_PTR and EXIT_BLOCK_PTR.
	(find_spanning_tree): Use clear_aux_for_blocks.
	* gcov.c (struct function_info): Document that blocks 0 and 1
	are the entry resp. exit blocks in gcov, too, like in GCC itself.
	(solve_flow_graph): Use ENTRY_BLOCK and EXIT_BLOCK for special
	blocks identification.
	(output_lines): Likewise.
	* value-prof.c (gimple_value_profile_transformations): Do not
	test flag_value_profile_transformations again.
	(gimple_ic_transform): Take a gimple_stmt_iterator like all other
	transformation functions.
	(gimple_values_to_profile): Don't test
	flag_value_profile_transformations.
	* tree-profile.c (tree_profiling): Assert that the cgraph is in
	the CGRAPH_STATE_IPA_SSA state.
	Do not set, or look at, after_tree_profile.
	* function.h (struct function): Remove after_tree_profile bit.
	* omp-low.c (expand_omp_taskreg): Don't set after_tree_profile.
	* tree-inline.c (initialize_cfun): Don't copy it.
	* lto-streamer-out.c (output_struct_function_base): Don't stream it.
	* lto-streamer-in.c (input_struct_function_base): Likewise.

From-SVN: r189778
parent 678400b3
2012-07-22 Steven Bosscher <steven@gcc.gnu.org>
* opts.c (common_handle_option): Do not set
flag_value_profile_transformations for -fprofile-generate.
* profile.c (instrument_values): Use COUNTER_FOR_HIST_TYPE.
(BB_TO_GCOV_INDEX): Remove.
(output_location): Don't use it.
(branch_prob): Likewise. Don't fiddle with the index of
ENTRY_BLOCK_PTR and EXIT_BLOCK_PTR.
(find_spanning_tree): Use clear_aux_for_blocks.
* gcov.c (struct function_info): Document that blocks 0 and 1
are the entry resp. exit blocks in gcov, too, like in GCC itself.
(solve_flow_graph): Use ENTRY_BLOCK and EXIT_BLOCK for special
blocks identification.
(output_lines): Likewise.
* value-prof.c (gimple_value_profile_transformations): Do not
test flag_value_profile_transformations again.
(gimple_ic_transform): Take a gimple_stmt_iterator like all other
transformation functions.
(gimple_values_to_profile): Don't test
flag_value_profile_transformations.
* tree-profile.c (tree_profiling): Assert that the cgraph is in
the CGRAPH_STATE_IPA_SSA state.
Do not set, or look at, after_tree_profile.
* function.h (struct function): Remove after_tree_profile bit.
* omp-low.c (expand_omp_taskreg): Don't set after_tree_profile.
* tree-inline.c (initialize_cfun): Don't copy it.
* lto-streamer-out.c (output_struct_function_base): Don't stream it.
* lto-streamer-in.c (input_struct_function_base): Likewise.
2012-07-22 Oleg Endo <olegendo@gcc.gnu.org> 2012-07-22 Oleg Endo <olegendo@gcc.gnu.org>
* config/sh/sh.h (TARGET_DYNSHIFT): New macro. * config/sh/sh.h (TARGET_DYNSHIFT): New macro.
......
...@@ -645,9 +645,6 @@ struct GTY(()) function { ...@@ -645,9 +645,6 @@ struct GTY(()) function {
return the address of where it has put a structure value. */ return the address of where it has put a structure value. */
unsigned int returns_pcc_struct : 1; unsigned int returns_pcc_struct : 1;
/* Nonzero if pass_tree_profile was run on this function. */
unsigned int after_tree_profile : 1;
/* Nonzero if this function has local DECL_HARD_REGISTER variables. /* Nonzero if this function has local DECL_HARD_REGISTER variables.
In this case code motion has to be done more carefully. */ In this case code motion has to be done more carefully. */
unsigned int has_local_explicit_reg_vars : 1; unsigned int has_local_explicit_reg_vars : 1;
......
...@@ -57,10 +57,10 @@ along with Gcov; see the file COPYING3. If not see ...@@ -57,10 +57,10 @@ along with Gcov; see the file COPYING3. If not see
/* The code validates that the profile information read in corresponds /* The code validates that the profile information read in corresponds
to the code currently being compiled. Rather than checking for to the code currently being compiled. Rather than checking for
identical files, the code below computes a checksum on the CFG identical files, the code below compares a checksum on the CFG
(based on the order of basic blocks and the arcs in the CFG). If (based on the order of basic blocks and the arcs in the CFG). If
the CFG checksum in the gcda file match the CFG checksum for the the CFG checksum in the gcda file match the CFG checksum in the
code currently being compiled, the profile data will be used. */ gcno file, the profile data will be used. */
/* This is the size of the buffer used to read in source file lines. */ /* This is the size of the buffer used to read in source file lines. */
...@@ -177,7 +177,10 @@ typedef struct function_info ...@@ -177,7 +177,10 @@ typedef struct function_info
/* The graph contains at least one fake incoming edge. */ /* The graph contains at least one fake incoming edge. */
unsigned has_catch : 1; unsigned has_catch : 1;
/* Array of basic blocks. */ /* Array of basic blocks. Like in GCC, the entry block is
at blocks[0] and the exit block is at blocks[1]. */
#define ENTRY_BLOCK (0)
#define EXIT_BLOCK (1)
block_t *blocks; block_t *blocks;
unsigned num_blocks; unsigned num_blocks;
unsigned blocks_executed; unsigned blocks_executed;
...@@ -1363,21 +1366,21 @@ solve_flow_graph (function_t *fn) ...@@ -1363,21 +1366,21 @@ solve_flow_graph (function_t *fn)
bbg_file_name, fn->name); bbg_file_name, fn->name);
else else
{ {
if (fn->blocks[0].num_pred) if (fn->blocks[ENTRY_BLOCK].num_pred)
fnotice (stderr, "%s:'%s' has arcs to entry block\n", fnotice (stderr, "%s:'%s' has arcs to entry block\n",
bbg_file_name, fn->name); bbg_file_name, fn->name);
else else
/* We can't deduce the entry block counts from the lack of /* We can't deduce the entry block counts from the lack of
predecessors. */ predecessors. */
fn->blocks[0].num_pred = ~(unsigned)0; fn->blocks[ENTRY_BLOCK].num_pred = ~(unsigned)0;
if (fn->blocks[fn->num_blocks - 1].num_succ) if (fn->blocks[EXIT_BLOCK].num_succ)
fnotice (stderr, "%s:'%s' has arcs from exit block\n", fnotice (stderr, "%s:'%s' has arcs from exit block\n",
bbg_file_name, fn->name); bbg_file_name, fn->name);
else else
/* Likewise, we can't deduce exit block counts from the lack /* Likewise, we can't deduce exit block counts from the lack
of its successors. */ of its successors. */
fn->blocks[fn->num_blocks - 1].num_succ = ~(unsigned)0; fn->blocks[EXIT_BLOCK].num_succ = ~(unsigned)0;
} }
/* Propagate the measured counts, this must be done in the same /* Propagate the measured counts, this must be done in the same
...@@ -1637,7 +1640,7 @@ add_branch_counts (coverage_t *coverage, const arc_t *arc) ...@@ -1637,7 +1640,7 @@ add_branch_counts (coverage_t *coverage, const arc_t *arc)
} }
} }
/* Format a HOST_WIDE_INT as either a percent ratio, or absolute /* Format a GCOV_TYPE integer as either a percent ratio, or absolute
count. If dp >= 0, format TOP/BOTTOM * 100 to DP decimal places. count. If dp >= 0, format TOP/BOTTOM * 100 to DP decimal places.
If DP is zero, no decimal point is printed. Only print 100% when If DP is zero, no decimal point is printed. Only print 100% when
TOP==BOTTOM and only print 0% when TOP=0. If dp < 0, then simply TOP==BOTTOM and only print 0% when TOP=0. If dp < 0, then simply
...@@ -2266,8 +2269,9 @@ output_lines (FILE *gcov_file, const source_t *src) ...@@ -2266,8 +2269,9 @@ output_lines (FILE *gcov_file, const source_t *src)
{ {
for (; fn && fn->line == line_num; fn = fn->line_next) for (; fn && fn->line == line_num; fn = fn->line_next)
{ {
arc_t *arc = fn->blocks[fn->num_blocks - 1].pred; arc_t *arc = fn->blocks[EXIT_BLOCK].pred;
gcov_type return_count = fn->blocks[fn->num_blocks - 1].count; gcov_type return_count = fn->blocks[EXIT_BLOCK].count;
gcov_type called_count = fn->blocks[ENTRY_BLOCK].count;
for (; arc; arc = arc->pred_next) for (; arc; arc = arc->pred_next)
if (arc->fake) if (arc->fake)
...@@ -2275,9 +2279,9 @@ output_lines (FILE *gcov_file, const source_t *src) ...@@ -2275,9 +2279,9 @@ output_lines (FILE *gcov_file, const source_t *src)
fprintf (gcov_file, "function %s", fn->name); fprintf (gcov_file, "function %s", fn->name);
fprintf (gcov_file, " called %s", fprintf (gcov_file, " called %s",
format_gcov (fn->blocks[0].count, 0, -1)); format_gcov (called_count, 0, -1));
fprintf (gcov_file, " returned %s", fprintf (gcov_file, " returned %s",
format_gcov (return_count, fn->blocks[0].count, 0)); format_gcov (return_count, called_count, 0));
fprintf (gcov_file, " blocks executed %s", fprintf (gcov_file, " blocks executed %s",
format_gcov (fn->blocks_executed, fn->num_blocks - 2, 0)); format_gcov (fn->blocks_executed, fn->num_blocks - 2, 0));
fprintf (gcov_file, "\n"); fprintf (gcov_file, "\n");
......
...@@ -798,7 +798,6 @@ input_struct_function_base (struct function *fn, struct data_in *data_in, ...@@ -798,7 +798,6 @@ input_struct_function_base (struct function *fn, struct data_in *data_in,
bp = streamer_read_bitpack (ib); bp = streamer_read_bitpack (ib);
fn->is_thunk = bp_unpack_value (&bp, 1); fn->is_thunk = bp_unpack_value (&bp, 1);
fn->has_local_explicit_reg_vars = bp_unpack_value (&bp, 1); fn->has_local_explicit_reg_vars = bp_unpack_value (&bp, 1);
fn->after_tree_profile = bp_unpack_value (&bp, 1);
fn->returns_pcc_struct = bp_unpack_value (&bp, 1); fn->returns_pcc_struct = bp_unpack_value (&bp, 1);
fn->returns_struct = bp_unpack_value (&bp, 1); fn->returns_struct = bp_unpack_value (&bp, 1);
fn->can_throw_non_call_exceptions = bp_unpack_value (&bp, 1); fn->can_throw_non_call_exceptions = bp_unpack_value (&bp, 1);
......
...@@ -757,7 +757,6 @@ output_struct_function_base (struct output_block *ob, struct function *fn) ...@@ -757,7 +757,6 @@ output_struct_function_base (struct output_block *ob, struct function *fn)
bp = bitpack_create (ob->main_stream); bp = bitpack_create (ob->main_stream);
bp_pack_value (&bp, fn->is_thunk, 1); bp_pack_value (&bp, fn->is_thunk, 1);
bp_pack_value (&bp, fn->has_local_explicit_reg_vars, 1); bp_pack_value (&bp, fn->has_local_explicit_reg_vars, 1);
bp_pack_value (&bp, fn->after_tree_profile, 1);
bp_pack_value (&bp, fn->returns_pcc_struct, 1); bp_pack_value (&bp, fn->returns_pcc_struct, 1);
bp_pack_value (&bp, fn->returns_struct, 1); bp_pack_value (&bp, fn->returns_struct, 1);
bp_pack_value (&bp, fn->can_throw_non_call_exceptions, 1); bp_pack_value (&bp, fn->can_throw_non_call_exceptions, 1);
......
...@@ -3398,9 +3398,6 @@ expand_omp_taskreg (struct omp_region *region) ...@@ -3398,9 +3398,6 @@ expand_omp_taskreg (struct omp_region *region)
entry_stmt = last_stmt (region->entry); entry_stmt = last_stmt (region->entry);
child_fn = gimple_omp_taskreg_child_fn (entry_stmt); child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
child_cfun = DECL_STRUCT_FUNCTION (child_fn); child_cfun = DECL_STRUCT_FUNCTION (child_fn);
/* If this function has been already instrumented, make sure
the child function isn't instrumented again. */
child_cfun->after_tree_profile = cfun->after_tree_profile;
entry_bb = region->entry; entry_bb = region->entry;
exit_bb = region->exit; exit_bb = region->exit;
......
...@@ -1593,8 +1593,6 @@ common_handle_option (struct gcc_options *opts, ...@@ -1593,8 +1593,6 @@ common_handle_option (struct gcc_options *opts,
opts->x_profile_arc_flag = value; opts->x_profile_arc_flag = value;
if (!opts_set->x_flag_profile_values) if (!opts_set->x_flag_profile_values)
opts->x_flag_profile_values = value; opts->x_flag_profile_values = value;
if (!opts_set->x_flag_value_profile_transformations)
opts->x_flag_value_profile_transformations = value;
if (!opts_set->x_flag_inline_functions) if (!opts_set->x_flag_inline_functions)
opts->x_flag_inline_functions = value; opts->x_flag_inline_functions = value;
/* FIXME: Instrumentation we insert makes ipa-reference bitmaps /* FIXME: Instrumentation we insert makes ipa-reference bitmaps
......
...@@ -143,46 +143,15 @@ instrument_edges (struct edge_list *el) ...@@ -143,46 +143,15 @@ instrument_edges (struct edge_list *el)
static void static void
instrument_values (histogram_values values) instrument_values (histogram_values values)
{ {
unsigned i, t; unsigned i;
/* Emit code to generate the histograms before the insns. */ /* Emit code to generate the histograms before the insns. */
for (i = 0; i < VEC_length (histogram_value, values); i++) for (i = 0; i < VEC_length (histogram_value, values); i++)
{ {
histogram_value hist = VEC_index (histogram_value, values, i); histogram_value hist = VEC_index (histogram_value, values, i);
switch (hist->type) unsigned t = COUNTER_FOR_HIST_TYPE (hist->type);
{
case HIST_TYPE_INTERVAL:
t = GCOV_COUNTER_V_INTERVAL;
break;
case HIST_TYPE_POW2:
t = GCOV_COUNTER_V_POW2;
break;
case HIST_TYPE_SINGLE_VALUE:
t = GCOV_COUNTER_V_SINGLE;
break;
case HIST_TYPE_CONST_DELTA:
t = GCOV_COUNTER_V_DELTA;
break;
case HIST_TYPE_INDIR_CALL:
t = GCOV_COUNTER_V_INDIR;
break;
case HIST_TYPE_AVERAGE:
t = GCOV_COUNTER_AVERAGE;
break;
case HIST_TYPE_IOR:
t = GCOV_COUNTER_IOR;
break;
default:
gcc_unreachable ();
}
if (!coverage_counter_alloc (t, hist->n_counters)) if (!coverage_counter_alloc (t, hist->n_counters))
continue; continue;
...@@ -870,9 +839,6 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum, ...@@ -870,9 +839,6 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
free (histogram_counts[t]); free (histogram_counts[t]);
} }
/* The entry basic block will be moved around so that it has index=1,
there is nothing at index 0 and the exit is at n_basic_block. */
#define BB_TO_GCOV_INDEX(bb) ((bb)->index - 1)
/* When passed NULL as file_name, initialize. /* When passed NULL as file_name, initialize.
When passed something else, output the necessary commands to change When passed something else, output the necessary commands to change
line to LINE and offset to FILE_NAME. */ line to LINE and offset to FILE_NAME. */
...@@ -899,7 +865,7 @@ output_location (char const *file_name, int line, ...@@ -899,7 +865,7 @@ output_location (char const *file_name, int line,
if (!*offset) if (!*offset)
{ {
*offset = gcov_write_tag (GCOV_TAG_LINES); *offset = gcov_write_tag (GCOV_TAG_LINES);
gcov_write_unsigned (BB_TO_GCOV_INDEX (bb)); gcov_write_unsigned (bb->index);
name_differs = line_differs=true; name_differs = line_differs=true;
} }
...@@ -919,19 +885,22 @@ output_location (char const *file_name, int line, ...@@ -919,19 +885,22 @@ output_location (char const *file_name, int line,
} }
} }
/* Instrument and/or analyze program behavior based on program flow graph. /* Instrument and/or analyze program behavior based on the program's CFG.
In either case, this function builds a flow graph for the function being
compiled. The flow graph is stored in BB_GRAPH. This function creates a representation of the control flow graph (of
the function being compiled) that is suitable for the instrumentation
of edges and/or converting measured edge counts to counts on the
complete CFG.
When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
the flow graph that are needed to reconstruct the dynamic behavior of the the flow graph that are needed to reconstruct the dynamic behavior of the
flow graph. flow graph. This data is written to the gcno file for gcov.
When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
information from a data file containing edge count information from previous information from the gcda file containing edge count information from
executions of the function being compiled. In this case, the flow graph is previous executions of the function being compiled. In this case, the
annotated with actual execution counts, which are later propagated into the control flow graph is annotated with actual execution counts by
rtl for optimization purposes. compute_branch_probabilities().
Main entry point of this file. */ Main entry point of this file. */
...@@ -1145,8 +1114,7 @@ branch_prob (void) ...@@ -1145,8 +1114,7 @@ branch_prob (void)
lineno_checksum = coverage_compute_lineno_checksum (); lineno_checksum = coverage_compute_lineno_checksum ();
/* Write the data from which gcov can reconstruct the basic block /* Write the data from which gcov can reconstruct the basic block
graph and function line numbers */ graph and function line numbers (the gcno file). */
if (coverage_begin_function (lineno_checksum, cfg_checksum)) if (coverage_begin_function (lineno_checksum, cfg_checksum))
{ {
gcov_position_t offset; gcov_position_t offset;
...@@ -1157,12 +1125,6 @@ branch_prob (void) ...@@ -1157,12 +1125,6 @@ branch_prob (void)
gcov_write_unsigned (0); gcov_write_unsigned (0);
gcov_write_length (offset); gcov_write_length (offset);
/* Keep all basic block indexes nonnegative in the gcov output.
Index 0 is used for entry block, last index is for exit
block. */
ENTRY_BLOCK_PTR->index = 1;
EXIT_BLOCK_PTR->index = last_basic_block;
/* Arcs */ /* Arcs */
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
{ {
...@@ -1170,7 +1132,7 @@ branch_prob (void) ...@@ -1170,7 +1132,7 @@ branch_prob (void)
edge_iterator ei; edge_iterator ei;
offset = gcov_write_tag (GCOV_TAG_ARCS); offset = gcov_write_tag (GCOV_TAG_ARCS);
gcov_write_unsigned (BB_TO_GCOV_INDEX (bb)); gcov_write_unsigned (bb->index);
FOR_EACH_EDGE (e, ei, bb->succs) FOR_EACH_EDGE (e, ei, bb->succs)
{ {
...@@ -1191,7 +1153,7 @@ branch_prob (void) ...@@ -1191,7 +1153,7 @@ branch_prob (void)
&& e->src->next_bb == e->dest) && e->src->next_bb == e->dest)
flag_bits |= GCOV_ARC_FALLTHROUGH; flag_bits |= GCOV_ARC_FALLTHROUGH;
gcov_write_unsigned (BB_TO_GCOV_INDEX (e->dest)); gcov_write_unsigned (e->dest->index);
gcov_write_unsigned (flag_bits); gcov_write_unsigned (flag_bits);
} }
} }
...@@ -1199,9 +1161,6 @@ branch_prob (void) ...@@ -1199,9 +1161,6 @@ branch_prob (void)
gcov_write_length (offset); gcov_write_length (offset);
} }
ENTRY_BLOCK_PTR->index = ENTRY_BLOCK;
EXIT_BLOCK_PTR->index = EXIT_BLOCK;
/* Line numbers. */ /* Line numbers. */
/* Initialize the output. */ /* Initialize the output. */
output_location (NULL, 0, NULL, NULL); output_location (NULL, 0, NULL, NULL);
...@@ -1247,8 +1206,6 @@ branch_prob (void) ...@@ -1247,8 +1206,6 @@ branch_prob (void)
} }
} }
#undef BB_TO_GCOV_INDEX
if (flag_profile_values) if (flag_profile_values)
gimple_find_values_to_profile (&values); gimple_find_values_to_profile (&values);
...@@ -1391,8 +1348,7 @@ find_spanning_tree (struct edge_list *el) ...@@ -1391,8 +1348,7 @@ find_spanning_tree (struct edge_list *el)
} }
} }
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb) clear_aux_for_blocks ();
bb->aux = NULL;
} }
/* Perform file-level initialization for branch-prob processing. */ /* Perform file-level initialization for branch-prob processing. */
......
...@@ -2110,7 +2110,6 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, gcov_type count) ...@@ -2110,7 +2110,6 @@ initialize_cfun (tree new_fndecl, tree callee_fndecl, gcov_type count)
cfun->can_delete_dead_exceptions = src_cfun->can_delete_dead_exceptions; cfun->can_delete_dead_exceptions = src_cfun->can_delete_dead_exceptions;
cfun->returns_struct = src_cfun->returns_struct; cfun->returns_struct = src_cfun->returns_struct;
cfun->returns_pcc_struct = src_cfun->returns_pcc_struct; cfun->returns_pcc_struct = src_cfun->returns_pcc_struct;
cfun->after_tree_profile = src_cfun->after_tree_profile;
init_empty_tree_cfg (); init_empty_tree_cfg ();
......
...@@ -100,6 +100,8 @@ init_ic_make_global_vars (void) ...@@ -100,6 +100,8 @@ init_ic_make_global_vars (void)
varpool_finalize_decl (ic_gcov_type_ptr_var); varpool_finalize_decl (ic_gcov_type_ptr_var);
} }
/* Create the type and function decls for the interface with gcov. */
void void
gimple_init_edge_profiler (void) gimple_init_edge_profiler (void)
{ {
...@@ -332,8 +334,9 @@ gimple_gen_ic_profiler (histogram_value value, unsigned tag, unsigned base) ...@@ -332,8 +334,9 @@ gimple_gen_ic_profiler (histogram_value value, unsigned tag, unsigned base)
/* Insert code: /* Insert code:
__gcov_indirect_call_counters = get_relevant_counter_ptr (); stmt1: __gcov_indirect_call_counters = get_relevant_counter_ptr ();
__gcov_indirect_call_callee = (void *) indirect call argument; stmt2: tmp1 = (void *) (indirect call argument value)
stmt3: __gcov_indirect_call_callee = tmp1;
*/ */
tmp1 = create_tmp_reg (ptr_void, "PROF"); tmp1 = create_tmp_reg (ptr_void, "PROF");
...@@ -368,6 +371,13 @@ gimple_gen_ic_func_profiler (void) ...@@ -368,6 +371,13 @@ gimple_gen_ic_func_profiler (void)
gimple_init_edge_profiler (); gimple_init_edge_profiler ();
/* Insert code:
stmt1: __gcov_indirect_call_profiler (__gcov_indirect_call_counters,
current_function_funcdef_no,
&current_function_decl,
__gcov_indirect_call_callee);
*/
gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
cur_func = force_gimple_operand_gsi (&gsi, cur_func = force_gimple_operand_gsi (&gsi,
...@@ -461,12 +471,9 @@ tree_profiling (void) ...@@ -461,12 +471,9 @@ tree_profiling (void)
{ {
struct cgraph_node *node; struct cgraph_node *node;
/* Don't profile functions produced at destruction time, particularly /* This is a small-ipa pass that gets called only once, from
the gcov datastructure initializer. Don't profile if it has been cgraphunit.c:ipa_passes(). */
already instrumented either (when OpenMP expansion creates gcc_assert (cgraph_state == CGRAPH_STATE_IPA_SSA);
child function from already instrumented body). */
if (cgraph_state == CGRAPH_STATE_FINISHED)
return 0;
init_node_map(); init_node_map();
...@@ -476,8 +483,7 @@ tree_profiling (void) ...@@ -476,8 +483,7 @@ tree_profiling (void)
continue; continue;
/* Don't profile functions produced for builtin stuff. */ /* Don't profile functions produced for builtin stuff. */
if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION)
|| DECL_STRUCT_FUNCTION (node->symbol.decl)->after_tree_profile)
continue; continue;
push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl)); push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
...@@ -489,6 +495,7 @@ tree_profiling (void) ...@@ -489,6 +495,7 @@ tree_profiling (void)
/* Local pure-const may imply need to fixup the cfg. */ /* Local pure-const may imply need to fixup the cfg. */
if (execute_fixup_cfg () & TODO_cleanup_cfg) if (execute_fixup_cfg () & TODO_cleanup_cfg)
cleanup_tree_cfg (); cleanup_tree_cfg ();
branch_prob (); branch_prob ();
if (! flag_branch_probabilities if (! flag_branch_probabilities
...@@ -519,8 +526,7 @@ tree_profiling (void) ...@@ -519,8 +526,7 @@ tree_profiling (void)
continue; continue;
/* Don't profile functions produced for builtin stuff. */ /* Don't profile functions produced for builtin stuff. */
if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION)
|| DECL_STRUCT_FUNCTION (node->symbol.decl)->after_tree_profile)
continue; continue;
cgraph_set_const_flag (node, false, false); cgraph_set_const_flag (node, false, false);
...@@ -538,8 +544,7 @@ tree_profiling (void) ...@@ -538,8 +544,7 @@ tree_profiling (void)
continue; continue;
/* Don't profile functions produced for builtin stuff. */ /* Don't profile functions produced for builtin stuff. */
if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION if (DECL_SOURCE_LOCATION (node->symbol.decl) == BUILTINS_LOCATION)
|| DECL_STRUCT_FUNCTION (node->symbol.decl)->after_tree_profile)
continue; continue;
push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl)); push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
...@@ -556,7 +561,6 @@ tree_profiling (void) ...@@ -556,7 +561,6 @@ tree_profiling (void)
} }
} }
cfun->after_tree_profile = 1;
update_ssa (TODO_update_ssa); update_ssa (TODO_update_ssa);
rebuild_cgraph_edges (); rebuild_cgraph_edges ();
......
...@@ -53,28 +53,63 @@ along with GCC; see the file COPYING3. If not see ...@@ -53,28 +53,63 @@ along with GCC; see the file COPYING3. If not see
1) Division/modulo specialization. Provided that we can determine that the 1) Division/modulo specialization. Provided that we can determine that the
operands of the division have some special properties, we may use it to operands of the division have some special properties, we may use it to
produce more effective code. produce more effective code.
2) Speculative prefetching. If we are able to determine that the difference
between addresses accessed by a memory reference is usually constant, we
may add the prefetch instructions.
FIXME: This transformation was removed together with RTL based value
profiling.
3) Indirect/virtual call specialization. If we can determine most 2) Indirect/virtual call specialization. If we can determine most
common function callee in indirect/virtual call. We can use this common function callee in indirect/virtual call. We can use this
information to improve code effectiveness (especially info for information to improve code effectiveness (especially info for
inliner). the inliner).
Every such optimization should add its requirements for profiled values to 3) Speculative prefetching. If we are able to determine that the difference
insn_values_to_profile function. This function is called from branch_prob between addresses accessed by a memory reference is usually constant, we
in profile.c and the requested values are instrumented by it in the first may add the prefetch instructions.
compilation with -fprofile-arcs. The optimization may then read the FIXME: This transformation was removed together with RTL based value
gathered data in the second compilation with -fbranch-probabilities. profiling.
The measured data is pointed to from the histograms
field of the statement annotation of the instrumented insns. It is
kept as a linked list of struct histogram_value_t's, which contain the
same information as above. */
Value profiling internals
==========================
Every value profiling transformation starts with defining what values
to profile. There are different histogram types (see HIST_TYPE_* in
value-prof.h) and each transformation can request one or more histogram
types per GIMPLE statement. The function gimple_find_values_to_profile()
collects the values to profile in a VEC, and adds the number of counters
required for the different histogram types.
For a -fprofile-generate run, the statements for which values should be
recorded, are instrumented in instrument_values(). The instrumentation
is done by helper functions that can be found in tree-profile.c, where
new types of histograms can be added if necessary.
After a -fprofile-use, the value profiling data is read back in by
compute_value_histograms() that translates the collected data to
histograms and attaches them to the profiled statements via
gimple_add_histogram_value(). Histograms are stored in a hash table
that is attached to every instrumented function, see VALUE_HISTOGRAMS
in function.h.
The value-profile transformations driver is the function
gimple_value_profile_transformations(). It traverses all statements in
the to-be-transformed function, and looks for statements with one or
more histograms attached to it. If a statement has histograms, the
transformation functions are called on the statement.
Limitations / FIXME / TODO:
* Only one histogram of each type can be associated with a statement.
* Currently, HIST_TYPE_CONST_DELTA is not implemented.
(This type of histogram was originally used to implement a form of
stride profiling based speculative prefetching to improve SPEC2000
scores for memory-bound benchmarks, mcf and equake. However, this
was an RTL value-profiling transformation, and those have all been
removed.)
* Some value profile transformations are done in builtins.c (?!)
* Updating of histograms needs some TLC.
* The value profiling code could be used to record analysis results
from non-profiling (e.g. VRP).
* Adding new profilers should be simplified, starting with a cleanup
of what-happens-where and with making gimple_find_values_to_profile
and gimple_value_profile_transformations table-driven, perhaps...
*/
static tree gimple_divmod_fixed_value (gimple, tree, int, gcov_type, gcov_type); static tree gimple_divmod_fixed_value (gimple, tree, int, gcov_type, gcov_type);
static tree gimple_mod_pow2 (gimple, int, gcov_type, gcov_type); static tree gimple_mod_pow2 (gimple, int, gcov_type, gcov_type);
...@@ -84,7 +119,7 @@ static bool gimple_divmod_fixed_value_transform (gimple_stmt_iterator *); ...@@ -84,7 +119,7 @@ static bool gimple_divmod_fixed_value_transform (gimple_stmt_iterator *);
static bool gimple_mod_pow2_value_transform (gimple_stmt_iterator *); static bool gimple_mod_pow2_value_transform (gimple_stmt_iterator *);
static bool gimple_mod_subtract_transform (gimple_stmt_iterator *); static bool gimple_mod_subtract_transform (gimple_stmt_iterator *);
static bool gimple_stringops_transform (gimple_stmt_iterator *); static bool gimple_stringops_transform (gimple_stmt_iterator *);
static bool gimple_ic_transform (gimple); static bool gimple_ic_transform (gimple_stmt_iterator *);
/* Allocate histogram value. */ /* Allocate histogram value. */
...@@ -309,7 +344,7 @@ dump_histograms_for_stmt (struct function *fun, FILE *dump_file, gimple stmt) ...@@ -309,7 +344,7 @@ dump_histograms_for_stmt (struct function *fun, FILE *dump_file, gimple stmt)
{ {
histogram_value hist; histogram_value hist;
for (hist = gimple_histogram_value (fun, stmt); hist; hist = hist->hvalue.next) for (hist = gimple_histogram_value (fun, stmt); hist; hist = hist->hvalue.next)
dump_histogram_value (dump_file, hist); dump_histogram_value (dump_file, hist);
} }
/* Remove all histograms associated with STMT. */ /* Remove all histograms associated with STMT. */
...@@ -519,12 +554,11 @@ gimple_value_profile_transformations (void) ...@@ -519,12 +554,11 @@ gimple_value_profile_transformations (void)
will be added before the current statement, and that the will be added before the current statement, and that the
current statement remain valid (although possibly current statement remain valid (although possibly
modified) upon return. */ modified) upon return. */
if (flag_value_profile_transformations if (gimple_mod_subtract_transform (&gsi)
&& (gimple_mod_subtract_transform (&gsi) || gimple_divmod_fixed_value_transform (&gsi)
|| gimple_divmod_fixed_value_transform (&gsi) || gimple_mod_pow2_value_transform (&gsi)
|| gimple_mod_pow2_value_transform (&gsi) || gimple_stringops_transform (&gsi)
|| gimple_stringops_transform (&gsi) || gimple_ic_transform (&gsi))
|| gimple_ic_transform (stmt)))
{ {
stmt = gsi_stmt (gsi); stmt = gsi_stmt (gsi);
changed = true; changed = true;
...@@ -1283,8 +1317,9 @@ gimple_ic (gimple icall_stmt, struct cgraph_node *direct_call, ...@@ -1283,8 +1317,9 @@ gimple_ic (gimple icall_stmt, struct cgraph_node *direct_call,
*/ */
static bool static bool
gimple_ic_transform (gimple stmt) gimple_ic_transform (gimple_stmt_iterator *gsi)
{ {
gimple stmt = gsi_stmt (*gsi);
histogram_value histogram; histogram_value histogram;
gcov_type val, count, all, bb_all; gcov_type val, count, all, bb_all;
gcov_type prob; gcov_type prob;
...@@ -1749,12 +1784,9 @@ gimple_stringops_values_to_profile (gimple stmt, histogram_values *values) ...@@ -1749,12 +1784,9 @@ gimple_stringops_values_to_profile (gimple stmt, histogram_values *values)
static void static void
gimple_values_to_profile (gimple stmt, histogram_values *values) gimple_values_to_profile (gimple stmt, histogram_values *values)
{ {
if (flag_value_profile_transformations) gimple_divmod_values_to_profile (stmt, values);
{ gimple_stringops_values_to_profile (stmt, values);
gimple_divmod_values_to_profile (stmt, values); gimple_indirect_call_to_profile (stmt, values);
gimple_stringops_values_to_profile (stmt, values);
gimple_indirect_call_to_profile (stmt, values);
}
} }
void void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment