Commit 6d9901e7 by Zdenek Dvorak Committed by Zdenek Dvorak

Makefile.in (rtl-profile.o, [...]): Add GGC_H dependency.

	* Makefile.in (rtl-profile.o, value-prof.o): Add GGC_H dependency.
	* common.opt (fspeculative-prefetching): New.
	* flags.h (flag_speculative_prefetching_set): Declare.
	* gcov-io.c (gcov_write_counter, gcov_read_counter): Allow negative
	values.
	* opts.c (flag_speculative_prefetching_set): New variable.
	(common_handle_option): Handle -fspeculative-prefetching.
	* passes.c (rest_of_compilation): Ditto.
	* profile.c (instrument_values, compute_value_histograms, branch_prob):
	Use vectors instead of arrays.
	* toplev.c (process_options): Handle -fspeculative-prefetching.
	* rtl-profile.c: Include ggc.h.
	(rtl_gen_interval_profiler, rtl_gen_pow2_profiler,
	rtl_gen_one_value_profiler_no_edge_manipulation,
	rtl_gen_one_value_profiler, rtl_gen_const_delta_profiler): Type of
	argument changed.
	* tree-profile.c (tree_gen_interval_profiler, tree_gen_pow2_profiler,
	tree_gen_one_value_profiler, tree_gen_const_delta_profiler): Type of
	argument changed.
	* value-prof.c: Include ggc.h.
	(NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX): New
	macros.
	(insn_prefetch_values_to_profile, find_mem_reference_1,
	find_mem_reference_2, find_mem_reference, gen_speculative_prefetch,
	speculative_prefetching_transform): New.
	(value_profile_transformations): Call speculative_prefetching_transform.
	(insn_values_to_profile): Call insn_prefetch_values_to_profile.
	(insn_divmod_values_to_profile, rtl_find_values_to_profile,
	tree_find_values_to_profile, find_values_to_profile): Use vectors
	instead of arrays.
	(free_profiled_values): Removed.
	* value-prof.h (struct histogram_value): Renamed to
	struct histogram_value_t.
	(histogram_value, histogram_values): New types.
	(find_values_to_profile): Declaration changed.
	(free_profiled_values): Removed.
	(struct profile_hooks): Type of argument of the hooks changed to
	histogram_value.
	* doc/invoke.texi (-fspeculative-prefetching): Document.

From-SVN: r86930
parent d7fe1183
2004-09-01 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz> 2004-09-01 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* Makefile.in (rtl-profile.o, value-prof.o): Add GGC_H dependency.
* common.opt (fspeculative-prefetching): New.
* flags.h (flag_speculative_prefetching_set): Declare.
* gcov-io.c (gcov_write_counter, gcov_read_counter): Allow negative
values.
* opts.c (flag_speculative_prefetching_set): New variable.
(common_handle_option): Handle -fspeculative-prefetching.
* passes.c (rest_of_compilation): Ditto.
* profile.c (instrument_values, compute_value_histograms, branch_prob):
Use vectors instead of arrays.
* toplev.c (process_options): Handle -fspeculative-prefetching.
* rtl-profile.c: Include ggc.h.
(rtl_gen_interval_profiler, rtl_gen_pow2_profiler,
rtl_gen_one_value_profiler_no_edge_manipulation,
rtl_gen_one_value_profiler, rtl_gen_const_delta_profiler): Type of
argument changed.
* tree-profile.c (tree_gen_interval_profiler, tree_gen_pow2_profiler,
tree_gen_one_value_profiler, tree_gen_const_delta_profiler): Type of
argument changed.
* value-prof.c: Include ggc.h.
(NOPREFETCH_RANGE_MIN, NOPREFETCH_RANGE_MAX): New
macros.
(insn_prefetch_values_to_profile, find_mem_reference_1,
find_mem_reference_2, find_mem_reference, gen_speculative_prefetch,
speculative_prefetching_transform): New.
(value_profile_transformations): Call speculative_prefetching_transform.
(insn_values_to_profile): Call insn_prefetch_values_to_profile.
(insn_divmod_values_to_profile, rtl_find_values_to_profile,
tree_find_values_to_profile, find_values_to_profile): Use vectors
instead of arrays.
(free_profiled_values): Removed.
* value-prof.h (struct histogram_value): Renamed to
struct histogram_value_t.
(histogram_value, histogram_values): New types.
(find_values_to_profile): Declaration changed.
(free_profiled_values): Removed.
(struct profile_hooks): Type of argument of the hooks changed to
histogram_value.
* doc/invoke.texi (-fspeculative-prefetching): Document.
2004-09-01 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
PR rtl-optimization/16408 PR rtl-optimization/16408
* gcse.c (replace_store_insn): Fix LIBCALL/RETVAL notes. * gcse.c (replace_store_insn): Fix LIBCALL/RETVAL notes.
......
...@@ -1959,10 +1959,10 @@ tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ ...@@ -1959,10 +1959,10 @@ tree-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H) tree-pass.h $(TREE_FLOW_H) $(TIMEVAR_H)
rtl-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ rtl-profile.o : tree-profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) function.h \ $(TM_H) $(RTL_H) $(TREE_H) $(FLAGS_H) output.h $(REGS_H) $(EXPR_H) function.h \
toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_FLOW_H) value-prof.h $(GGC_H)
value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \ $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h $(FLAGS_H) \
$(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H) $(GGC_H)
loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(FLAGS_H) $(LOOP_H) \ loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(FLAGS_H) $(LOOP_H) \
insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \ insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \
real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h $(CFGLOOP_H) \ real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h $(CFGLOOP_H) \
......
...@@ -752,6 +752,10 @@ fsingle-precision-constant ...@@ -752,6 +752,10 @@ fsingle-precision-constant
Common Report Var(flag_single_precision_constant) Common Report Var(flag_single_precision_constant)
Convert floating point constants to single precision constants Convert floating point constants to single precision constants
fspeculative-prefetching
Common Report Var(flag_speculative_prefetching)
Use value profiling for speculative prefetching
; Emit code to probe the stack, to help detect stack overflow; also ; Emit code to probe the stack, to help detect stack overflow; also
; may cause large objects to be allocated dynamically. ; may cause large objects to be allocated dynamically.
fstack-check fstack-check
......
...@@ -311,7 +311,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -311,7 +311,7 @@ Objective-C and Objective-C++ Dialects}.
-fsched-stalled-insns=@var{n} -fsched-stalled-insns-dep=@var{n} @gol -fsched-stalled-insns=@var{n} -fsched-stalled-insns-dep=@var{n} @gol
-fsched2-use-superblocks @gol -fsched2-use-superblocks @gol
-fsched2-use-traces -freschedule-modulo-scheduled-loops @gol -fsched2-use-traces -freschedule-modulo-scheduled-loops @gol
-fsignaling-nans -fsingle-precision-constant @gol -fsignaling-nans -fsingle-precision-constant -fspeculative-prefetching @gol
-fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol -fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
-funroll-all-loops -funroll-loops -fpeel-loops @gol -funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol -funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
...@@ -5011,6 +5011,21 @@ and actually performs the optimizations based on them. ...@@ -5011,6 +5011,21 @@ and actually performs the optimizations based on them.
Currently the optimizations include specialization of division operation Currently the optimizations include specialization of division operation
using the knowledge about the value of the denominator. using the knowledge about the value of the denominator.
@item -fspeculative-prefetching
@opindex fspeculative-prefetching
If combined with @option{-fprofile-arcs}, it instructs the compiler to add
code to gather information about the addresses of memory references in the
program.
With @option{-fbranch-probabilities}, it reads back the data gathered
and issues prefetch instructions according to them. In addition to the opportunities
noticed by @option{-fprefetch-loop-arrays}, it also notices more complicated
memory access patterns -- for example accesses to data stored in a linked
list whose elements are usually allocated sequentially.
In order to prevent issuing double prefetches, usage of
@option{-fspeculative-prefetching} implies @option{-fno-prefetch-loop-arrays}.
Enabled with @option{-fprofile-generate} and @option{-fprofile-use}. Enabled with @option{-fprofile-generate} and @option{-fprofile-use}.
@item -frename-registers @item -frename-registers
......
...@@ -257,6 +257,10 @@ extern int flag_remove_unreachable_functions; ...@@ -257,6 +257,10 @@ extern int flag_remove_unreachable_functions;
/* Nonzero if we should track variables. */ /* Nonzero if we should track variables. */
extern int flag_var_tracking; extern int flag_var_tracking;
/* True if flag_speculative_prefetching was set by user. Used to suppress
warning message in case flag was set by -fprofile-{generate,use}. */
extern bool flag_speculative_prefetching_set;
/* A string that's used when a random name is required. NULL means /* A string that's used when a random name is required. NULL means
to make it really random. */ to make it really random. */
......
...@@ -268,9 +268,6 @@ gcov_write_counter (gcov_type value) ...@@ -268,9 +268,6 @@ gcov_write_counter (gcov_type value)
buffer[1] = (gcov_unsigned_t) (value >> 32); buffer[1] = (gcov_unsigned_t) (value >> 32);
else else
buffer[1] = 0; buffer[1] = 0;
if (value < 0)
gcov_var.error = -1;
} }
#endif /* IN_LIBGCOV */ #endif /* IN_LIBGCOV */
...@@ -454,8 +451,6 @@ gcov_read_counter (void) ...@@ -454,8 +451,6 @@ gcov_read_counter (void)
else if (buffer[1]) else if (buffer[1])
gcov_var.error = -1; gcov_var.error = -1;
if (value < 0)
gcov_var.error = -1;
return value; return value;
} }
......
...@@ -93,6 +93,7 @@ static const char undocumented_msg[] = N_("This switch lacks documentation"); ...@@ -93,6 +93,7 @@ static const char undocumented_msg[] = N_("This switch lacks documentation");
static bool profile_arc_flag_set, flag_profile_values_set; static bool profile_arc_flag_set, flag_profile_values_set;
static bool flag_unroll_loops_set, flag_tracer_set; static bool flag_unroll_loops_set, flag_tracer_set;
static bool flag_value_profile_transformations_set; static bool flag_value_profile_transformations_set;
bool flag_speculative_prefetching_set;
static bool flag_peel_loops_set, flag_branch_probabilities_set; static bool flag_peel_loops_set, flag_branch_probabilities_set;
/* Input file names. */ /* Input file names. */
...@@ -830,6 +831,10 @@ common_handle_option (size_t scode, const char *arg, int value) ...@@ -830,6 +831,10 @@ common_handle_option (size_t scode, const char *arg, int value)
flag_tracer = value; flag_tracer = value;
if (!flag_value_profile_transformations_set) if (!flag_value_profile_transformations_set)
flag_value_profile_transformations = value; flag_value_profile_transformations = value;
#ifdef HAVE_prefetch
if (!flag_speculative_prefetching_set)
flag_speculative_prefetching = value;
#endif
break; break;
case OPT_fprofile_generate: case OPT_fprofile_generate:
...@@ -839,6 +844,10 @@ common_handle_option (size_t scode, const char *arg, int value) ...@@ -839,6 +844,10 @@ common_handle_option (size_t scode, const char *arg, int value)
flag_profile_values = value; flag_profile_values = value;
if (!flag_value_profile_transformations_set) if (!flag_value_profile_transformations_set)
flag_value_profile_transformations = value; flag_value_profile_transformations = value;
#ifdef HAVE_prefetch
if (!flag_speculative_prefetching_set)
flag_speculative_prefetching = value;
#endif
break; break;
case OPT_fprofile_values: case OPT_fprofile_values:
...@@ -861,7 +870,11 @@ common_handle_option (size_t scode, const char *arg, int value) ...@@ -861,7 +870,11 @@ common_handle_option (size_t scode, const char *arg, int value)
break; break;
case OPT_fvpt: case OPT_fvpt:
flag_value_profile_transformations_set = value; flag_value_profile_transformations_set = true;
break;
case OPT_fspeculative_prefetching:
flag_speculative_prefetching_set = true;
break; break;
case OPT_frandom_seed: case OPT_frandom_seed:
......
...@@ -1820,7 +1820,8 @@ rest_of_compilation (void) ...@@ -1820,7 +1820,8 @@ rest_of_compilation (void)
if (flag_branch_probabilities if (flag_branch_probabilities
&& flag_profile_values && flag_profile_values
&& flag_value_profile_transformations) && (flag_value_profile_transformations
|| flag_speculative_prefetching))
rest_of_handle_value_profile_transformations (); rest_of_handle_value_profile_transformations ();
/* Remove the death notes created for vpt. */ /* Remove the death notes created for vpt. */
......
...@@ -119,9 +119,9 @@ static int total_num_branches; ...@@ -119,9 +119,9 @@ static int total_num_branches;
/* Forward declarations. */ /* Forward declarations. */
static void find_spanning_tree (struct edge_list *); static void find_spanning_tree (struct edge_list *);
static unsigned instrument_edges (struct edge_list *); static unsigned instrument_edges (struct edge_list *);
static void instrument_values (unsigned, struct histogram_value *); static void instrument_values (histogram_values);
static void compute_branch_probabilities (void); static void compute_branch_probabilities (void);
static void compute_value_histograms (unsigned, struct histogram_value *); static void compute_value_histograms (histogram_values);
static gcov_type * get_exec_counts (void); static gcov_type * get_exec_counts (void);
static basic_block find_group (basic_block); static basic_block find_group (basic_block);
static void union_groups (basic_block, basic_block); static void union_groups (basic_block, basic_block);
...@@ -166,17 +166,18 @@ instrument_edges (struct edge_list *el) ...@@ -166,17 +166,18 @@ instrument_edges (struct edge_list *el)
return num_instr_edges; return num_instr_edges;
} }
/* Add code to measure histograms list of VALUES of length N_VALUES. */ /* Add code to measure histograms for values in list VALUES. */
static void static void
instrument_values (unsigned n_values, struct histogram_value *values) instrument_values (histogram_values values)
{ {
unsigned i, t; unsigned i, t;
/* Emit code to generate the histograms before the insns. */ /* Emit code to generate the histograms before the insns. */
for (i = 0; i < n_values; i++) for (i = 0; i < VEC_length (histogram_value, values); i++)
{ {
switch (values[i].type) histogram_value hist = VEC_index (histogram_value, values, i);
switch (hist->type)
{ {
case HIST_TYPE_INTERVAL: case HIST_TYPE_INTERVAL:
t = GCOV_COUNTER_V_INTERVAL; t = GCOV_COUNTER_V_INTERVAL;
...@@ -197,25 +198,25 @@ instrument_values (unsigned n_values, struct histogram_value *values) ...@@ -197,25 +198,25 @@ instrument_values (unsigned n_values, struct histogram_value *values)
default: default:
abort (); abort ();
} }
if (!coverage_counter_alloc (t, values[i].n_counters)) if (!coverage_counter_alloc (t, hist->n_counters))
continue; continue;
switch (values[i].type) switch (hist->type)
{ {
case HIST_TYPE_INTERVAL: case HIST_TYPE_INTERVAL:
(profile_hooks->gen_interval_profiler) (values + i, t, 0); (profile_hooks->gen_interval_profiler) (hist, t, 0);
break; break;
case HIST_TYPE_POW2: case HIST_TYPE_POW2:
(profile_hooks->gen_pow2_profiler) (values + i, t, 0); (profile_hooks->gen_pow2_profiler) (hist, t, 0);
break; break;
case HIST_TYPE_SINGLE_VALUE: case HIST_TYPE_SINGLE_VALUE:
(profile_hooks->gen_one_value_profiler) (values + i, t, 0); (profile_hooks->gen_one_value_profiler) (hist, t, 0);
break; break;
case HIST_TYPE_CONST_DELTA: case HIST_TYPE_CONST_DELTA:
(profile_hooks->gen_const_delta_profiler) (values + i, t, 0); (profile_hooks->gen_const_delta_profiler) (hist, t, 0);
break; break;
default: default:
...@@ -613,22 +614,27 @@ compute_branch_probabilities (void) ...@@ -613,22 +614,27 @@ compute_branch_probabilities (void)
free_aux_for_blocks (); free_aux_for_blocks ();
} }
/* Load value histograms for N_VALUES values whose description is stored /* Load value histograms for values whose description is stored in VALUES array
in VALUES array from .da file. */ from .da file. */
static void static void
compute_value_histograms (unsigned n_values, struct histogram_value *values) compute_value_histograms (histogram_values values)
{ {
unsigned i, j, t, any; unsigned i, j, t, any;
unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS]; unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS]; gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
gcov_type *act_count[GCOV_N_VALUE_COUNTERS]; gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
gcov_type *aact_count; gcov_type *aact_count;
histogram_value hist;
for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
n_histogram_counters[t] = 0; n_histogram_counters[t] = 0;
for (i = 0; i < n_values; i++) for (i = 0; i < VEC_length (histogram_value, values); i++)
n_histogram_counters[(int) (values[i].type)] += values[i].n_counters; {
hist = VEC_index (histogram_value, values, i);
n_histogram_counters[(int) hist->type] += hist->n_counters;
}
any = 0; any = 0;
for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++) for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
...@@ -649,25 +655,27 @@ compute_value_histograms (unsigned n_values, struct histogram_value *values) ...@@ -649,25 +655,27 @@ compute_value_histograms (unsigned n_values, struct histogram_value *values)
if (!any) if (!any)
return; return;
for (i = 0; i < n_values; i++) for (i = 0; i < VEC_length (histogram_value, values); i++)
{ {
rtx hist_list = NULL_RTX; rtx hist_list = NULL_RTX;
t = (int) (values[i].type);
hist = VEC_index (histogram_value, values, i);
t = (int) hist->type;
/* FIXME: make this work for trees. */ /* FIXME: make this work for trees. */
if (!ir_type ()) if (!ir_type ())
{ {
aact_count = act_count[t]; aact_count = act_count[t];
act_count[t] += values[i].n_counters; act_count[t] += hist->n_counters;
for (j = values[i].n_counters; j > 0; j--) for (j = hist->n_counters; j > 0; j--)
hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]), hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]),
hist_list); hist_list);
hist_list = alloc_EXPR_LIST (0, hist_list = alloc_EXPR_LIST (0,
copy_rtx ((rtx)values[i].value), hist_list); copy_rtx ((rtx) hist->value), hist_list);
hist_list = alloc_EXPR_LIST (0, GEN_INT (values[i].type), hist_list); hist_list = alloc_EXPR_LIST (0, GEN_INT (hist->type), hist_list);
REG_NOTES ((rtx)values[i].insn) = REG_NOTES ((rtx) hist->insn) =
alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list, alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list,
REG_NOTES ((rtx)values[i].insn)); REG_NOTES ((rtx) hist->insn));
} }
} }
...@@ -700,8 +708,7 @@ branch_prob (void) ...@@ -700,8 +708,7 @@ branch_prob (void)
unsigned num_edges, ignored_edges; unsigned num_edges, ignored_edges;
unsigned num_instrumented; unsigned num_instrumented;
struct edge_list *el; struct edge_list *el;
unsigned n_values = 0; histogram_values values = NULL;
struct histogram_value *values = NULL;
total_num_times_called++; total_num_times_called++;
...@@ -960,13 +967,13 @@ branch_prob (void) ...@@ -960,13 +967,13 @@ branch_prob (void)
#undef BB_TO_GCOV_INDEX #undef BB_TO_GCOV_INDEX
if (flag_profile_values) if (flag_profile_values)
find_values_to_profile (&n_values, &values); find_values_to_profile (&values);
if (flag_branch_probabilities) if (flag_branch_probabilities)
{ {
compute_branch_probabilities (); compute_branch_probabilities ();
if (flag_profile_values) if (flag_profile_values)
compute_value_histograms (n_values, values); compute_value_histograms (values);
} }
remove_fake_edges (); remove_fake_edges ();
...@@ -981,7 +988,7 @@ branch_prob (void) ...@@ -981,7 +988,7 @@ branch_prob (void)
abort (); abort ();
if (flag_profile_values) if (flag_profile_values)
instrument_values (n_values, values); instrument_values (values);
/* Commit changes done by instrumentation. */ /* Commit changes done by instrumentation. */
if (ir_type ()) if (ir_type ())
......
...@@ -62,6 +62,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -62,6 +62,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "coverage.h" #include "coverage.h"
#include "value-prof.h" #include "value-prof.h"
#include "tree.h" #include "tree.h"
#include "ggc.h"
/* Output instructions as RTL to increment the edge execution count. */ /* Output instructions as RTL to increment the edge execution count. */
...@@ -93,8 +94,7 @@ rtl_gen_edge_profiler (int edgeno, edge e) ...@@ -93,8 +94,7 @@ rtl_gen_edge_profiler (int edgeno, edge e)
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag, rtl_gen_interval_profiler (histogram_value value, unsigned tag, unsigned base)
unsigned base)
{ {
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
...@@ -196,8 +196,7 @@ rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag, ...@@ -196,8 +196,7 @@ rtl_gen_interval_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag, rtl_gen_pow2_profiler (histogram_value value, unsigned tag, unsigned base)
unsigned base)
{ {
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
...@@ -272,7 +271,7 @@ rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag, ...@@ -272,7 +271,7 @@ rtl_gen_pow2_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static rtx static rtx
rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value, rtl_gen_one_value_profiler_no_edge_manipulation (histogram_value value,
unsigned tag, unsigned base) unsigned tag, unsigned base)
{ {
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
...@@ -351,8 +350,7 @@ rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value, ...@@ -351,8 +350,7 @@ rtl_gen_one_value_profiler_no_edge_manipulation (struct histogram_value *value,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag, rtl_gen_one_value_profiler (histogram_value value, unsigned tag, unsigned base)
unsigned base)
{ {
edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn), edge e = split_block (BLOCK_FOR_INSN ((rtx)value->insn),
PREV_INSN ((rtx)value->insn)); PREV_INSN ((rtx)value->insn));
...@@ -368,10 +366,9 @@ rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag, ...@@ -368,10 +366,9 @@ rtl_gen_one_value_profiler (struct histogram_value *value, unsigned tag,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag, rtl_gen_const_delta_profiler (histogram_value value, unsigned tag, unsigned base)
unsigned base)
{ {
struct histogram_value one_value_delta; histogram_value one_value_delta;
unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1); unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0); enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
rtx stored_value_ref, stored_value, tmp, uval; rtx stored_value_ref, stored_value, tmp, uval;
...@@ -393,12 +390,13 @@ rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag, ...@@ -393,12 +390,13 @@ rtl_gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
copy_rtx (uval), copy_rtx (stored_value), copy_rtx (uval), copy_rtx (stored_value),
NULL_RTX, 0, OPTAB_WIDEN); NULL_RTX, 0, OPTAB_WIDEN);
one_value_delta.value = tmp; one_value_delta = ggc_alloc (sizeof (*one_value_delta));
one_value_delta.mode = mode; one_value_delta->value = tmp;
one_value_delta.seq = NULL_RTX; one_value_delta->mode = mode;
one_value_delta.insn = value->insn; one_value_delta->seq = NULL_RTX;
one_value_delta.type = HIST_TYPE_SINGLE_VALUE; one_value_delta->insn = value->insn;
emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (&one_value_delta, one_value_delta->type = HIST_TYPE_SINGLE_VALUE;
emit_insn (rtl_gen_one_value_profiler_no_edge_manipulation (one_value_delta,
tag, base + 1)); tag, base + 1));
emit_move_insn (copy_rtx (stored_value), uval); emit_move_insn (copy_rtx (stored_value), uval);
sequence = get_insns (); sequence = get_insns ();
......
...@@ -1730,6 +1730,17 @@ process_options (void) ...@@ -1730,6 +1730,17 @@ process_options (void)
if (flag_value_profile_transformations) if (flag_value_profile_transformations)
flag_profile_values = 1; flag_profile_values = 1;
/* Speculative prefetching implies the value profiling. We also switch off
the prefetching in the loop optimizer, so that we do not emit double
prefetches. TODO -- we should teach these two to cooperate; the loop
based prefetching may sometimes do a better job, especially in connection
with reuse analysis. */
if (flag_speculative_prefetching)
{
flag_profile_values = 1;
flag_prefetch_loop_arrays = 0;
}
/* Warn about options that are not supported on this machine. */ /* Warn about options that are not supported on this machine. */
#ifndef INSN_SCHEDULING #ifndef INSN_SCHEDULING
if (flag_schedule_insns || flag_schedule_insns_after_reload) if (flag_schedule_insns || flag_schedule_insns_after_reload)
...@@ -1898,12 +1909,24 @@ process_options (void) ...@@ -1898,12 +1909,24 @@ process_options (void)
warning ("-fprefetch-loop-arrays not supported for this target"); warning ("-fprefetch-loop-arrays not supported for this target");
flag_prefetch_loop_arrays = 0; flag_prefetch_loop_arrays = 0;
} }
if (flag_speculative_prefetching)
{
if (flag_speculative_prefetching_set)
warning ("-fspeculative-prefetching not supported for this target");
flag_speculative_prefetching = 0;
}
#else #else
if (flag_prefetch_loop_arrays && !HAVE_prefetch) if (flag_prefetch_loop_arrays && !HAVE_prefetch)
{ {
warning ("-fprefetch-loop-arrays not supported for this target (try -march switches)"); warning ("-fprefetch-loop-arrays not supported for this target (try -march switches)");
flag_prefetch_loop_arrays = 0; flag_prefetch_loop_arrays = 0;
} }
if (flag_speculative_prefetching && !HAVE_prefetch)
{
if (flag_speculative_prefetching_set)
warning ("-fspeculative-prefetching not supported for this target (try -march switches)");
flag_speculative_prefetching = 0;
}
#endif #endif
/* This combination of options isn't handled for i386 targets and doesn't /* This combination of options isn't handled for i386 targets and doesn't
......
...@@ -94,7 +94,7 @@ tree_gen_edge_profiler (int edgeno, edge e) ...@@ -94,7 +94,7 @@ tree_gen_edge_profiler (int edgeno, edge e)
tag of the section for counters, BASE is offset of the counter position. */ tag of the section for counters, BASE is offset of the counter position. */
static void static void
tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, tree_gen_interval_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED) unsigned base ATTRIBUTE_UNUSED)
{ {
...@@ -107,7 +107,7 @@ tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, ...@@ -107,7 +107,7 @@ tree_gen_interval_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
of the section for counters, BASE is offset of the counter position. */ of the section for counters, BASE is offset of the counter position. */
static void static void
tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, tree_gen_pow2_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED) unsigned base ATTRIBUTE_UNUSED)
{ {
...@@ -120,7 +120,7 @@ tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, ...@@ -120,7 +120,7 @@ tree_gen_pow2_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, tree_gen_one_value_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED) unsigned base ATTRIBUTE_UNUSED)
{ {
...@@ -134,7 +134,7 @@ tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, ...@@ -134,7 +134,7 @@ tree_gen_one_value_profiler (struct histogram_value *value ATTRIBUTE_UNUSED,
section for counters, BASE is offset of the counter position. */ section for counters, BASE is offset of the counter position. */
static void static void
tree_gen_const_delta_profiler (struct histogram_value *value ATTRIBUTE_UNUSED, tree_gen_const_delta_profiler (histogram_value value ATTRIBUTE_UNUSED,
unsigned tag ATTRIBUTE_UNUSED, unsigned tag ATTRIBUTE_UNUSED,
unsigned base ATTRIBUTE_UNUSED) unsigned base ATTRIBUTE_UNUSED)
{ {
......
...@@ -33,11 +33,20 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -33,11 +33,20 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "recog.h" #include "recog.h"
#include "optabs.h" #include "optabs.h"
#include "regs.h" #include "regs.h"
#include "ggc.h"
static struct value_prof_hooks *value_prof_hooks; static struct value_prof_hooks *value_prof_hooks;
/* In this file value profile based optimizations will be placed (none are /* In this file value profile based optimizations are placed. Currently the
here just now, but they are hopefully coming soon). following optimizations are implemented (for more detailed descriptions
see comments at value_profile_transformations):
1) Division/modulo specialisation. Provided that we can determine that the
operands of the division have some special properties, we may use it to
produce more effective code.
2) Speculative prefetching. If we are able to determine that the difference
between addresses accessed by a memory reference is usually constant, we
may add the prefetch instructions.
Every such optimization should add its requirements for profiled values to Every such optimization should add its requirements for profiled values to
insn_values_to_profile function. This function is called from branch_prob insn_values_to_profile function. This function is called from branch_prob
...@@ -52,35 +61,51 @@ static struct value_prof_hooks *value_prof_hooks; ...@@ -52,35 +61,51 @@ static struct value_prof_hooks *value_prof_hooks;
-- the expression that is profiled -- the expression that is profiled
-- list of counters starting from the first one. */ -- list of counters starting from the first one. */
static void insn_divmod_values_to_profile (rtx, unsigned *, /* For speculative prefetching, the range in that we do not prefetch (because
struct histogram_value **); we assume that it will be in cache anyway). The asymmetry between min and
static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); max range is trying to reflect the fact that the sequential prefetching
of the data is commonly done directly by hardware. Nevertheless, these
values are just a guess and should of course be target-specific. */
#ifndef NOPREFETCH_RANGE_MIN
#define NOPREFETCH_RANGE_MIN (-16)
#endif
#ifndef NOPREFETCH_RANGE_MAX
#define NOPREFETCH_RANGE_MAX 32
#endif
static void insn_divmod_values_to_profile (rtx, histogram_values *);
#ifdef HAVE_prefetch
static bool insn_prefetch_values_to_profile (rtx, histogram_values *);
static int find_mem_reference_1 (rtx *, void *);
static void find_mem_reference_2 (rtx, rtx, void *);
static bool find_mem_reference (rtx, rtx *, int *);
#endif
static void insn_values_to_profile (rtx, histogram_values *);
static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx, static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
rtx, gcov_type, int); rtx, gcov_type, int);
static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int); static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx, int);
static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx, static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
int, int, int); int, int, int);
#ifdef HAVE_prefetch
static rtx gen_speculative_prefetch (rtx, gcov_type, int);
#endif
static bool divmod_fixed_value_transform (rtx insn); static bool divmod_fixed_value_transform (rtx insn);
static bool mod_pow2_value_transform (rtx); static bool mod_pow2_value_transform (rtx);
static bool mod_subtract_transform (rtx); static bool mod_subtract_transform (rtx);
#ifdef HAVE_prefetch
/* Release the list of VALUES of length N_VALUES for that we want to measure static bool speculative_prefetching_transform (rtx);
histograms. */ #endif
void
free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
struct histogram_value *values)
{
free (values);
}
/* Find values inside INSN for that we want to measure histograms for /* Find values inside INSN for that we want to measure histograms for
division/modulo optimization. */ division/modulo optimization and stores them to VALUES. */
static void static void
insn_divmod_values_to_profile (rtx insn, unsigned *n_values, insn_divmod_values_to_profile (rtx insn, histogram_values *values)
struct histogram_value **values)
{ {
rtx set, set_src, op1, op2; rtx set, set_src, op1, op2;
enum machine_mode mode; enum machine_mode mode;
histogram_value hist;
if (!INSN_P (insn)) if (!INSN_P (insn))
return; return;
...@@ -108,30 +133,26 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, ...@@ -108,30 +133,26 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
/* Check for a special case where the divisor is power of 2. */ /* Check for a special case where the divisor is power of 2. */
if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2)) if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
{ {
*values = xrealloc (*values, hist = ggc_alloc (sizeof (*hist));
(*n_values + 1) hist->value = op2;
* sizeof (struct histogram_value)); hist->seq = NULL_RTX;
(*values)[*n_values].value = op2; hist->mode = mode;
(*values)[*n_values].seq = NULL_RTX; hist->insn = insn;
(*values)[*n_values].mode = mode; hist->type = HIST_TYPE_POW2;
(*values)[*n_values].insn = insn; hist->hdata.pow2.may_be_other = 1;
(*values)[*n_values].type = HIST_TYPE_POW2; VEC_safe_push (histogram_value, *values, hist);
(*values)[*n_values].hdata.pow2.may_be_other = 1;
(*n_values)++;
} }
/* Check whether the divisor is not in fact a constant. */ /* Check whether the divisor is not in fact a constant. */
if (!CONSTANT_P (op2)) if (!CONSTANT_P (op2))
{ {
*values = xrealloc (*values, hist = ggc_alloc (sizeof (*hist));
(*n_values + 1) hist->value = op2;
* sizeof (struct histogram_value)); hist->mode = mode;
(*values)[*n_values].value = op2; hist->seq = NULL_RTX;
(*values)[*n_values].mode = mode; hist->insn = insn;
(*values)[*n_values].seq = NULL_RTX; hist->type = HIST_TYPE_SINGLE_VALUE;
(*values)[*n_values].insn = insn; VEC_safe_push (histogram_value, *values, hist);
(*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
(*n_values)++;
} }
/* For mod, check whether it is not often a noop (or replaceable by /* For mod, check whether it is not often a noop (or replaceable by
...@@ -140,22 +161,20 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, ...@@ -140,22 +161,20 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
{ {
rtx tmp; rtx tmp;
*values = xrealloc (*values, hist = ggc_alloc (sizeof (*hist));
(*n_values + 1)
* sizeof (struct histogram_value));
start_sequence (); start_sequence ();
tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2)); tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
(*values)[*n_values].value = force_operand (tmp, NULL_RTX); hist->value = force_operand (tmp, NULL_RTX);
(*values)[*n_values].seq = get_insns (); hist->seq = get_insns ();
end_sequence (); end_sequence ();
(*values)[*n_values].mode = mode; hist->mode = mode;
(*values)[*n_values].insn = insn; hist->insn = insn;
(*values)[*n_values].type = HIST_TYPE_INTERVAL; hist->type = HIST_TYPE_INTERVAL;
(*values)[*n_values].hdata.intvl.int_start = 0; hist->hdata.intvl.int_start = 0;
(*values)[*n_values].hdata.intvl.steps = 2; hist->hdata.intvl.steps = 2;
(*values)[*n_values].hdata.intvl.may_be_less = 1; hist->hdata.intvl.may_be_less = 1;
(*values)[*n_values].hdata.intvl.may_be_more = 1; hist->hdata.intvl.may_be_more = 1;
(*n_values)++; VEC_safe_push (histogram_value, *values, hist);
} }
return; return;
...@@ -164,72 +183,162 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values, ...@@ -164,72 +183,162 @@ insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
} }
} }
#ifdef HAVE_prefetch
/* Callback handed to for_each_rtx by find_mem_reference.  EXPR points to the
   subexpression currently being visited; RET is really an rtx * owned by the
   caller.  If *EXPR is a MEM, it is copied to *RET and 1 is returned, which
   interrupts the traversal.  For any other expression 0 is returned and *RET
   is left alone.  */
static int
find_mem_reference_1 (rtx *expr, void *ret)
{
  rtx *found = ret;

  /* Not a memory reference -- keep walking.  */
  if (GET_CODE (*expr) != MEM)
    return 0;

  *found = *expr;
  return 1;
}
/* Called from find_mem_reference through note_stores to find out whether
   the memory reference MEM is a store.  EXPR is the expression that
   note_stores hands to the callback; when it is the very same rtx as MEM
   (pointer identity, not structural equality), the file-scope flag
   FMR2_WRITE is set to true.  The flag is never cleared here, so the caller
   has to reset it before starting the walk.  */
static int fmr2_write;

static void
find_mem_reference_2 (rtx expr, rtx pat ATTRIBUTE_UNUSED, void *mem)
{
  if (expr != mem)
    return;

  fmr2_write = true;
}
/* Look for a memory reference in the pattern of INSN.  On success the MEM
   found by the for_each_rtx walk is stored to *MEM, *WRITE is set to a
   nonzero value iff note_stores reports that very MEM as stored to, and true
   is returned.  If the pattern contains no MEM, *MEM is NULL_RTX, *WRITE is
   left untouched and false is returned.  */
static bool
find_mem_reference (rtx insn, rtx *mem, int *write)
{
  *mem = NULL_RTX;
  for_each_rtx (&PATTERN (insn), find_mem_reference_1, mem);

  if (*mem)
    {
      /* The note_stores callback communicates through FMR2_WRITE, so clear
	 it before the walk and read it back afterwards.  */
      fmr2_write = false;
      note_stores (PATTERN (insn), find_mem_reference_2, *mem);
      *write = fmr2_write;
      return true;
    }

  return false;
}
/* Find the value inside INSN whose histogram we want to measure for
   speculative prefetching, and append it to the vector *VALUES.  The value
   is the address of the memory reference located by find_mem_reference; it
   is profiled with a HIST_TYPE_CONST_DELTA histogram.  Nothing is recorded
   if INSN is not an insn, contains no memory reference, or if the address
   has side effects or is constant.
   Returns true if a value was added, false otherwise.  */
static bool
insn_prefetch_values_to_profile (rtx insn, histogram_values *values)
{
  rtx mem, addr;
  int write;
  histogram_value hist;

  if (!INSN_P (insn) || !find_mem_reference (insn, &mem, &write))
    return false;

  addr = XEXP (mem, 0);
  if (side_effects_p (addr) || CONSTANT_P (addr))
    return false;

  /* Only the fields below are filled in here; the remaining ones are left
     for later stages to set up.  */
  hist = ggc_alloc (sizeof (*hist));
  hist->insn = insn;
  hist->seq = NULL_RTX;
  hist->value = addr;
  hist->mode = GET_MODE (addr);
  hist->type = HIST_TYPE_CONST_DELTA;
  VEC_safe_push (histogram_value, *values, hist);

  return true;
}
#endif
/* Find values inside INSN for that we want to measure histograms and adds /* Find values inside INSN for that we want to measure histograms and adds
them to list VALUES (increasing the record of its length in N_VALUES). */ them to list VALUES (increasing the record of its length in N_VALUES). */
static void static void
insn_values_to_profile (rtx insn, insn_values_to_profile (rtx insn, histogram_values *values)
unsigned *n_values,
struct histogram_value **values)
{ {
if (flag_value_profile_transformations) if (flag_value_profile_transformations)
insn_divmod_values_to_profile (insn, n_values, values); insn_divmod_values_to_profile (insn, values);
#ifdef HAVE_prefetch
if (flag_speculative_prefetching)
insn_prefetch_values_to_profile (insn, values);
#endif
} }
/* Find list of values for that we want to measure histograms. */ /* Find list of values for that we want to measure histograms. */
static void static void
rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values) rtl_find_values_to_profile (histogram_values *values)
{ {
rtx insn; rtx insn;
unsigned i; unsigned i;
life_analysis (NULL, PROP_DEATH_NOTES); life_analysis (NULL, PROP_DEATH_NOTES);
*n_values = 0; *values = VEC_alloc (histogram_value, 0);
*values = NULL;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
insn_values_to_profile (insn, n_values, values); insn_values_to_profile (insn, values);
for (i = 0; i < *n_values; i++) for (i = 0; i < VEC_length (histogram_value, *values); i++)
{ {
switch ((*values)[i].type) histogram_value hist = VEC_index (histogram_value, *values, i);
switch (hist->type)
{ {
case HIST_TYPE_INTERVAL: case HIST_TYPE_INTERVAL:
if (dump_file) if (dump_file)
fprintf (dump_file, fprintf (dump_file,
"Interval counter for insn %d, range %d -- %d.\n", "Interval counter for insn %d, range %d -- %d.\n",
INSN_UID ((rtx)(*values)[i].insn), INSN_UID ((rtx)hist->insn),
(*values)[i].hdata.intvl.int_start, hist->hdata.intvl.int_start,
((*values)[i].hdata.intvl.int_start (hist->hdata.intvl.int_start
+ (*values)[i].hdata.intvl.steps - 1)); + hist->hdata.intvl.steps - 1));
(*values)[i].n_counters = (*values)[i].hdata.intvl.steps + hist->n_counters = hist->hdata.intvl.steps +
((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + (hist->hdata.intvl.may_be_less ? 1 : 0) +
((*values)[i].hdata.intvl.may_be_more ? 1 : 0); (hist->hdata.intvl.may_be_more ? 1 : 0);
break; break;
case HIST_TYPE_POW2: case HIST_TYPE_POW2:
if (dump_file) if (dump_file)
fprintf (dump_file, fprintf (dump_file,
"Pow2 counter for insn %d.\n", "Pow2 counter for insn %d.\n",
INSN_UID ((rtx)(*values)[i].insn)); INSN_UID ((rtx)hist->insn));
(*values)[i].n_counters hist->n_counters
= GET_MODE_BITSIZE ((*values)[i].mode) = GET_MODE_BITSIZE (hist->mode)
+ ((*values)[i].hdata.pow2.may_be_other ? 1 : 0); + (hist->hdata.pow2.may_be_other ? 1 : 0);
break; break;
case HIST_TYPE_SINGLE_VALUE: case HIST_TYPE_SINGLE_VALUE:
if (dump_file) if (dump_file)
fprintf (dump_file, fprintf (dump_file,
"Single value counter for insn %d.\n", "Single value counter for insn %d.\n",
INSN_UID ((rtx)(*values)[i].insn)); INSN_UID ((rtx)hist->insn));
(*values)[i].n_counters = 3; hist->n_counters = 3;
break; break;
case HIST_TYPE_CONST_DELTA: case HIST_TYPE_CONST_DELTA:
if (dump_file) if (dump_file)
fprintf (dump_file, fprintf (dump_file,
"Constant delta counter for insn %d.\n", "Constant delta counter for insn %d.\n",
INSN_UID ((rtx)(*values)[i].insn)); INSN_UID ((rtx)hist->insn));
(*values)[i].n_counters = 4; hist->n_counters = 4;
break; break;
default: default:
...@@ -300,6 +409,23 @@ rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values) ...@@ -300,6 +409,23 @@ rtl_find_values_to_profile (unsigned *n_values, struct histogram_value **values)
It would be possible to continue analogically for K * b for other small It would be possible to continue analogically for K * b for other small
K's, but it is probably not useful. K's, but it is probably not useful.
5)
Read or write of mem[address], where the value of address changes usually
by a constant C != 0 between the following accesses to the computation; with
-fspeculative-prefetching we then add a prefetch of address + C before
the insn. This handles prefetching of several interesting cases in addition
to a simple prefetching for addresses that are induction variables, e. g.
linked lists allocated sequentially (even in case they are processed
recursively).
TODO -- we should also check whether there is not (usually) a small
difference with the adjacent memory references, so that we do
not issue overlapping prefetches. Also we should employ some
heuristics to eliminate cases where prefetching evidently spoils
the code.
-- it should somehow cooperate with the loop optimizer prefetching
TODO: TODO:
There are other useful cases that could be handled by a similar mechanism, There are other useful cases that could be handled by a similar mechanism,
...@@ -353,6 +479,11 @@ rtl_value_profile_transformations (void) ...@@ -353,6 +479,11 @@ rtl_value_profile_transformations (void)
|| divmod_fixed_value_transform (insn) || divmod_fixed_value_transform (insn)
|| mod_pow2_value_transform (insn))) || mod_pow2_value_transform (insn)))
changed = true; changed = true;
#ifdef HAVE_prefetch
if (flag_speculative_prefetching
&& speculative_prefetching_transform (insn))
changed = true;
#endif
} }
if (changed) if (changed)
...@@ -755,11 +886,117 @@ mod_subtract_transform (rtx insn) ...@@ -755,11 +886,117 @@ mod_subtract_transform (rtx insn)
return true; return true;
} }
#ifdef HAVE_prefetch
/* Generate code for transformation 5: a prefetch of ADDRESS + DELTA, where
   ADDRESS is the address of the profiled memory reference and DELTA is the
   constant step observed between consecutive accesses.  WRITE is nonzero if
   the reference is a store to the mem; it is passed on as the second operand
   of the prefetch.  Returns the generated insn sequence; nothing is emitted
   into the main insn stream here.  */
static rtx
gen_speculative_prefetch (rtx address, gcov_type delta, int write)
{
  rtx target, insns;

  /* TODO: we do the prefetching for just one iteration ahead, which
     often is not enough.  */
  start_sequence ();

  if (!offsettable_address_p (0, VOIDmode, address))
    /* The sum cannot simply be folded into the address; compute it
       explicitly (any insns needed end up in the sequence).  */
    target = force_operand (simplify_gen_binary (PLUS, Pmode,
						 copy_rtx (address),
						 GEN_INT (delta)),
			    NULL);
  else
    target = plus_constant (copy_rtx (address), delta);

  /* Make sure the address is acceptable to the prefetch pattern; if its
     operand predicate rejects it, load it to a register first.  */
  if (!insn_data[(int) CODE_FOR_prefetch].operand[0].predicate
	(target, insn_data[(int) CODE_FOR_prefetch].operand[0].mode))
    target = force_reg (Pmode, target);

  /* NOTE(review): the constant 3 is presumably the locality hint of the
     prefetch pattern -- confirm against the md documentation.  */
  emit_insn (gen_prefetch (target, GEN_INT (write), GEN_INT (3)));

  insns = get_insns ();
  end_sequence ();

  return insns;
}
/* Do transform 5) on INSN if applicable: when the REG_VALUE_PROFILE note of
   kind HIST_TYPE_CONST_DELTA attached to INSN says that the address of its
   memory reference usually changes by a constant outside of the
   NOPREFETCH_RANGE_MIN .. NOPREFETCH_RANGE_MAX interval, split the block
   before INSN and queue a prefetch of address + delta on the new edge.
   Returns true if the prefetch was inserted, false otherwise.  */
static bool
speculative_prefetching_transform (rtx insn)
{
  rtx note, cell, profiled;
  rtx mem, address;
  gcov_type val, count, all;
  int write;
  edge e;

  if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn))
      || !find_mem_reference (insn, &mem, &write))
    return false;

  address = XEXP (mem, 0);
  if (side_effects_p (address) || CONSTANT_P (address))
    return false;

  /* Locate the constant delta histogram among the notes of INSN.  */
  note = REG_NOTES (insn);
  while (note
	 && !(REG_NOTE_KIND (note) == REG_VALUE_PROFILE
	      && (XEXP (XEXP (note, 0), 0)
		  == GEN_INT (HIST_TYPE_CONST_DELTA))))
    note = XEXP (note, 1);

  if (!note)
    return false;

  /* The payload is a list: the profiled expression, followed by the
     counters -- last value referenced, most common delta, number of times
     that delta was seen, and the total number of executions.  */
  cell = XEXP (XEXP (note, 0), 1);
  profiled = XEXP (cell, 0);
  cell = XEXP (cell, 1);

  /* Skip last value referenced.  */
  cell = XEXP (cell, 1);
  val = INTVAL (XEXP (cell, 0));

  cell = XEXP (cell, 1);
  count = INTVAL (XEXP (cell, 0));

  cell = XEXP (cell, 1);
  all = INTVAL (XEXP (cell, 0));

  /* With that few executions we do not really have a reason to optimize the
     statement, and more importantly, the data about differences of addresses
     are spoiled by the first item that had no previous value to compare
     with.  */
  if (all < 4)
    return false;

  /* We require that count is at least half of all; this means
     that for the transformation to fire the value must be constant
     at least 50% of time (and 75% gives the guarantee of usage).  The
     profiled expression must also still match the address we found.  */
  if (!rtx_equal_p (address, profiled) || 2 * count < all)
    return false;

  /* If the difference is too small, it does not make too much sense to
     prefetch, as the memory is probably already in cache.  */
  if (val >= NOPREFETCH_RANGE_MIN && val <= NOPREFETCH_RANGE_MAX)
    return false;

  if (dump_file)
    fprintf (dump_file, "Speculative prefetching for insn %d\n",
	     INSN_UID (insn));

  /* Split the block just before INSN and put the prefetch on the edge
     between the two halves.  */
  e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
  insert_insn_on_edge (gen_speculative_prefetch (address, val, write), e);

  return true;
}
#endif /* HAVE_prefetch */
/* Connection to the outside world. */ /* Connection to the outside world. */
/* Struct for IR-dependent hooks. */ /* Struct for IR-dependent hooks. */
struct value_prof_hooks { struct value_prof_hooks {
/* Find list of values for which we want to measure histograms. */ /* Find list of values for which we want to measure histograms. */
void (*find_values_to_profile) (unsigned *, struct histogram_value **); void (*find_values_to_profile) (histogram_values *);
/* Identify and exploit properties of values that are hard to analyze /* Identify and exploit properties of values that are hard to analyze
statically. See value-prof.c for more detail. */ statically. See value-prof.c for more detail. */
...@@ -783,10 +1020,8 @@ rtl_register_value_prof_hooks (void) ...@@ -783,10 +1020,8 @@ rtl_register_value_prof_hooks (void)
/* Tree-based versions are stubs for now. */ /* Tree-based versions are stubs for now. */
static void static void
tree_find_values_to_profile (unsigned *n_values, struct histogram_value **values) tree_find_values_to_profile (histogram_values *values ATTRIBUTE_UNUSED)
{ {
(void)n_values;
(void)values;
abort (); abort ();
} }
...@@ -811,9 +1046,9 @@ tree_register_value_prof_hooks (void) ...@@ -811,9 +1046,9 @@ tree_register_value_prof_hooks (void)
/* IR-independent entry points. */ /* IR-independent entry points. */
void void
find_values_to_profile (unsigned *n_values, struct histogram_value **values) find_values_to_profile (histogram_values *values)
{ {
(value_prof_hooks->find_values_to_profile) (n_values, values); (value_prof_hooks->find_values_to_profile) (values);
} }
bool bool
......
...@@ -39,12 +39,13 @@ enum hist_type ...@@ -39,12 +39,13 @@ enum hist_type
/* The value to measure. */ /* The value to measure. */
/* The void *'s are either rtx or tree, depending on which IR is in use. */ /* The void *'s are either rtx or tree, depending on which IR is in use. */
struct histogram_value struct histogram_value_t GTY(())
{ {
void * value; /* The value to profile. */ PTR GTY ((skip (""))) value; /* The value to profile. */
enum machine_mode mode; /* And its mode. */ enum machine_mode mode; /* And its mode. */
void * seq; /* Insns required to count the profiled value. */ PTR GTY ((skip (""))) seq; /* Insns required to count the
void * insn; /* Insn before that to measure. */ profiled value. */
PTR GTY ((skip (""))) insn; /* Insn before that to measure. */
enum hist_type type; /* Type of information to measure. */ enum hist_type type; /* Type of information to measure. */
unsigned n_counters; /* Number of required counters. */ unsigned n_counters; /* Number of required counters. */
union union
...@@ -63,13 +64,18 @@ struct histogram_value ...@@ -63,13 +64,18 @@ struct histogram_value
} hdata; /* Profiled information specific data. */ } hdata; /* Profiled information specific data. */
}; };
typedef struct histogram_value_t *histogram_value;
DEF_VEC_P(histogram_value);
typedef VEC(histogram_value) *histogram_values;
/* Hooks registration. */ /* Hooks registration. */
extern void rtl_register_value_prof_hooks (void); extern void rtl_register_value_prof_hooks (void);
extern void tree_register_value_prof_hooks (void); extern void tree_register_value_prof_hooks (void);
/* IR-independent entry points. */ /* IR-independent entry points. */
extern void find_values_to_profile (unsigned *, struct histogram_value **); extern void find_values_to_profile (histogram_values *);
extern void free_profiled_values (unsigned, struct histogram_value *);
extern bool value_profile_transformations (void); extern bool value_profile_transformations (void);
/* External declarations for edge-based profiling. */ /* External declarations for edge-based profiling. */
...@@ -78,18 +84,17 @@ struct profile_hooks { ...@@ -78,18 +84,17 @@ struct profile_hooks {
void (*gen_edge_profiler) (int, edge); void (*gen_edge_profiler) (int, edge);
/* Insert code to increment the interval histogram counter. */ /* Insert code to increment the interval histogram counter. */
void (*gen_interval_profiler) (struct histogram_value *, unsigned, unsigned); void (*gen_interval_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to increment the power of two histogram counter. */ /* Insert code to increment the power of two histogram counter. */
void (*gen_pow2_profiler) (struct histogram_value *, unsigned, unsigned); void (*gen_pow2_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to find the most common value. */ /* Insert code to find the most common value. */
void (*gen_one_value_profiler) (struct histogram_value *, unsigned, unsigned); void (*gen_one_value_profiler) (histogram_value, unsigned, unsigned);
/* Insert code to find the most common value of a difference between two /* Insert code to find the most common value of a difference between two
evaluations of an expression. */ evaluations of an expression. */
void (*gen_const_delta_profiler) (struct histogram_value *, unsigned, void (*gen_const_delta_profiler) (histogram_value, unsigned, unsigned);
unsigned);
FILE * (*profile_dump_file) (void); FILE * (*profile_dump_file) (void);
}; };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment