Commit fca9dc00 by Zdenek Dvorak Committed by Zdenek Dvorak

Makefile.in (toplev.o): Add value-prof.h dependency.

	* Makefile.in (toplev.o): Add value-prof.h dependency.
	(value-prof.o): Add REGS_H dependency.
	* common.opt (fprofile-values, fvpt): New.
	* flags.h (flag_value_profile_transformations): Declare.
	* opts.c (common_handle_option): Handle -fprofile_values and
	-fvpt.
	* profile.c (branch_prob): Don't remove death notes here.
	* timevar.def (TV_VPT): New.
	* value-prof.c: Include regs.h.
	(insn_divmod_values_to_profile, gen_divmod_fixed_value, gen_mod_pow2,
	gen_mod_subtract, divmod_fixed_value_transform,mod_pow2_value_transform,
	mod_subtract_transform, value_profile_transformations): New.
	(insn_values_to_profile): Call insn_divmod_values_to_profile.
	(find_values_to_profile): Add dumps.
	* value-prof.h (value_profile_transformations): Declare.
	* toplev.c: Include value-prof.h.
	(rest_of_handle_value_profile_transformations): New.
	(enum dump_file_index): Add DFI_vpt.
	(dump_file): Add vpt dump.
	(flag_value_profile_transformations): New.
	(lang_independent_options): Add flag_profile_values and
	flag_value_profile_transformations.
	(rest_of_compilation): Call
	rest_of_handle_value_profile_transformations.
	(process_options): Let -fvpt imply -fprofile-values.
	* doc/invoke.texi (-fvpt): Document.

From-SVN: r72685
parent 47395a24
2003-10-19 Zdenek Dvorak <rakdver@atrey.karlin.mff.cuni.cz>
* Makefile.in (toplev.o): Add value-prof.h dependency.
(value-prof.o): Add REGS_H dependency.
* common.opt (fprofile-values, fvpt): New.
* flags.h (flag_value_profile_transformations): Declare.
* opts.c (common_handle_option): Handle -fprofile_values and
-fvpt.
* profile.c (branch_prob): Don't remove death notes here.
* timevar.def (TV_VPT): New.
* value-prof.c: Include regs.h.
(insn_divmod_values_to_profile, gen_divmod_fixed_value, gen_mod_pow2,
gen_mod_subtract, divmod_fixed_value_transform,mod_pow2_value_transform,
mod_subtract_transform, value_profile_transformations): New.
(insn_values_to_profile): Call insn_divmod_values_to_profile.
(find_values_to_profile): Add dumps.
* value-prof.h (value_profile_transformations): Declare.
* toplev.c: Include value-prof.h.
(rest_of_handle_value_profile_transformations): New.
(enum dump_file_index): Add DFI_vpt.
(dump_file): Add vpt dump.
(flag_value_profile_transformations): New.
(lang_independent_options): Add flag_profile_values and
flag_value_profile_transformations.
(rest_of_compilation): Call
rest_of_handle_value_profile_transformations.
(process_options): Let -fvpt imply -fprofile-values.
* doc/invoke.texi (-fvpt): Document.
2003-10-19 Jan Hubicka <jh@suse.cz> 2003-10-19 Jan Hubicka <jh@suse.cz>
* i386.c (print_reg): Do not abort on certain registers. * i386.c (print_reg): Do not abort on certain registers.
......
...@@ -1499,7 +1499,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_ ...@@ -1499,7 +1499,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_
function.h flags.h xcoffout.h input.h $(INSN_ATTR_H) output.h $(DIAGNOSTIC_H) \ function.h flags.h xcoffout.h input.h $(INSN_ATTR_H) output.h $(DIAGNOSTIC_H) \
debug.h insn-config.h intl.h $(RECOG_H) Makefile toplev.h \ debug.h insn-config.h intl.h $(RECOG_H) Makefile toplev.h \
dwarf2out.h sdbout.h dbxout.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) \ dwarf2out.h sdbout.h dbxout.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) \
graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) \ graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) value-prof.h \
ssa.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) \ ssa.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) \
langhooks.h insn-flags.h cfglayout.h real.h cfgloop.h \ langhooks.h insn-flags.h cfglayout.h real.h cfgloop.h \
hosthooks.h $(LANGHOOKS_DEF_H) cgraph.h $(COVERAGE_H) hosthooks.h $(LANGHOOKS_DEF_H) cgraph.h $(COVERAGE_H)
...@@ -1653,7 +1653,7 @@ profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ ...@@ -1653,7 +1653,7 @@ profile.o : profile.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_H) value-prof.h toplev.h $(BASIC_BLOCK_H) $(COVERAGE_H) $(TREE_H) value-prof.h
value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ value-prof.o : value-prof.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
$(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h flags.h \ $(BASIC_BLOCK_H) hard-reg-set.h value-prof.h $(EXPR_H) output.h flags.h \
$(RECOG_H) insn-config.h $(OPTABS_H) $(RECOG_H) insn-config.h $(OPTABS_H) $(REGS_H)
loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h $(LOOP_H) \ loop.o : loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) flags.h $(LOOP_H) \
insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \ insn-config.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) \
real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \ real.h $(PREDICT_H) $(BASIC_BLOCK_H) function.h cfgloop.h \
......
...@@ -517,6 +517,10 @@ fprofile-arcs ...@@ -517,6 +517,10 @@ fprofile-arcs
Common Common
Insert arc-based program profiling code Insert arc-based program profiling code
fprofile-values
Common
Insert code to profile values of expressions
frandom-seed frandom-seed
Common Common
...@@ -719,6 +723,10 @@ fverbose-asm ...@@ -719,6 +723,10 @@ fverbose-asm
Common Common
Add extra commentary to assembler output Add extra commentary to assembler output
fvpt
Common
Use expression value profiles in optimizations
fweb fweb
Common Common
Construct webs and split unrelated uses of single variable Construct webs and split unrelated uses of single variable
......
...@@ -264,7 +264,7 @@ in the following sections. ...@@ -264,7 +264,7 @@ in the following sections.
@xref{Optimize Options,,Options that Control Optimization}. @xref{Optimize Options,,Options that Control Optimization}.
@gccoptlist{-falign-functions=@var{n} -falign-jumps=@var{n} @gol @gccoptlist{-falign-functions=@var{n} -falign-jumps=@var{n} @gol
-falign-labels=@var{n} -falign-loops=@var{n} @gol -falign-labels=@var{n} -falign-loops=@var{n} @gol
-fbranch-probabilities -fprofile-values -fbranch-target-load-optimize @gol -fbranch-probabilities -fprofile-values -fvpt -fbranch-target-load-optimize @gol
-fbranch-target-load-optimize2 -fcaller-saves -fcprop-registers @gol -fbranch-target-load-optimize2 -fcaller-saves -fcprop-registers @gol
-fcse-follow-jumps -fcse-skip-blocks -fdata-sections @gol -fcse-follow-jumps -fcse-skip-blocks -fdata-sections @gol
-fdelayed-branch -fdelete-null-pointer-checks @gol -fdelayed-branch -fdelete-null-pointer-checks @gol
...@@ -4564,6 +4564,16 @@ With @option{-fbranch-probabilities}, it reads back the data gathered ...@@ -4564,6 +4564,16 @@ With @option{-fbranch-probabilities}, it reads back the data gathered
from profiling values of expressions and adds @samp{REG_VALUE_PROFILE} from profiling values of expressions and adds @samp{REG_VALUE_PROFILE}
notes to instructions for their later usage in optimizations. notes to instructions for their later usage in optimizations.
@item -fvpt
@opindex fvpt
If combined with @option{-fprofile-arcs}, it instructs the compiler to add
a code to gather information about values of expressions.
With @option{-fbranch-probabilities}, it reads back the data gathered
and actually performs the optimizations based on them.
Currently the optimizations include specialization of division operation
using the knowledge about the value of the denominator.
@item -fnew-ra @item -fnew-ra
@opindex fnew-ra @opindex fnew-ra
Use a graph coloring register allocator. Currently this option is meant Use a graph coloring register allocator. Currently this option is meant
......
...@@ -680,6 +680,9 @@ extern int flag_gcse_sm; ...@@ -680,6 +680,9 @@ extern int flag_gcse_sm;
extern int flag_gcse_las; extern int flag_gcse_las;
/* Nonzero if value histograms should be used to optimize code. */
extern int flag_value_profile_transformations;
/* Perform branch target register optimization before prologue / epilogue /* Perform branch target register optimization before prologue / epilogue
threading. */ threading. */
......
...@@ -1182,6 +1182,14 @@ common_handle_option (size_t scode, const char *arg, ...@@ -1182,6 +1182,14 @@ common_handle_option (size_t scode, const char *arg,
profile_arc_flag = value; profile_arc_flag = value;
break; break;
case OPT_fprofile_values:
flag_profile_values = value;
break;
case OPT_fvpt:
flag_value_profile_transformations = value;
break;
case OPT_frandom_seed: case OPT_frandom_seed:
/* The real switch is -fno-random-seed. */ /* The real switch is -fno-random-seed. */
if (value) if (value)
......
...@@ -988,8 +988,6 @@ branch_prob (void) ...@@ -988,8 +988,6 @@ branch_prob (void)
allocate_reg_info (max_reg_num (), FALSE, FALSE); allocate_reg_info (max_reg_num (), FALSE, FALSE);
} }
if (flag_profile_values)
count_or_remove_death_notes (NULL, 1);
remove_fake_edges (); remove_fake_edges ();
free_aux_for_edges (); free_aux_for_edges ();
/* Re-merge split basic blocks and the mess introduced by /* Re-merge split basic blocks and the mess introduced by
......
...@@ -72,6 +72,7 @@ DEFTIMEVAR (TV_TRACER , "tracer") ...@@ -72,6 +72,7 @@ DEFTIMEVAR (TV_TRACER , "tracer")
DEFTIMEVAR (TV_WEB , "web") DEFTIMEVAR (TV_WEB , "web")
DEFTIMEVAR (TV_CSE2 , "CSE 2") DEFTIMEVAR (TV_CSE2 , "CSE 2")
DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction") DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction")
DEFTIMEVAR (TV_VPT , "value profile opts")
DEFTIMEVAR (TV_FLOW , "flow analysis") DEFTIMEVAR (TV_FLOW , "flow analysis")
DEFTIMEVAR (TV_COMBINE , "combiner") DEFTIMEVAR (TV_COMBINE , "combiner")
DEFTIMEVAR (TV_IFCVT , "if-conversion") DEFTIMEVAR (TV_IFCVT , "if-conversion")
......
...@@ -78,6 +78,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -78,6 +78,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "cgraph.h" #include "cgraph.h"
#include "opts.h" #include "opts.h"
#include "coverage.h" #include "coverage.h"
#include "value-prof.h"
#if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO) #if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
#include "dwarf2out.h" #include "dwarf2out.h"
...@@ -137,6 +138,7 @@ static void rest_of_handle_null_pointer (tree, rtx); ...@@ -137,6 +138,7 @@ static void rest_of_handle_null_pointer (tree, rtx);
static void rest_of_handle_addressof (tree, rtx); static void rest_of_handle_addressof (tree, rtx);
static void rest_of_handle_cfg (tree, rtx); static void rest_of_handle_cfg (tree, rtx);
static void rest_of_handle_branch_prob (tree, rtx); static void rest_of_handle_branch_prob (tree, rtx);
static void rest_of_handle_value_profile_transformations (tree, rtx);
static void rest_of_handle_if_conversion (tree, rtx); static void rest_of_handle_if_conversion (tree, rtx);
static void rest_of_handle_if_after_combine (tree, rtx); static void rest_of_handle_if_after_combine (tree, rtx);
static void rest_of_handle_tracer (tree, rtx); static void rest_of_handle_tracer (tree, rtx);
...@@ -264,6 +266,7 @@ enum dump_file_index ...@@ -264,6 +266,7 @@ enum dump_file_index
DFI_bypass, DFI_bypass,
DFI_cfg, DFI_cfg,
DFI_bp, DFI_bp,
DFI_vpt,
DFI_ce1, DFI_ce1,
DFI_tracer, DFI_tracer,
DFI_web, DFI_web,
...@@ -296,7 +299,7 @@ enum dump_file_index ...@@ -296,7 +299,7 @@ enum dump_file_index
Remaining -d letters: Remaining -d letters:
" m q " " m q "
" JK O Q V Y " " JK O Q Y "
*/ */
static struct dump_file_info dump_file[DFI_MAX] = static struct dump_file_info dump_file[DFI_MAX] =
...@@ -318,6 +321,7 @@ static struct dump_file_info dump_file[DFI_MAX] = ...@@ -318,6 +321,7 @@ static struct dump_file_info dump_file[DFI_MAX] =
{ "bypass", 'G', 1, 0, 0 }, /* Yes, duplicate enable switch. */ { "bypass", 'G', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "cfg", 'f', 1, 0, 0 }, { "cfg", 'f', 1, 0, 0 },
{ "bp", 'b', 1, 0, 0 }, { "bp", 'b', 1, 0, 0 },
{ "vpt", 'V', 1, 0, 0 },
{ "ce1", 'C', 1, 0, 0 }, { "ce1", 'C', 1, 0, 0 },
{ "tracer", 'T', 1, 0, 0 }, { "tracer", 'T', 1, 0, 0 },
{ "web", 'Z', 0, 0, 0 }, { "web", 'Z', 0, 0, 0 },
...@@ -404,6 +408,9 @@ int profile_arc_flag = 0; ...@@ -404,6 +408,9 @@ int profile_arc_flag = 0;
int flag_profile_values = 0; int flag_profile_values = 0;
/* Nonzero if value histograms should be used to optimize code. */
int flag_value_profile_transformations = 0;
/* Nonzero if generating info for gcov to calculate line test coverage. */ /* Nonzero if generating info for gcov to calculate line test coverage. */
int flag_test_coverage = 0; int flag_test_coverage = 0;
...@@ -1110,6 +1117,8 @@ static const lang_independent_options f_options[] = ...@@ -1110,6 +1117,8 @@ static const lang_independent_options f_options[] =
{"asynchronous-unwind-tables", &flag_asynchronous_unwind_tables, 1 }, {"asynchronous-unwind-tables", &flag_asynchronous_unwind_tables, 1 },
{"non-call-exceptions", &flag_non_call_exceptions, 1 }, {"non-call-exceptions", &flag_non_call_exceptions, 1 },
{"profile-arcs", &profile_arc_flag, 1 }, {"profile-arcs", &profile_arc_flag, 1 },
{"profile-values", &flag_profile_values, 1 },
{"vpt", &flag_value_profile_transformations, 1 },
{"test-coverage", &flag_test_coverage, 1 }, {"test-coverage", &flag_test_coverage, 1 },
{"branch-probabilities", &flag_branch_probabilities, 1 }, {"branch-probabilities", &flag_branch_probabilities, 1 },
{"profile", &profile_flag, 1 }, {"profile", &profile_flag, 1 },
...@@ -2462,6 +2471,7 @@ rest_of_handle_branch_prob (tree decl, rtx insns) ...@@ -2462,6 +2471,7 @@ rest_of_handle_branch_prob (tree decl, rtx insns)
timevar_push (TV_BRANCH_PROB); timevar_push (TV_BRANCH_PROB);
open_dump_file (DFI_bp, decl); open_dump_file (DFI_bp, decl);
if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities) if (profile_arc_flag || flag_test_coverage || flag_branch_probabilities)
branch_prob (); branch_prob ();
...@@ -2481,6 +2491,20 @@ rest_of_handle_branch_prob (tree decl, rtx insns) ...@@ -2481,6 +2491,20 @@ rest_of_handle_branch_prob (tree decl, rtx insns)
timevar_pop (TV_BRANCH_PROB); timevar_pop (TV_BRANCH_PROB);
} }
/* Do optimizations based on expression value profiles. */
static void
rest_of_handle_value_profile_transformations (tree decl, rtx insns)
{
open_dump_file (DFI_vpt, decl);
timevar_push (TV_VPT);
if (value_profile_transformations ())
cleanup_cfg (CLEANUP_EXPENSIVE);
timevar_pop (TV_VPT);
close_dump_file (DFI_vpt, print_rtl_with_bb, insns);
}
/* Do control and data flow analysis; write some of the results to the /* Do control and data flow analysis; write some of the results to the
dump file. */ dump file. */
static void static void
...@@ -3345,8 +3369,19 @@ rest_of_compilation (tree decl) ...@@ -3345,8 +3369,19 @@ rest_of_compilation (tree decl)
if (optimize > 0 if (optimize > 0
|| profile_arc_flag || flag_test_coverage || flag_branch_probabilities) || profile_arc_flag || flag_test_coverage || flag_branch_probabilities)
{
rest_of_handle_branch_prob (decl, insns); rest_of_handle_branch_prob (decl, insns);
if (flag_branch_probabilities
&& flag_profile_values
&& flag_value_profile_transformations)
rest_of_handle_value_profile_transformations (decl, insns);
/* Remove the death notes created for vpt. */
if (flag_profile_values)
count_or_remove_death_notes (NULL, 1);
}
if (optimize > 0) if (optimize > 0)
rest_of_handle_if_conversion (decl, insns); rest_of_handle_if_conversion (decl, insns);
...@@ -4228,6 +4263,9 @@ process_options (void) ...@@ -4228,6 +4263,9 @@ process_options (void)
if (flag_unit_at_a_time && ! lang_hooks.callgraph.expand_function) if (flag_unit_at_a_time && ! lang_hooks.callgraph.expand_function)
flag_unit_at_a_time = 0; flag_unit_at_a_time = 0;
if (flag_value_profile_transformations)
flag_profile_values = 1;
/* Warn about options that are not supported on this machine. */ /* Warn about options that are not supported on this machine. */
#ifndef INSN_SCHEDULING #ifndef INSN_SCHEDULING
if (flag_schedule_insns || flag_schedule_insns_after_reload) if (flag_schedule_insns || flag_schedule_insns_after_reload)
......
...@@ -32,6 +32,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -32,6 +32,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "insn-config.h" #include "insn-config.h"
#include "recog.h" #include "recog.h"
#include "optabs.h" #include "optabs.h"
#include "regs.h"
/* In this file value profile based optimizations will be placed (none are /* In this file value profile based optimizations will be placed (none are
here just now, but they are hopefully coming soon). here just now, but they are hopefully coming soon).
...@@ -49,7 +50,17 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -49,7 +50,17 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-- the expression that is profiled -- the expression that is profiled
-- list of counters starting from the first one. */ -- list of counters starting from the first one. */
static void insn_divmod_values_to_profile (rtx, unsigned *,
struct histogram_value **);
static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **); static void insn_values_to_profile (rtx, unsigned *, struct histogram_value **);
static rtx gen_divmod_fixed_value (enum machine_mode, enum rtx_code, rtx, rtx,
rtx, gcov_type);
static rtx gen_mod_pow2 (enum machine_mode, enum rtx_code, rtx, rtx, rtx);
static rtx gen_mod_subtract (enum machine_mode, enum rtx_code, rtx, rtx, rtx,
int);
static bool divmod_fixed_value_transform (rtx insn);
static bool mod_pow2_value_transform (rtx);
static bool mod_subtract_transform (rtx);
/* Release the list of VALUES of length N_VALUES for that we want to measure /* Release the list of VALUES of length N_VALUES for that we want to measure
histograms. */ histograms. */
...@@ -60,13 +71,106 @@ free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED, ...@@ -60,13 +71,106 @@ free_profiled_values (unsigned n_values ATTRIBUTE_UNUSED,
free (values); free (values);
} }
/* Find values inside INSN for that we want to measure histograms for
division/modulo optimization. */
static void
insn_divmod_values_to_profile (rtx insn, unsigned *n_values,
struct histogram_value **values)
{
rtx set, set_src, op1, op2;
enum machine_mode mode;
if (!INSN_P (insn))
return;
set = single_set (insn);
if (!set)
return;
mode = GET_MODE (SET_DEST (set));
if (!INTEGRAL_MODE_P (mode))
return;
set_src = SET_SRC (set);
switch (GET_CODE (set_src))
{
case DIV:
case MOD:
case UDIV:
case UMOD:
op1 = XEXP (set_src, 0);
op2 = XEXP (set_src, 1);
if (side_effects_p (op2))
return;
/* Check for a special case where the divisor is power of 2. */
if ((GET_CODE (set_src) == UMOD) && !CONSTANT_P (op2))
{
*values = xrealloc (*values,
(*n_values + 1)
* sizeof (struct histogram_value));
(*values)[*n_values].value = op2;
(*values)[*n_values].seq = NULL_RTX;
(*values)[*n_values].mode = mode;
(*values)[*n_values].insn = insn;
(*values)[*n_values].type = HIST_TYPE_POW2;
(*values)[*n_values].hdata.pow2.may_be_other = 1;
(*n_values)++;
}
/* Check whether the divisor is not in fact a constant. */
if (!CONSTANT_P (op2))
{
*values = xrealloc (*values,
(*n_values + 1)
* sizeof (struct histogram_value));
(*values)[*n_values].value = op2;
(*values)[*n_values].mode = mode;
(*values)[*n_values].seq = NULL_RTX;
(*values)[*n_values].insn = insn;
(*values)[*n_values].type = HIST_TYPE_SINGLE_VALUE;
(*n_values)++;
}
/* For mod, check whether it is not often a noop (or replacable by
a few subtractions). */
if (GET_CODE (set_src) == UMOD && !side_effects_p (op1))
{
rtx tmp;
*values = xrealloc (*values,
(*n_values + 1)
* sizeof (struct histogram_value));
start_sequence ();
tmp = simplify_gen_binary (DIV, mode, copy_rtx (op1), copy_rtx (op2));
(*values)[*n_values].value = force_operand (tmp, NULL_RTX);
(*values)[*n_values].seq = get_insns ();
end_sequence ();
(*values)[*n_values].mode = mode;
(*values)[*n_values].insn = insn;
(*values)[*n_values].type = HIST_TYPE_INTERVAL;
(*values)[*n_values].hdata.intvl.int_start = 0;
(*values)[*n_values].hdata.intvl.steps = 2;
(*values)[*n_values].hdata.intvl.may_be_less = 1;
(*values)[*n_values].hdata.intvl.may_be_more = 1;
(*n_values)++;
}
return;
default:
return;
}
}
/* Find values inside INSN for that we want to measure histograms and adds /* Find values inside INSN for that we want to measure histograms and adds
them to list VALUES (increasing the record of its length in N_VALUES). */ them to list VALUES (increasing the record of its length in N_VALUES). */
static void static void
insn_values_to_profile (rtx insn ATTRIBUTE_UNUSED, insn_values_to_profile (rtx insn,
unsigned *n_values ATTRIBUTE_UNUSED, unsigned *n_values,
struct histogram_value **values ATTRIBUTE_UNUSED) struct histogram_value **values)
{ {
if (flag_value_profile_transformations)
insn_divmod_values_to_profile (insn, n_values, values);
} }
/* Find list of values for that we want to measure histograms. */ /* Find list of values for that we want to measure histograms. */
...@@ -86,21 +190,40 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) ...@@ -86,21 +190,40 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values)
switch ((*values)[i].type) switch ((*values)[i].type)
{ {
case HIST_TYPE_INTERVAL: case HIST_TYPE_INTERVAL:
if (rtl_dump_file)
fprintf (rtl_dump_file,
"Interval counter for insn %d, range %d -- %d.\n",
INSN_UID ((*values)[i].insn),
(*values)[i].hdata.intvl.int_start,
((*values)[i].hdata.intvl.int_start
+ (*values)[i].hdata.intvl.steps - 1));
(*values)[i].n_counters = (*values)[i].hdata.intvl.steps + (*values)[i].n_counters = (*values)[i].hdata.intvl.steps +
((*values)[i].hdata.intvl.may_be_less ? 1 : 0) + ((*values)[i].hdata.intvl.may_be_less ? 1 : 0) +
((*values)[i].hdata.intvl.may_be_more ? 1 : 0); ((*values)[i].hdata.intvl.may_be_more ? 1 : 0);
break; break;
case HIST_TYPE_POW2: case HIST_TYPE_POW2:
if (rtl_dump_file)
fprintf (rtl_dump_file,
"Pow2 counter for insn %d.\n",
INSN_UID ((*values)[i].insn));
(*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) + (*values)[i].n_counters = GET_MODE_BITSIZE ((*values)[i].mode) +
((*values)[i].hdata.pow2.may_be_other ? 1 : 0); ((*values)[i].hdata.pow2.may_be_other ? 1 : 0);
break; break;
case HIST_TYPE_SINGLE_VALUE: case HIST_TYPE_SINGLE_VALUE:
if (rtl_dump_file)
fprintf (rtl_dump_file,
"Single value counter for insn %d.\n",
INSN_UID ((*values)[i].insn));
(*values)[i].n_counters = 3; (*values)[i].n_counters = 3;
break; break;
case HIST_TYPE_CONST_DELTA: case HIST_TYPE_CONST_DELTA:
if (rtl_dump_file)
fprintf (rtl_dump_file,
"Constant delta counter for insn %d.\n",
INSN_UID ((*values)[i].insn));
(*values)[i].n_counters = 4; (*values)[i].n_counters = 4;
break; break;
...@@ -109,3 +232,477 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values) ...@@ -109,3 +232,477 @@ find_values_to_profile (unsigned *n_values, struct histogram_value **values)
} }
} }
} }
/* Main entry point. Finds REG_VALUE_PROFILE notes from profiler and uses
them to identify and exploit properties of values that are hard to analyze
statically.
We do following transformations:
1)
x = a / b;
where b is almost always a constant N is transformed to
if (b == N)
x = a / N;
else
x = a / b;
Analogically with %
2)
x = a % b
where b is almost always a power of 2 and the division is unsigned
TODO -- handle signed case as well
if ((b & (b - 1)) == 0)
x = a & (b - 1);
else
x = x % b;
Note that when b = 0, no error will occur and x = a; this is correct,
as result of such operation is undefined.
3)
x = a % b
where a is almost always less then b and the division is unsigned
TODO -- handle signed case as well
x = a;
if (x >= b)
x %= b;
4)
x = a % b
where a is almost always less then 2 * b and the division is unsigned
TODO -- handle signed case as well
x = a;
if (x >= b)
x -= b;
if (x >= b)
x %= b;
It would be possible to continue analogically for K * b for other small
K's, but it is probably not useful.
TODO:
There are other useful cases that could be handled by a similar mechanism,
for example:
for (i = 0; i < n; i++)
...
transform to (for constant N):
if (n == N)
for (i = 0; i < N; i++)
...
else
for (i = 0; i < n; i++)
...
making unroller happy. Since this may grow the code significantly,
we would have to be very careful here. */
bool
value_profile_transformations ()
{
rtx insn, next;
int changed = false;
for (insn = get_insns (); insn; insn = next)
{
next = NEXT_INSN (insn);
if (!INSN_P (insn))
continue;
/* Scan for insn carrying a histogram. */
if (!find_reg_note (insn, REG_VALUE_PROFILE, 0))
continue;
/* Ignore cold areas -- we are growing a code. */
if (!maybe_hot_bb_p (BLOCK_FOR_INSN (insn)))
continue;
if (rtl_dump_file)
{
fprintf (rtl_dump_file, "Trying transformations on insn %d\n",
INSN_UID (insn));
print_rtl_single (rtl_dump_file, insn);
}
/* Transformations: */
if (flag_value_profile_transformations
&& (mod_subtract_transform (insn)
|| divmod_fixed_value_transform (insn)
|| mod_pow2_value_transform (insn)))
changed = true;
}
if (changed)
{
commit_edge_insertions ();
allocate_reg_info (max_reg_num (), FALSE, FALSE);
}
return changed;
}
/* Generate code for transformation 1 (with MODE and OPERATION, operands OP1
and OP2 whose value is expected to be VALUE and result TARGET). */
static rtx
gen_divmod_fixed_value (enum machine_mode mode, enum rtx_code operation,
rtx target, rtx op1, rtx op2, gcov_type value)
{
rtx tmp, tmp1;
rtx neq_label = gen_label_rtx ();
rtx end_label = gen_label_rtx ();
rtx sequence;
start_sequence ();
if (!REG_P (op2))
{
tmp = gen_reg_rtx (mode);
emit_move_insn (tmp, copy_rtx (op2));
}
else
tmp = op2;
do_compare_rtx_and_jump (tmp, GEN_INT (value), NE, 0, mode, NULL_RTX,
NULL_RTX, neq_label);
tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), GEN_INT (value));
tmp1 = force_operand (tmp1, target);
if (tmp1 != target)
emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
emit_jump_insn (gen_jump (end_label));
emit_barrier ();
emit_label (neq_label);
tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
tmp1 = force_operand (tmp1, target);
if (tmp1 != target)
emit_move_insn (copy_rtx (target), copy_rtx (tmp1));
emit_label (end_label);
sequence = get_insns ();
end_sequence ();
rebuild_jump_labels (sequence);
return sequence;
}
/* Do transform 1) on INSN if applicable. */
static bool
divmod_fixed_value_transform (rtx insn)
{
rtx set, set_src, set_dest, op1, op2, value, histogram;
enum rtx_code code;
enum machine_mode mode;
gcov_type val, count, all;
edge e;
set = single_set (insn);
if (!set)
return false;
set_src = SET_SRC (set);
set_dest = SET_DEST (set);
code = GET_CODE (set_src);
mode = GET_MODE (set_dest);
if (code != DIV && code != MOD && code != UDIV && code != UMOD)
return false;
op1 = XEXP (set_src, false);
op2 = XEXP (set_src, 1);
for (histogram = REG_NOTES (insn);
histogram;
histogram = XEXP (histogram, 1))
if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_SINGLE_VALUE))
break;
if (!histogram)
return false;
histogram = XEXP (XEXP (histogram, 0), 1);
value = XEXP (histogram, 0);
histogram = XEXP (histogram, 1);
val = INTVAL (XEXP (histogram, 0));
histogram = XEXP (histogram, 1);
count = INTVAL (XEXP (histogram, 0));
histogram = XEXP (histogram, 1);
all = INTVAL (XEXP (histogram, 0));
/* We requiere that count is at least half of all; this means
that for the transformation to fire the value must be constant
at least 50% of time (and 75% gives the garantee of usage). */
if (!rtx_equal_p (op2, value) || 2 * count < all)
return false;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Div/mod by constant transformation on insn %d\n",
INSN_UID (insn));
e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
delete_insn (insn);
insert_insn_on_edge (
gen_divmod_fixed_value (mode, code, set_dest, op1, op2, val), e);
return true;
}
/* Generate code for transformation 2 (with MODE and OPERATION, operands OP1
and OP2 and result TARGET). */
static rtx
gen_mod_pow2 (enum machine_mode mode, enum rtx_code operation, rtx target,
rtx op1, rtx op2)
{
rtx tmp, tmp1, tmp2, tmp3;
rtx neq_label = gen_label_rtx ();
rtx end_label = gen_label_rtx ();
rtx sequence;
start_sequence ();
if (!REG_P (op2))
{
tmp = gen_reg_rtx (mode);
emit_move_insn (tmp, copy_rtx (op2));
}
else
tmp = op2;
tmp1 = expand_simple_binop (mode, PLUS, tmp, constm1_rtx, NULL_RTX,
0, OPTAB_WIDEN);
tmp2 = expand_simple_binop (mode, AND, tmp, tmp1, NULL_RTX,
0, OPTAB_WIDEN);
do_compare_rtx_and_jump (tmp2, const0_rtx, NE, 0, mode, NULL_RTX,
NULL_RTX, neq_label);
tmp3 = expand_simple_binop (mode, AND, op1, tmp1, target,
0, OPTAB_WIDEN);
if (tmp3 != target)
emit_move_insn (copy_rtx (target), tmp3);
emit_jump_insn (gen_jump (end_label));
emit_barrier ();
emit_label (neq_label);
tmp1 = simplify_gen_binary (operation, mode, copy_rtx (op1), copy_rtx (tmp));
tmp1 = force_operand (tmp1, target);
if (tmp1 != target)
emit_move_insn (target, tmp1);
emit_label (end_label);
sequence = get_insns ();
end_sequence ();
rebuild_jump_labels (sequence);
return sequence;
}
/* Do transform 2) on INSN if applicable. */
static bool
mod_pow2_value_transform (rtx insn)
{
rtx set, set_src, set_dest, op1, op2, value, histogram;
enum rtx_code code;
enum machine_mode mode;
gcov_type wrong_values, count;
edge e;
int i;
set = single_set (insn);
if (!set)
return false;
set_src = SET_SRC (set);
set_dest = SET_DEST (set);
code = GET_CODE (set_src);
mode = GET_MODE (set_dest);
if (code != UMOD)
return false;
op1 = XEXP (set_src, 0);
op2 = XEXP (set_src, 1);
for (histogram = REG_NOTES (insn);
histogram;
histogram = XEXP (histogram, 1))
if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_POW2))
break;
if (!histogram)
return false;
histogram = XEXP (XEXP (histogram, 0), 1);
value = XEXP (histogram, 0);
histogram = XEXP (histogram, 1);
wrong_values =INTVAL (XEXP (histogram, 0));
histogram = XEXP (histogram, 1);
count = 0;
for (i = 0; i < GET_MODE_BITSIZE (mode); i++)
{
count += INTVAL (XEXP (histogram, 0));
histogram = XEXP (histogram, 1);
}
if (!rtx_equal_p (op2, value))
return false;
/* We require that we hit a power of two at least half of all evaluations. */
if (count < wrong_values)
return false;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Mod power of 2 transformation on insn %d\n",
INSN_UID (insn));
e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
delete_insn (insn);
insert_insn_on_edge (
gen_mod_pow2 (mode, code, set_dest, op1, op2), e);
return true;
}
/* Generate code for transformations 3 and 4 (with MODE and OPERATION,
operands OP1 and OP2, result TARGET and at most SUB subtractions). */
static rtx
gen_mod_subtract (enum machine_mode mode, enum rtx_code operation,
rtx target, rtx op1, rtx op2, int sub)
{
rtx tmp, tmp1;
rtx end_label = gen_label_rtx ();
rtx sequence;
int i;
start_sequence ();
if (!REG_P (op2))
{
tmp = gen_reg_rtx (mode);
emit_move_insn (tmp, copy_rtx (op2));
}
else
tmp = op2;
emit_move_insn (target, copy_rtx (op1));
do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
NULL_RTX, end_label);
for (i = 0; i < sub; i++)
{
tmp1 = expand_simple_binop (mode, MINUS, target, tmp, target,
0, OPTAB_WIDEN);
if (tmp1 != target)
emit_move_insn (target, tmp1);
do_compare_rtx_and_jump (target, tmp, LTU, 0, mode, NULL_RTX,
NULL_RTX, end_label);
}
tmp1 = simplify_gen_binary (operation, mode, copy_rtx (target), copy_rtx (tmp));
tmp1 = force_operand (tmp1, target);
if (tmp1 != target)
emit_move_insn (target, tmp1);
emit_label (end_label);
sequence = get_insns ();
end_sequence ();
rebuild_jump_labels (sequence);
return sequence;
}
/* Do transforms 3) and 4) on INSN if applicable. */
static bool
mod_subtract_transform (rtx insn)
{
rtx set, set_src, set_dest, op1, op2, value, histogram;
enum rtx_code code;
enum machine_mode mode;
gcov_type wrong_values, counts[2], count, all;
edge e;
int i;
set = single_set (insn);
if (!set)
return false;
set_src = SET_SRC (set);
set_dest = SET_DEST (set);
code = GET_CODE (set_src);
mode = GET_MODE (set_dest);
if (code != UMOD)
return false;
op1 = XEXP (set_src, 0);
op2 = XEXP (set_src, 1);
for (histogram = REG_NOTES (insn);
histogram;
histogram = XEXP (histogram, 1))
if (REG_NOTE_KIND (histogram) == REG_VALUE_PROFILE
&& XEXP (XEXP (histogram, 0), 0) == GEN_INT (HIST_TYPE_INTERVAL))
break;
if (!histogram)
return false;
histogram = XEXP (XEXP (histogram, 0), 1);
value = XEXP (histogram, 0);
histogram = XEXP (histogram, 1);
all = 0;
for (i = 0; i < 2; i++)
{
counts[i] = INTVAL (XEXP (histogram, 0));
all += counts[i];
histogram = XEXP (histogram, 1);
}
wrong_values = INTVAL (XEXP (histogram, 0));
histogram = XEXP (histogram, 1);
wrong_values += INTVAL (XEXP (histogram, 0));
all += wrong_values;
/* We require that we use just subtractions in at least 50% of all
evaluations. */
count = 0;
for (i = 0; i < 2; i++)
{
count += counts[i];
if (count * 2 >= all)
break;
}
if (i == 2)
return false;
if (rtl_dump_file)
fprintf (rtl_dump_file, "Mod subtract transformation on insn %d\n",
INSN_UID (insn));
e = split_block (BLOCK_FOR_INSN (insn), PREV_INSN (insn));
delete_insn (insn);
insert_insn_on_edge (
gen_mod_subtract (mode, code, set_dest, op1, op2, i), e);
return true;
}
...@@ -61,3 +61,4 @@ struct histogram_value ...@@ -61,3 +61,4 @@ struct histogram_value
extern void find_values_to_profile (unsigned *, struct histogram_value **); extern void find_values_to_profile (unsigned *, struct histogram_value **);
extern void free_profiled_values (unsigned, struct histogram_value *); extern void free_profiled_values (unsigned, struct histogram_value *);
extern bool value_profile_transformations (void);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment