Commit 194734e9 by Jan Hubicka Committed by Jan Hubicka

invoke.texi (-malign-double): Re-add lost warning.

	* invoke.texi (-malign-double): Re-add lost warning.

	* i386-protos.h (x86_output_mi_thunk): Declare.
	* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
	* i386.c (x86_output_mi_thunk): ... here; handle 64bits.

	* dwarf2out.c (output_call_frame_info): Do not skip unwind info
	when flag_asynchronous_unwind_tables is set.

	* flags.h (flag_reorder_functions): Declare.
	* function.c (prepare_function_start): Initialize frequency.
	* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
	* Makefile.in (predict.o): Add dependency on target.h and params.h
	* defaults.h (HOT_TEXT_SECTION_NAME,
	UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
	* predict.c (choose_function_section): New function.
	(estimate_bb_frequencies): Use it.
	* toplev.c (flag_reorder_functions): New global variable.
	(lang_independent_options): New.
	(parse_options_and_default_flags): Set.
	* varasm.c (assemble_start_function): Bypass function alignment
	for never executed functions.
	* invoke.texi (-freorder-blocks, -freorder-functions): Document.
	(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
	* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
	Document.

	Thu Jan  3 21:52:09 CET 2002  Jan Hubicka  <jh@suse.cz>

	* predict.c: Include profile.h
	(MIN_COUNT): Rename to MIN_COUNT_FRACTION
	(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
	Use the information about maximal counter in the program.

	Thu Dec 20 22:14:00 CET 2001  Jan Hubicka  <jh@suse.cz>

	* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
	probably_never_executed_bb_p): New functions.
	* cfgcleanup.c (outgoing_edges_match): Use them.
	* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
	(maybe_hot_bb_p, probably_cold_bb_p,
	probably_never_executed_bb_p): New functions.

	* function.h (function): Add new field function_frequency.
	* predict.c (compute_function_frequency): New function.
	(estimate_probability): Call it.

From-SVN: r53478
parent 61ad9a34
Wed May 15 10:38:27 CEST 2002 Jan Hubicka <jh@suse.cz>
* invoke.texi (-malign-double): Re-add lost warning.
* i386-protos.h (x86_output_mi_thunk): Declare.
* unix.h (ASM_OUTPUT_MI_THUNK): Move offline to ...
* i386.c (x86_output_mi_thunk): ... here; handle 64bits.
* dwarf2out.c (output_call_frame_info): Do not skip unwind info
when flag_asynchronous_unwind_tables is set.
* flags.h (flag_reorder_functions): Declare.
* function.c (prepare_function_start): Initialize frequency.
* params.def (HOT_BB_COUNT_FRACTION, HOT_BB_FREQUENCY_FRACTION): New parameters.
* Makefile.in (predict.o): Add dependency on target.h and params.h
* defaults.h (HOT_TEXT_SECTION_NAME,
UNLIKELY_EXECUTED_TEXT_SECTION_NAME): New macros.
* predict.c (choose_function_section): New function.
(estimate_bb_frequencies): Use it.
* toplev.c (flag_reorder_functions): New global variable.
(lang_independent_options): New.
(parse_options_and_default_flags): Set.
* varasm.c (assemble_start_function): Bypass function alignment
for never executed functions.
* invoke.texi (-freorder-blocks, -freorder-functions): Document.
(param hot-bb-count-fraction, hot-bb-frequency-fraction): New.
* tm.texi (HOT_TEXT_SECTION_NAME, UNLIKELY_EXECUTED_TEXT_SECTION_NAME):
Document.
Thu Jan 3 21:52:09 CET 2002 Jan Hubicka <jh@suse.cz>
* predict.c: Include profile.h
(MIN_COUNT): Rename to MIN_COUNT_FRACTION
(maybe_hot_bb_p, probably_cold_bb_p, probably_never_executed_bb_p):
Use the information about maximal counter in the program.
Thu Dec 20 22:14:00 CET 2001 Jan Hubicka <jh@suse.cz>
* basic-block.h (maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* cfgcleanup.c (outgoing_edges_match): Use them.
* predict.c (MIN_COUNT, MIN_FREQUENCY): New macros.
(maybe_hot_bb_p, probably_cold_bb_p,
probably_never_executed_bb_p): New functions.
* function.h (function): Add new field function_frequency.
* predict.c (compute_function_frequency): New function.
(estimate_probability): Call it.
2002-03-09 Jakub Jelinek <jakub@redhat.com>
PR optimization/5172, optimization/5200
......
......@@ -1579,7 +1579,8 @@ reg-stack.o : reg-stack.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) $(RECOG_H)
varray.h function.h $(TM_P_H)
predict.o: predict.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) flags.h \
insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h output.h toplev.h \
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h
$(RECOG_H) function.h except.h $(EXPR_H) $(TM_P_H) $(PREDICT_H) real.h \
$(PARAMS_H) $(TARGET_H)
lists.o: lists.c $(CONFIG_H) $(SYSTEM_H) toplev.h $(RTL_H) $(GGC_H)
bb-reorder.o : bb-reorder.c $(CONFIG_H) $(SYSTEM_H) $(RTL_H) $(TREE_H) \
flags.h $(BASIC_BLOCK_H) hard-reg-set.h output.h cfglayout.h $(TARGET_H)
......
......@@ -628,6 +628,10 @@ extern rtx emit_block_insn_before PARAMS ((rtx, rtx, basic_block));
extern void estimate_probability PARAMS ((struct loops *));
extern void note_prediction_to_br_prob PARAMS ((void));
extern void expected_value_to_br_prob PARAMS ((void));
extern void note_prediction_to_br_prob PARAMS ((void));
extern bool maybe_hot_bb_p PARAMS ((basic_block));
extern bool probably_cold_bb_p PARAMS ((basic_block));
extern bool probably_never_executed_bb_p PARAMS ((basic_block));
/* In flow.c */
extern void init_flow PARAMS ((void));
......
......@@ -1211,8 +1211,8 @@ outgoing_edges_match (mode, bb1, bb2)
roughly similar. */
if (match
&& !optimize_size
&& bb1->frequency > BB_FREQ_MAX / 1000
&& bb2->frequency > BB_FREQ_MAX / 1000)
&& maybe_hot_bb_p (bb1)
&& maybe_hot_bb_p (bb2))
{
int prob2;
......
......@@ -197,4 +197,5 @@ extern tree ix86_handle_shared_attribute PARAMS ((tree *, tree, tree, int, bool
extern unsigned int i386_pe_section_type_flags PARAMS ((tree, const char *,
int));
extern void i386_pe_asm_named_section PARAMS ((const char *, unsigned int));
extern void x86_output_mi_thunk PARAMS ((FILE *, int, tree));
#endif
......@@ -13049,3 +13049,78 @@ x86_order_regs_for_local_alloc ()
while (pos < FIRST_PSEUDO_REGISTER)
reg_alloc_order [pos++] = 0;
}
/* Write to FILE the assembly for a thunk that adds DELTA to the first
   argument of FUNCTION and then jumps to FUNCTION.

   On TARGET_64BIT the first argument is taken from
   x86_64_int_parameter_registers, using entry 1 instead of entry 0 when
   aggregate_value_p holds for FUNCTION's return type (presumably because
   a hidden return-slot pointer then occupies entry 0 -- confirm).
   Otherwise it is hard register 0 when register parameters are in use
   and the argument list is terminated by void_type_node, or else a stack
   slot at offset 8 (aggregate return) or 4 from the stack pointer.

   With flag_pic the jump goes through the GOT; without it a direct
   `jmp' to FUNCTION's assembler name is emitted.  */
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  rtx xops[3];
  tree arg = NULL_TREE;

  /* Only when register parameters are enabled: walk the argument type
     list looking for its void_type_node terminator.  ARG stays non-null
     exactly when that terminator is found.  */
  if (ix86_regparm > 0)
    {
      arg = TYPE_ARG_TYPES (TREE_TYPE (function));
      while (arg && TREE_VALUE (arg) != void_type_node)
	arg = TREE_CHAIN (arg);
    }

  xops[0] = GEN_INT (delta);

  if (TARGET_64BIT)
    {
      int slot = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) ? 1 : 0;

      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[slot]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  /* Indirect jump through the RIP-relative GOT entry.  */
	  fprintf (file, "\tjmp *");
	  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
	  fprintf (file, "@GOTPCREL(%%rip)\n");
	  return;
	}
    }
  else
    {
      if (arg)
	xops[1] = gen_rtx_REG (SImode, 0);
      else
	{
	  int offset
	    = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) ? 8 : 4;

	  xops[1] = gen_rtx_MEM (SImode,
				 plus_constant (stack_pointer_rtx, offset));
	}
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
	{
	  xops[0] = pic_offset_table_rtx;
	  xops[1] = gen_label_rtx ();
	  xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

	  /* The sequence below loads the target into %ecx; presumably that
	     register must not be carrying an argument -- confirm.  */
	  if (ix86_regparm > 2)
	    abort ();

	  /* Save the PIC register, materialise the GOT address in it via a
	     call/pop pair, fetch FUNCTION's GOT entry into %ecx, restore
	     the PIC register and jump through %ecx.  */
	  output_asm_insn ("push{l}\t%0", xops);
	  output_asm_insn ("call\t%P1", xops);
	  ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
	  output_asm_insn ("pop{l}\t%0", xops);
	  output_asm_insn
	    ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
	  xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
	  output_asm_insn
	    ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
	  asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
	  asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
	  return;
	}
    }

  /* Non-PIC, either word size: direct jump to FUNCTION.  */
  fprintf (file, "\tjmp ");
  assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
  fprintf (file, "\n");
}
......@@ -79,57 +79,5 @@ Boston, MA 02111-1307, USA. */
/* Output code to add DELTA to the first argument, and then jump to FUNCTION.
Used for C++ multiple inheritance.

The first argument is addressed as hard register 0 when ix86_regparm > 0
and the argument type list is terminated by void_type_node; otherwise as
the stack slot at offset 8 from the stack pointer when aggregate_value_p
holds for FUNCTION's return type, or at offset 4 when it does not.

The jump is emitted in one of three forms: with flag_pic and !TARGET_64BIT
the target is loaded from the GOT into %ecx and jumped through; with
flag_pic and TARGET_64BIT it is an indirect jump via @GOTPCREL(%rip);
otherwise it is a direct `jmp' to FUNCTION's assembler name.

NOTE(review): the regparm scan names `function' in lower case rather than
the FUNCTION macro parameter, so it relies on an identifier of that name
being in scope where the macro is expanded -- confirm.  THUNK_FNDECL is
not referenced by the body. */
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
do { \
tree parm; \
rtx xops[3]; \
\
if (ix86_regparm > 0) \
parm = TYPE_ARG_TYPES (TREE_TYPE (function)); \
else \
parm = NULL_TREE; \
for (; parm; parm = TREE_CHAIN (parm)) \
if (TREE_VALUE (parm) == void_type_node) \
break; \
\
xops[0] = GEN_INT (DELTA); \
if (parm) \
xops[1] = gen_rtx_REG (SImode, 0); \
else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (FUNCTION)))) \
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8)); \
else \
xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4)); \
output_asm_insn ("add{l} {%0, %1|%1, %0}", xops); \
\
if (flag_pic && !TARGET_64BIT) \
{ \
xops[0] = pic_offset_table_rtx; \
xops[1] = gen_label_rtx (); \
xops[2] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); \
\
if (ix86_regparm > 2) \
abort (); \
output_asm_insn ("push{l}\t%0", xops); \
output_asm_insn ("call\t%P1", xops); \
ASM_OUTPUT_INTERNAL_LABEL (FILE, "L", CODE_LABEL_NUMBER (xops[1])); \
output_asm_insn ("pop{l}\t%0", xops); \
output_asm_insn ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops); \
xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (FUNCTION), 0)); \
output_asm_insn ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}",\
xops); \
asm_fprintf (FILE, "\tpop{l\t%%ebx|\t%%ebx}\n"); \
asm_fprintf (FILE, "\tjmp\t{*%%ecx|%%ecx}\n"); \
} \
else if (flag_pic && TARGET_64BIT) \
{ \
fprintf (FILE, "\tjmp *"); \
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
fprintf (FILE, "@GOTPCREL(%%rip)\n"); \
} \
else \
{ \
fprintf (FILE, "\tjmp "); \
assemble_name (FILE, XSTR (XEXP (DECL_RTL (FUNCTION), 0), 0)); \
fprintf (FILE, "\n"); \
} \
} while (0)
/* Forward to the out-of-line implementation x86_output_mi_thunk.
   THUNK_FNDECL is accepted for interface compatibility but not passed on.
   The expansion is a single expression with no trailing semicolon, so
   that `ASM_OUTPUT_MI_THUNK (...);' is exactly one statement and remains
   safe inside an unbraced if/else; callers supply the `;'.  */
#define ASM_OUTPUT_MI_THUNK(FILE, THUNK_FNDECL, DELTA, FUNCTION) \
  x86_output_mi_thunk ((FILE), (DELTA), (FUNCTION))
......@@ -517,4 +517,12 @@ You Lose! You must define PREFERRED_DEBUGGING_TYPE!
&& !ROUND_TOWARDS_ZERO)
#endif
#ifndef HOT_TEXT_SECTION_NAME
#define HOT_TEXT_SECTION_NAME "text.hot"
#endif
#ifndef UNLIKELY_EXECUTED_TEXT_SECTION_NAME
#define UNLIKELY_EXECUTED_TEXT_SECTION_NAME "text.unlikely"
#endif
#endif /* ! GCC_DEFAULTS_H */
......@@ -278,6 +278,7 @@ in the following sections.
-fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -fprefetch-loop-arrays @gol
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
-fschedule-insns -fschedule-insns2 @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
......@@ -3712,6 +3713,23 @@ non-determinism is of paramount import. This switch allows users to
reduce non-determinism, possibly at the expense of inferior
optimization.
@item -freorder-blocks
@opindex freorder-blocks
Reorder basic blocks in the compiled function in order to reduce the number
of taken branches and improve code locality.
@item -freorder-functions
@opindex freorder-functions
Reorder functions in the object file in order to improve code locality.
This is implemented by using special subsections @code{text.hot} for the
most frequently executed functions and @code{text.unlikely} for unlikely
executed functions. Reordering is done by the linker, so the object file
format must support named sections and the linker must place them in a
reasonable way.
Also, profile feedback must be available to make this option effective. See
@option{-fprofile-arcs} for details.
@item -fstrict-aliasing
@opindex fstrict-aliasing
Allows the compiler to assume the strictest aliasing rules applicable to
......@@ -3900,6 +3918,13 @@ The maximum number of instructions that a loop should have if that loop
is unrolled, and if the loop is unrolled, it determines how many times
the loop code is unrolled.
@item hot-bb-count-fraction
Select the fraction of the maximal count of repetitions of a basic block in
the program that a given basic block needs to have to be considered hot.
@item hot-bb-frequency-fraction
Select the fraction of the maximal frequency of executions of a basic block
in the function that a given basic block needs to have to be considered hot.
@end table
@end table
......@@ -7389,6 +7414,10 @@ boundary. Aligning @code{double} variables on a two word boundary will
produce code that runs somewhat faster on a @samp{Pentium} at the
expense of more memory.
@strong{Warning:} if you use the @samp{-malign-double} switch,
structures containing the above types will be aligned differently than
the published application binary interface specifications for the 386.
@item -m128bit-long-double
@opindex m128bit-long-double
Control the size of @code{long double} type. i386 application binary interface
......
......@@ -5651,6 +5651,17 @@ Normally this is not needed, as simply defining @code{TEXT_SECTION_ASM_OP}
is enough. The MIPS port uses this to sort all functions after all data
declarations.
@findex HOT_TEXT_SECTION_NAME
@item HOT_TEXT_SECTION_NAME
If defined, a C string constant for the name of the section containing most
frequently executed functions of the program. If not defined, GCC will provide
a default definition if the target supports named sections.
@findex UNLIKELY_EXECUTED_TEXT_SECTION_NAME
@item UNLIKELY_EXECUTED_TEXT_SECTION_NAME
If defined, a C string constant for the name of the section containing unlikely
executed functions in the program.
@findex DATA_SECTION_ASM_OP
@item DATA_SECTION_ASM_OP
A C expression whose value is a string, including spacing, containing the
......
......@@ -1968,7 +1968,8 @@ output_call_frame_info (for_eh)
fde = &fde_table[i];
/* Don't emit EH unwind info for leaf functions that don't need it. */
if (for_eh && fde->nothrow && ! fde->uses_eh_lsda)
if (!flag_asynchronous_unwind_tables && for_eh && fde->nothrow
&& ! fde->uses_eh_lsda)
continue;
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, FDE_LABEL, for_eh + i * 2);
......
......@@ -204,6 +204,10 @@ extern int flag_branch_probabilities;
extern int flag_reorder_blocks;
/* Nonzero if functions should be reordered. */
extern int flag_reorder_functions;
/* Nonzero if registers should be renamed. */
extern int flag_rename_registers;
......
......@@ -6320,6 +6320,10 @@ prepare_function_start ()
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
cfun->arc_profile = profile_arc_flag || flag_test_coverage;
cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
(*lang_hooks.function.init) (cfun);
if (init_machine_status)
(*init_machine_status) (cfun);
......
......@@ -481,6 +481,19 @@ struct function
/* Nonzero if code to initialize arg_pointer_save_area has been emitted. */
unsigned int arg_pointer_save_area_init : 1;
/* How commonly executed the function is. Initialized during branch
probabilities pass. */
enum function_frequency {
/* This function most likely won't be executed at all.
(set only when profile feedback is available). */
FUNCTION_FREQUENCY_UNLIKELY_EXECUTED,
/* The default value. */
FUNCTION_FREQUENCY_NORMAL,
/* Optimize this function hard
(set only when profile feedback is available). */
FUNCTION_FREQUENCY_HOT
} function_frequency;
};
/* The function currently being compiled. */
......
......@@ -150,6 +150,15 @@ DEFPARAM(PARAM_MAX_UNROLLED_INSNS,
"max-unrolled-insns",
"The maximum number of instructions to consider to unroll in a loop",
100)
DEFPARAM(HOT_BB_COUNT_FRACTION,
"hot-bb-count-fraction",
"Select fraction of the maximal count of repetitions of basic block in program given basic block needs to have to be considered hot",
10000)
DEFPARAM(HOT_BB_FREQUENCY_FRACTION,
"hot-bb-frequency-fraction",
"Select fraction of the maximal frequency of executions of basic block in function given basic block needs to have to be considered hot",
1000)
/*
Local variables:
mode:c
......
......@@ -381,6 +381,10 @@ int flag_branch_probabilities = 0;
int flag_reorder_blocks = 0;
/* Nonzero if functions should be reordered. */
int flag_reorder_functions = 0;
/* Nonzero if registers should be renamed. */
int flag_rename_registers = 0;
......@@ -1076,6 +1080,8 @@ static const lang_independent_options f_options[] =
N_("Enable basic program profiling code") },
{"reorder-blocks", &flag_reorder_blocks, 1,
N_("Reorder basic blocks to improve code placement") },
{"reorder-functions", &flag_reorder_functions, 1,
N_("Reorder functions to improve code placement") },
{"rename-registers", &flag_rename_registers, 1,
N_("Do the register renaming optimization pass") },
{"cprop-registers", &flag_cprop_registers, 1,
......@@ -4657,6 +4663,7 @@ parse_options_and_default_flags (argc, argv)
flag_strict_aliasing = 1;
flag_delete_null_pointer_checks = 1;
flag_reorder_blocks = 1;
flag_reorder_functions = 1;
}
if (optimize >= 3)
......
......@@ -1197,7 +1197,8 @@ assemble_start_function (decl, fnname)
/* Handle a user-specified function alignment.
Note that we still need to align to FUNCTION_BOUNDARY, as above,
because ASM_OUTPUT_MAX_SKIP_ALIGN might not do any alignment at all. */
if (align_functions_log > align)
if (align_functions_log > align
&& cfun->function_frequency != FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
{
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment