Commit 0dd0e980 by Jan Hubicka Committed by Jan Hubicka

predict.c (estimate_probability): Reorganize opcode heuristics.

	* predict.c (estimate_probability): Reorganize opcode heuristics.
	* predict.def (PRED_OPCODE_POSITIVE, PRED_OPCODE_NONEQUAL,
	PRED_FPOPCODE): New.

	* i386.c (override_options): Recognize various CPU variants and set
	SSE/MMX/3dNOW flags accordingly.
	* i386.h (MASK_MMX_SET, MASK_SSE_SET, MASK_SSE2_SET, MASK_3DNOW_SET,
	MASK_3DNOW_A_SET): New.
	(MASK_ACCUMULATE_OUTGOING_ARGS_SET): New.
	(MASK_NO_ACCUMULATE_OUTGOING_ARGS): Delete.
	(MASK_*): Renumber.
	(TARGET_FLAGS): Use new masks.
	(CPP_CPU_SPECS): Recognize new CPU variants.
	* invoke.texi (-mcpu): Update documentation.

	* flags.h (flag_prefetch_loop_arrays): Declare.
	* loop.h (LOOP_PREFETCH): Define new constant.
	* loop.c (strength_reduce): Call emit_prefetch_instructions.
	(MAX_PREFETCHES, PREFETCH_BLOCKS_BEFORE_LOOP_MAX,
	PREFETCH_BLOCKS_BEFORE_LOOP_MIN, PREFETCH_BLOCKS_IN_LOOP_MIN): New
	constants.
	(check_store_data): New structure.
	(check_store, emit_prefetch_instructions, rtx_equal_for_prefetch_p):
	New functions.
	* toplev.c: Include insn-flags.h.
	(flag_prefetch_loop_arrays): New global variable.
	(lang_independent_option): Add -fprefetch-loop-arrays.
	(rest_of_compilation): Pass LOOP_PREFETCH when flag_prefetch_loop_arrays
	is set.
	* Makefile.in (toplev.c): Depend on insn-flags.h.
	* invoke.texi (-fprefetch-loop-arrays): Document.

	* predict.c (estimate_probability): Distribute the loop exit
	probability according to number of exit edges.

	* cfgcleanup.c (insns_match_p): Break out from ...;
	(flow_find_cross_jump): ... here;
	(outgoing_edges_match): Add parameter MODE; attempt to match everything
	except for tablejumps.
	(try_crossjump_to_edge): Accept complex edges.
	(try_crossjump_bb): Likewise.

From-SVN: r47969
parent 85230e52
Thu Dec 13 12:31:07 CET 2001 Jan Hubicka <jh@suse.cz>
* predict.c (estimate_probability): Reorganize opcode heuristics.
* predict.def (PRED_OPCODE_POSITIVE, PRED_OPCODE_NONEQUAL,
PRED_FPOPCODE): New.
* i386.c (override_options): Recognize various CPU variants and set
SSE/MMX/3dNOW flags accordingly.
* i386.h (MASK_MMX_SET, MASK_SSE_SET, MASK_SSE2_SET, MASK_3DNOW_SET,
MASK_3DNOW_A_SET): New.
(MASK_ACCUMULATE_OUTGOING_ARGS_SET): New.
(MASK_NO_ACCUMULATE_OUTGOING_ARGS): Delete.
(MASK_*): Renumber.
(TARGET_FLAGS): Use new masks.
(CPP_CPU_SPECS): Recognize new CPU variants.
* invoke.texi (-mcpu): Update documentation.
* flags.h (flag_prefetch_loop_arrays): Declare.
* loop.h (LOOP_PREFETCH): Define new constant.
* loop.c (strength_reduce): Call emit_prefetch_instructions.
(MAX_PREFETCHES, PREFETCH_BLOCKS_BEFORE_LOOP_MAX,
PREFETCH_BLOCKS_BEFORE_LOOP_MIN, PREFETCH_BLOCKS_IN_LOOP_MIN): New
constants.
(check_store_data): New structure.
(check_store, emit_prefetch_instructions, rtx_equal_for_prefetch_p):
New functions.
* toplev.c: Include insn-flags.h.
(flag_prefetch_loop_arrays): New global variable.
(lang_independent_option): Add -fprefetch-loop-arrays.
(rest_of_compilation): Pass LOOP_PREFETCH when flag_prefetch_loop_arrays
is set.
* Makefile.in (toplev.c): Depend on insn-flags.h.
* invoke.texi (-fprefetch-loop-arrays): Document.
* predict.c (estimate_probability): Distribute the loop exit
probability according to number of exit edges.
* cfgcleanup.c (insns_match_p): Break out from ...;
(flow_find_cross_jump): ... here;
(outgoing_edges_match): Add parameter MODE; attempt to match everything
except for tablejumps.
(try_crossjump_to_edge): Accept complex edges.
(try_crossjump_bb): Likewise.
2001-11-29 Corey Minyard <minyard@acm.org> 2001-11-29 Corey Minyard <minyard@acm.org>
* recog.c (validate_replace_rtx_1): Use simplify_gen_binary * recog.c (validate_replace_rtx_1): Use simplify_gen_binary
......
...@@ -1321,7 +1321,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(RTL_H) function.h \ ...@@ -1321,7 +1321,7 @@ toplev.o : toplev.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(RTL_H) function.h \
dwarf2out.h sdbout.h dbxout.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) \ dwarf2out.h sdbout.h dbxout.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H) \
graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) $(lang_options_files) \ graph.h $(LOOP_H) except.h $(REGS_H) $(TIMEVAR_H) $(lang_options_files) \
ssa.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) halfpic.h \ ssa.h $(PARAMS_H) $(TM_P_H) reload.h dwarf2asm.h $(TARGET_H) halfpic.h \
langhooks.h langhooks.h insn-flags.h
$(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(CC) $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
-DTARGET_NAME=\"$(target_alias)\" \ -DTARGET_NAME=\"$(target_alias)\" \
-c $(srcdir)/toplev.c $(OUTPUT_OPTION) -c $(srcdir)/toplev.c $(OUTPUT_OPTION)
......
...@@ -817,18 +817,42 @@ override_options () ...@@ -817,18 +817,42 @@ override_options ()
{ {
const char *const name; /* processor name or nickname. */ const char *const name; /* processor name or nickname. */
const enum processor_type processor; const enum processor_type processor;
const enum pta_flags
{
PTA_SSE = 1,
PTA_SSE2 = 2,
PTA_MMX = 4,
PTA_SSEPREFETCH = 8,
PTA_3DNOW = 16,
PTA_3DNOW_A = 64
} flags;
} }
const processor_alias_table[] = const processor_alias_table[] =
{ {
{"i386", PROCESSOR_I386}, {"i386", PROCESSOR_I386, 0},
{"i486", PROCESSOR_I486}, {"i486", PROCESSOR_I486, 0},
{"i586", PROCESSOR_PENTIUM}, {"i586", PROCESSOR_PENTIUM, 0},
{"pentium", PROCESSOR_PENTIUM}, {"pentium", PROCESSOR_PENTIUM, 0},
{"i686", PROCESSOR_PENTIUMPRO}, {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
{"pentiumpro", PROCESSOR_PENTIUMPRO}, {"i686", PROCESSOR_PENTIUMPRO, 0},
{"k6", PROCESSOR_K6}, {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
{"athlon", PROCESSOR_ATHLON}, {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
{"pentium4", PROCESSOR_PENTIUM4}, {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSEPREFETCH},
{"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
PTA_MMX | PTA_SSEPREFETCH},
{"k6", PROCESSOR_K6, PTA_MMX},
{"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
{"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
{"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
| PTA_3DNOW_A},
{"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH
| PTA_3DNOW | PTA_3DNOW_A},
{"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_SSEPREFETCH | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
}; };
int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
...@@ -880,6 +904,21 @@ override_options () ...@@ -880,6 +904,21 @@ override_options ()
ix86_arch = processor_alias_table[i].processor; ix86_arch = processor_alias_table[i].processor;
/* Default cpu tuning to the architecture. */ /* Default cpu tuning to the architecture. */
ix86_cpu = ix86_arch; ix86_cpu = ix86_arch;
if (processor_alias_table[i].flags & PTA_MMX
&& !(target_flags & MASK_MMX_SET))
target_flags |= MASK_MMX;
if (processor_alias_table[i].flags & PTA_3DNOW
&& !(target_flags & MASK_3DNOW_SET))
target_flags |= MASK_3DNOW;
if (processor_alias_table[i].flags & PTA_3DNOW_A
&& !(target_flags & MASK_3DNOW_A_SET))
target_flags |= MASK_3DNOW_A;
if (processor_alias_table[i].flags & PTA_SSE
&& !(target_flags & MASK_SSE_SET))
target_flags |= MASK_SSE;
if (processor_alias_table[i].flags & PTA_SSE2
&& !(target_flags & MASK_SSE2_SET))
target_flags |= MASK_SSE2;
break; break;
} }
...@@ -1045,7 +1084,7 @@ override_options () ...@@ -1045,7 +1084,7 @@ override_options ()
target_flags |= MASK_3DNOW_A; target_flags |= MASK_3DNOW_A;
} }
if ((x86_accumulate_outgoing_args & CPUMASK) if ((x86_accumulate_outgoing_args & CPUMASK)
&& !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS) && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS_SET)
&& !optimize_size) && !optimize_size)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
......
...@@ -112,25 +112,30 @@ extern int target_flags; ...@@ -112,25 +112,30 @@ extern int target_flags;
#define MASK_NO_FANCY_MATH_387 0x00000040 /* Disable sin, cos, sqrt */ #define MASK_NO_FANCY_MATH_387 0x00000040 /* Disable sin, cos, sqrt */
#define MASK_OMIT_LEAF_FRAME_POINTER 0x080 /* omit leaf frame pointers */ #define MASK_OMIT_LEAF_FRAME_POINTER 0x080 /* omit leaf frame pointers */
#define MASK_STACK_PROBE 0x00000100 /* Enable stack probing */ #define MASK_STACK_PROBE 0x00000100 /* Enable stack probing */
#define MASK_NO_ALIGN_STROPS 0x00001000 /* Enable aligning of string ops. */ #define MASK_NO_ALIGN_STROPS 0x00000200 /* Enable aligning of string ops. */
#define MASK_INLINE_ALL_STROPS 0x00002000 /* Inline stringops in all cases */ #define MASK_INLINE_ALL_STROPS 0x00000400 /* Inline stringops in all cases */
#define MASK_NO_PUSH_ARGS 0x00004000 /* Use push instructions */ #define MASK_NO_PUSH_ARGS 0x00000800 /* Use push instructions */
#define MASK_ACCUMULATE_OUTGOING_ARGS 0x00008000/* Accumulate outgoing args */ #define MASK_ACCUMULATE_OUTGOING_ARGS 0x00001000/* Accumulate outgoing args */
#define MASK_NO_ACCUMULATE_OUTGOING_ARGS 0x00010000 #define MASK_ACCUMULATE_OUTGOING_ARGS_SET 0x00002000
#define MASK_MMX 0x00020000 /* Support MMX regs/builtins */ #define MASK_MMX 0x00004000 /* Support MMX regs/builtins */
#define MASK_SSE 0x00040000 /* Support SSE regs/builtins */ #define MASK_MMX_SET 0x00008000
#define MASK_SSE2 0x00080000 /* Support SSE2 regs/builtins */ #define MASK_SSE 0x00010000 /* Support SSE regs/builtins */
#define MASK_SSE_SET 0x00020000
#define MASK_SSE2 0x00040000 /* Support SSE2 regs/builtins */
#define MASK_SSE2_SET 0x00080000
#define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */ #define MASK_3DNOW 0x00100000 /* Support 3Dnow builtins */
#define MASK_3DNOW_A 0x00200000 /* Support Athlon 3Dnow builtins */ #define MASK_3DNOW_SET 0x00200000
#define MASK_128BIT_LONG_DOUBLE 0x00400000 /* long double size is 128bit */ #define MASK_3DNOW_A 0x00400000 /* Support Athlon 3Dnow builtins */
#define MASK_MIX_SSE_I387 0x00800000 /* Mix SSE and i387 instructions */ #define MASK_3DNOW_A_SET 0x00800000
#define MASK_64BIT 0x01000000 /* Produce 64bit code */ #define MASK_128BIT_LONG_DOUBLE 0x01000000 /* long double size is 128bit */
#define MASK_NO_RED_ZONE 0x02000000 /* Do not use red zone */ #define MASK_MIX_SSE_I387 0x02000000 /* Mix SSE and i387 instructions */
#define MASK_64BIT 0x04000000 /* Produce 64bit code */
#define MASK_NO_RED_ZONE 0x08000000 /* Do not use red zone */
/* Temporary codegen switches */ /* Temporary codegen switches */
#define MASK_INTEL_SYNTAX 0x00000200 #define MASK_INTEL_SYNTAX 0x10000000
#define MASK_DEBUG_ARG 0x00000400 /* function_arg */ #define MASK_DEBUG_ARG 0x20000000 /* function_arg */
#define MASK_DEBUG_ADDR 0x00000800 /* GO_IF_LEGITIMATE_ADDRESS */ #define MASK_DEBUG_ADDR 0x40000000 /* GO_IF_LEGITIMATE_ADDRESS */
/* Use the floating point instructions */ /* Use the floating point instructions */
#define TARGET_80387 (target_flags & MASK_80387) #define TARGET_80387 (target_flags & MASK_80387)
...@@ -335,24 +340,30 @@ extern const int x86_epilogue_using_move, x86_decompose_lea; ...@@ -335,24 +340,30 @@ extern const int x86_epilogue_using_move, x86_decompose_lea;
N_("Use push instructions to save outgoing arguments") }, \ N_("Use push instructions to save outgoing arguments") }, \
{ "no-push-args", MASK_NO_PUSH_ARGS, \ { "no-push-args", MASK_NO_PUSH_ARGS, \
N_("Do not use push instructions to save outgoing arguments") }, \ N_("Do not use push instructions to save outgoing arguments") }, \
{ "accumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS, \ { "accumulate-outgoing-args", (MASK_ACCUMULATE_OUTGOING_ARGS \
| MASK_ACCUMULATE_OUTGOING_ARGS_SET), \
N_("Use push instructions to save outgoing arguments") }, \ N_("Use push instructions to save outgoing arguments") }, \
{ "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS, \ { "no-accumulate-outgoing-args",MASK_ACCUMULATE_OUTGOING_ARGS_SET, \
N_("Do not use push instructions to save outgoing arguments") }, \ N_("Do not use push instructions to save outgoing arguments") }, \
{ "mmx", MASK_MMX, N_("Support MMX builtins") }, \ { "mmx", MASK_MMX | MASK_MMX_SET, \
{ "no-mmx", -MASK_MMX, \ N_("Support MMX builtins") }, \
{ "no-mmx", -MASK_MMX, \
N_("Do not support MMX builtins") }, \ N_("Do not support MMX builtins") }, \
{ "3dnow", MASK_3DNOW, \ { "no-mmx", MASK_MMX_SET, N_("") }, \
{ "3dnow", MASK_3DNOW | MASK_3DNOW_SET, \
N_("Support 3DNow! builtins") }, \ N_("Support 3DNow! builtins") }, \
{ "no-3dnow", -MASK_3DNOW, \ { "no-3dnow", -MASK_3DNOW, N_("") }, \
{ "no-3dnow", MASK_3DNOW_SET, \
N_("Do not support 3DNow! builtins") }, \ N_("Do not support 3DNow! builtins") }, \
{ "sse", MASK_SSE, \ { "sse", MASK_SSE | MASK_SSE_SET, \
N_("Support MMX and SSE builtins and code generation") }, \ N_("Support MMX and SSE builtins and code generation") }, \
{ "no-sse", -MASK_SSE, \ { "no-sse", -MASK_SSE, N_("") }, \
{ "no-sse", MASK_SSE_SET, \
N_("Do not support MMX and SSE builtins and code generation") }, \ N_("Do not support MMX and SSE builtins and code generation") }, \
{ "sse2", MASK_SSE2, \ { "sse2", MASK_SSE2 | MASK_SSE2_SET, \
N_("Support MMX, SSE and SSE2 builtins and code generation") }, \ N_("Support MMX, SSE and SSE2 builtins and code generation") }, \
{ "no-sse2", -MASK_SSE2, \ { "no-sse2", -MASK_SSE2, N_("") }, \
{ "no-sse2", MASK_SSE2_SET, \
N_("Do not support MMX, SSE and SSE2 builtins and code generation") }, \ N_("Do not support MMX, SSE and SSE2 builtins and code generation") }, \
{ "mix-sse-i387", MASK_MIX_SSE_I387, \ { "mix-sse-i387", MASK_MIX_SSE_I387, \
N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\ N_("Use both SSE and i387 instruction sets for floating point arithmetics") },\
...@@ -522,11 +533,22 @@ extern int ix86_arch; ...@@ -522,11 +533,22 @@ extern int ix86_arch;
%{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\ %{march=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\
%{m386|mcpu=i386:-D__tune_i386__ }\ %{m386|mcpu=i386:-D__tune_i386__ }\
%{m486|mcpu=i486:-D__tune_i486__ }\ %{m486|mcpu=i486:-D__tune_i486__ }\
%{mpentium|mcpu=pentium|mcpu=i586:-D__tune_i586__ -D__tune_pentium__ }\ %{mpentium|mcpu=pentium|mcpu=i586|mcpu=pentium-mmx:-D__tune_i586__ -D__tune_pentium__ }\
%{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_i686__ -D__tune_pentiumpro__ }\ %{mpentiumpro|mcpu=pentiumpro|mcpu=i686|cpu=pentium2|cpu=pentium3:-D__tune_i686__\
%{mcpu=k6:-D__tune_k6__ }\ -D__tune_pentiumpro__ }\
%{mcpu=athlon:-D__tune_athlon__ }\ %{mcpu=k6|mcpu=k6-2|mcpu=k6-3:-D__tune_k6__ }\
%{mcpu=athlon|mcpu=athlon-tbird|mcpu=athlon-4|mcpu=athlon-xp|mcpu=athlon-mp:\
-D__tune_athlon__ }\
%{mcpu=pentium4:-D__tune_pentium4__ }\ %{mcpu=pentium4:-D__tune_pentium4__ }\
%{march=march=athlon-tbird|march=athlon-xp|march=athlon-mp|march=pentium3|march=pentium4:\
-D__SSE__ }\
%{march=pentium-mmx|march=k6|march=k6-2|march=k6-3\
march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\
|march=athlon-mp|march=pentium2|march=pentium3|march=pentium4: -D__MMX__ }\
%{march=k6|march=k6-2|march=k6-3\
march=athlon|march=athlon-tbird|march=athlon-4|march=athlon-xp\
|march=athlon-mp: -D__3dNOW__ }\
%{mcpu=mcpu=pentium4: -D__SSE2__ }\
%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
#ifndef CPP_CPU_SPEC #ifndef CPP_CPU_SPEC
......
...@@ -272,7 +272,7 @@ in the following sections. ...@@ -272,7 +272,7 @@ in the following sections.
-fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol -fno-inline -fno-math-errno -fno-peephole -fno-peephole2 @gol
-funsafe-math-optimizations -fno-trapping-math @gol -funsafe-math-optimizations -fno-trapping-math @gol
-fomit-frame-pointer -foptimize-register-move @gol -fomit-frame-pointer -foptimize-register-move @gol
-foptimize-sibling-calls -freduce-all-givs @gol -foptimize-sibling-calls -fprefetch-loop-arrays -freduce-all-givs @gol
-fregmove -frename-registers @gol -fregmove -frename-registers @gol
-frerun-cse-after-loop -frerun-loop-opt @gol -frerun-cse-after-loop -frerun-loop-opt @gol
-fschedule-insns -fschedule-insns2 @gol -fschedule-insns -fschedule-insns2 @gol
...@@ -3570,6 +3570,10 @@ the loop is entered. This usually makes programs run more slowly. ...@@ -3570,6 +3570,10 @@ the loop is entered. This usually makes programs run more slowly.
@option{-funroll-all-loops} implies the same options as @option{-funroll-all-loops} implies the same options as
@option{-funroll-loops}, @option{-funroll-loops},
@item -fprefetch-loop-arrays
@opindex fprefetch-loop-arrays
If supported by the target machine, generate instructions to prefetch
memory to improve the performance of loops that access large arrays.
@item -fmove-all-movables @item -fmove-all-movables
@opindex fmove-all-movables @opindex fmove-all-movables
...@@ -7476,10 +7480,13 @@ computers: ...@@ -7476,10 +7480,13 @@ computers:
@table @gcctabopt @table @gcctabopt
@item -mcpu=@var{cpu-type} @item -mcpu=@var{cpu-type}
@opindex mcpu @opindex mcpu
Assume the defaults for the machine type @var{cpu-type} when scheduling Tune to @var{cpu-type} everything applicable about the generated code, except
instructions. The choices for @var{cpu-type} are @samp{i386}, for the ABI and the set of available instructions. The choices for
@samp{i486}, @samp{i586}, @samp{i686}, @samp{pentium}, @var{cpu-type} are @samp{i386}, @samp{i486}, @samp{i586}, @samp{i686},
@samp{pentiumpro}, @samp{pentium4}, @samp{k6}, and @samp{athlon} @samp{pentium}, @samp{pentium-mmx}, @samp{pentiumpro}, @samp{pentium2},
@samp{pentium3}, @samp{pentium4}, @samp{k6}, @samp{k6-2}, @samp{k6-3},
@samp{athlon}, @samp{athlon-tbird}, @samp{athlon-4}, @samp{athlon-xp}
and @samp{athlon-mp}.
While picking a specific @var{cpu-type} will schedule things appropriately While picking a specific @var{cpu-type} will schedule things appropriately
for that particular chip, the compiler will not generate any code that for that particular chip, the compiler will not generate any code that
......
...@@ -269,6 +269,10 @@ extern int flag_unroll_all_loops; ...@@ -269,6 +269,10 @@ extern int flag_unroll_all_loops;
extern int flag_move_all_movables; extern int flag_move_all_movables;
/* Nonzero enables prefetch optimizations for arrays in loops. */
extern int flag_prefetch_loop_arrays;
/* Nonzero forces all general induction variables in loops to be /* Nonzero forces all general induction variables in loops to be
strength reduced. */ strength reduced. */
......
...@@ -27,6 +27,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -27,6 +27,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
/* Flags passed to loop_optimize. */ /* Flags passed to loop_optimize. */
#define LOOP_UNROLL 1 #define LOOP_UNROLL 1
#define LOOP_BCT 2 #define LOOP_BCT 2
#define LOOP_PREFETCH 4
/* Get the loop info pointer of a loop. */ /* Get the loop info pointer of a loop. */
#define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux) #define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux)
......
...@@ -329,12 +329,17 @@ estimate_probability (loops_info) ...@@ -329,12 +329,17 @@ estimate_probability (loops_info)
for (i = 0; i < loops_info->num; i++) for (i = 0; i < loops_info->num; i++)
{ {
int j; int j;
int exits;
struct loop *loop = &loops_info->array[i];
for (j = loops_info->array[i].first->index; flow_loop_scan (loops_info, loop, LOOP_EXIT_EDGES);
j <= loops_info->array[i].last->index; exits = loop->num_exits;
for (j = loop->first->index;
j <= loop->last->index;
++j) ++j)
{ {
if (TEST_BIT (loops_info->array[i].nodes, j)) if (TEST_BIT (loop->nodes, j))
{ {
int header_found = 0; int header_found = 0;
edge e; edge e;
...@@ -342,8 +347,8 @@ estimate_probability (loops_info) ...@@ -342,8 +347,8 @@ estimate_probability (loops_info)
/* Loop branch heuristics - predict as taken an edge back to /* Loop branch heuristics - predict as taken an edge back to
a loop's head. */ a loop's head. */
for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next) for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
if (e->dest == loops_info->array[i].header if (e->dest == loop->header
&& e->src == loops_info->array[i].latch) && e->src == loop->latch)
{ {
header_found = 1; header_found = 1;
predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN); predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
...@@ -354,8 +359,11 @@ estimate_probability (loops_info) ...@@ -354,8 +359,11 @@ estimate_probability (loops_info)
if (!header_found) if (!header_found)
for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next) for (e = BASIC_BLOCK(j)->succ; e; e = e->succ_next)
if (e->dest->index <= 0 if (e->dest->index <= 0
|| !TEST_BIT (loops_info->array[i].nodes, e->dest->index)) || !TEST_BIT (loop->nodes, e->dest->index))
predict_edge_def (e, PRED_LOOP_EXIT, NOT_TAKEN); predict_edge (e, PRED_LOOP_EXIT,
(REG_BR_PROB_BASE
- predictor_info [(int)PRED_LOOP_EXIT].hitrate)
/ exits);
} }
} }
} }
...@@ -435,74 +443,83 @@ estimate_probability (loops_info) ...@@ -435,74 +443,83 @@ estimate_probability (loops_info)
/* Try "pointer heuristic." /* Try "pointer heuristic."
A comparison ptr == 0 is predicted as false. A comparison ptr == 0 is predicted as false.
Similarly, a comparison ptr1 == ptr2 is predicted as false. */ Similarly, a comparison ptr1 == ptr2 is predicted as false. */
switch (GET_CODE (cond)) if (GET_RTX_CLASS (GET_CODE (cond)) == '<'
{ && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
case EQ: || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
if (GET_CODE (XEXP (cond, 0)) == REG switch (GET_CODE (cond))
&& REG_POINTER (XEXP (cond, 0)) {
&& (XEXP (cond, 1) == const0_rtx case EQ:
|| (GET_CODE (XEXP (cond, 1)) == REG
&& REG_POINTER (XEXP (cond, 1)))))
predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN); predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
break; break;
case NE: case NE:
if (GET_CODE (XEXP (cond, 0)) == REG
&& REG_POINTER (XEXP (cond, 0))
&& (XEXP (cond, 1) == const0_rtx
|| (GET_CODE (XEXP (cond, 1)) == REG
&& REG_POINTER (XEXP (cond, 1)))))
predict_insn_def (last_insn, PRED_POINTER, TAKEN); predict_insn_def (last_insn, PRED_POINTER, TAKEN);
break; break;
default:
default: break;
break; }
} else
/* Try "opcode heuristic." /* Try "opcode heuristic."
EQ tests are usually false and NE tests are usually true. Also, EQ tests are usually false and NE tests are usually true. Also,
most quantities are positive, so we can make the appropriate guesses most quantities are positive, so we can make the appropriate guesses
about signed comparisons against zero. */ about signed comparisons against zero. */
switch (GET_CODE (cond)) switch (GET_CODE (cond))
{ {
case CONST_INT: case CONST_INT:
/* Unconditional branch. */ /* Unconditional branch. */
predict_insn_def (last_insn, PRED_UNCONDITIONAL, predict_insn_def (last_insn, PRED_UNCONDITIONAL,
cond == const0_rtx ? NOT_TAKEN : TAKEN); cond == const0_rtx ? NOT_TAKEN : TAKEN);
break; break;
case EQ: case EQ:
case UNEQ: case UNEQ:
predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN); /* Floating point comparisons appears to behave in a very
break; inpredictable way because of special role of = tests in
case NE: FP code. */
case LTGT: if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
predict_insn_def (last_insn, PRED_OPCODE, TAKEN); ;
break; /* Comparisons with 0 are often used for booleans and there is
case ORDERED: nothing usefull to predict about them. */
predict_insn_def (last_insn, PRED_OPCODE, TAKEN); else if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 0) == const0_rtx)
break; ;
case UNORDERED: else
predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN); predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
break; break;
case LE: case NE:
case LT: case LTGT:
if (XEXP (cond, 1) == const0_rtx /* Floating point comparisons appears to behave in a very
|| (GET_CODE (XEXP (cond, 1)) == CONST_INT inpredictable way because of special role of = tests in
&& INTVAL (XEXP (cond, 1)) == -1)) FP code. */
predict_insn_def (last_insn, PRED_OPCODE, NOT_TAKEN); if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
break; ;
case GE: /* Comparisons with 0 are often used for booleans and there is
case GT: nothing usefull to predict about them. */
if (XEXP (cond, 1) == const0_rtx else if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 0) == const0_rtx)
|| (GET_CODE (XEXP (cond, 1)) == CONST_INT ;
&& INTVAL (XEXP (cond, 1)) == -1)) else
predict_insn_def (last_insn, PRED_OPCODE, TAKEN); predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
break; break;
case ORDERED:
default: predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
break; break;
} case UNORDERED:
predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
break;
case LE:
case LT:
if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
|| XEXP (cond, 1) == constm1_rtx)
predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
break;
case GE:
case GT:
if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
|| XEXP (cond, 1) == constm1_rtx)
predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
break;
default:
break;
}
} }
/* Attach the combined probability to each conditional jump. */ /* Attach the combined probability to each conditional jump. */
......
...@@ -89,7 +89,9 @@ DEF_PREDICTOR (PRED_LOOP_HEADER, "loop header", HITRATE (64), 0) ...@@ -89,7 +89,9 @@ DEF_PREDICTOR (PRED_LOOP_HEADER, "loop header", HITRATE (64), 0)
DEF_PREDICTOR (PRED_POINTER, "pointer", HITRATE (83), 0) DEF_PREDICTOR (PRED_POINTER, "pointer", HITRATE (83), 0)
/* NE is probable, EQ not etc... */ /* NE is probable, EQ not etc... */
DEF_PREDICTOR (PRED_OPCODE, "opcode", HITRATE (55), 0) DEF_PREDICTOR (PRED_OPCODE_POSITIVE, "opcode values positive", HITRATE (78), 0)
DEF_PREDICTOR (PRED_OPCODE_NONEQUAL, "opcode values nonequal", HITRATE (70), 0)
DEF_PREDICTOR (PRED_FPOPCODE, "fp_opcode", HITRATE (90), 0)
/* Branch guarding call is probably taken. */ /* Branch guarding call is probably taken. */
DEF_PREDICTOR (PRED_CALL, "call", HITRATE (70), 0) DEF_PREDICTOR (PRED_CALL, "call", HITRATE (70), 0)
......
...@@ -46,6 +46,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA ...@@ -46,6 +46,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "flags.h" #include "flags.h"
#include "insn-attr.h" #include "insn-attr.h"
#include "insn-config.h" #include "insn-config.h"
#include "insn-flags.h"
#include "hard-reg-set.h" #include "hard-reg-set.h"
#include "recog.h" #include "recog.h"
#include "output.h" #include "output.h"
...@@ -544,6 +545,10 @@ int flag_unroll_loops; ...@@ -544,6 +545,10 @@ int flag_unroll_loops;
int flag_unroll_all_loops; int flag_unroll_all_loops;
/* Nonzero enables prefetch optimizations for arrays in loops. */
int flag_prefetch_loop_arrays;
/* Nonzero forces all invariant computations in loops to be moved /* Nonzero forces all invariant computations in loops to be moved
outside the loop. */ outside the loop. */
...@@ -1001,6 +1006,8 @@ lang_independent_options f_options[] = ...@@ -1001,6 +1006,8 @@ lang_independent_options f_options[] =
N_("Perform loop unrolling when iteration count is known") }, N_("Perform loop unrolling when iteration count is known") },
{"unroll-all-loops", &flag_unroll_all_loops, 1, {"unroll-all-loops", &flag_unroll_all_loops, 1,
N_("Perform loop unrolling for all loops") }, N_("Perform loop unrolling for all loops") },
{"prefetch-loop-arrays", &flag_prefetch_loop_arrays, 1,
N_("Generate prefetch instructions, if available, for arrays in loops") },
{"move-all-movables", &flag_move_all_movables, 1, {"move-all-movables", &flag_move_all_movables, 1,
N_("Force all loop invariant computations out of loops") }, N_("Force all loop invariant computations out of loops") },
{"reduce-all-givs", &flag_reduce_all_givs, 1, {"reduce-all-givs", &flag_reduce_all_givs, 1,
...@@ -2863,7 +2870,8 @@ rest_of_compilation (decl) ...@@ -2863,7 +2870,8 @@ rest_of_compilation (decl)
} }
cleanup_barriers (); cleanup_barriers ();
loop_optimize (insns, rtl_dump_file, loop_optimize (insns, rtl_dump_file,
(flag_unroll_loops ? LOOP_UNROLL : 0) | LOOP_BCT); (flag_unroll_loops ? LOOP_UNROLL : 0) | LOOP_BCT
| (flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0));
close_dump_file (DFI_loop, print_rtl, insns); close_dump_file (DFI_loop, print_rtl, insns);
timevar_pop (TV_LOOP); timevar_pop (TV_LOOP);
...@@ -4928,6 +4936,20 @@ process_options () ...@@ -4928,6 +4936,20 @@ process_options ()
flag_function_sections = 0; flag_function_sections = 0;
} }
#ifndef HAVE_prefetch
if (flag_prefetch_loop_arrays)
{
warning ("-fprefetch-loop-arrays not supported for this target");
flag_prefetch_loop_arrays = 0;
}
#else
if (flag_prefetch_loop_arrays && !HAVE_prefetch)
{
warning ("-fprefetch-loop-arrays not supported for this target (try -march switches)");
flag_prefetch_loop_arrays = 0;
}
#endif
#ifndef OBJECT_FORMAT_ELF #ifndef OBJECT_FORMAT_ELF
if (flag_function_sections && write_symbols != NO_DEBUG) if (flag_function_sections && write_symbols != NO_DEBUG)
warning ("-ffunction-sections may affect debugging on some targets"); warning ("-ffunction-sections may affect debugging on some targets");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment