Commit 5accd822 by David Edelsohn Committed by David Edelsohn

toplev.c (rest_of_compilation): Set bct_p on second call to loop_optimize.

        * toplev.c (rest_of_compilation): Set bct_p on second call to
        loop_optimize.
        * loop.c (loop_optimize, scan_loop, strength_reduce): New argument
        bct_p.
        (strength_reduce): Only call analyze_loop_iterations and
        insert_bct if bct_p set.
        (check_dbra_loop): Fix typo.
        (insert_bct): Use word_mode instead of SImode.
        (instrument_loop_bct): Likewise.  Do not delete iteration count
        condition code generation insn.  Initialize iteration count before
        loop start.
        * rtl.h (loop_optimize): Update prototype.
        * ginclude/va-ppc.h (va_arg): longlong types in overflow area are
        not doubleword aligned.
        * rs6000.c (optimization_options): New function.
        (secondary_reload_class): Only call true_regnum for PSEUDO_REGs.
        * rs6000.h (OPTIMIZATION_OPTIONS): Define.
        (REG_ALLOC_ORDER): Allocate highest numbered condition registers
        first; cr1 can be used for FP record condition insns.

From-SVN: r22471
parent 73595522
Fri Sep 18 23:50:56 1998 David Edelsohn <edelsohn@mhpcc.edu>
* toplev.c (rest_of_compilation): Set bct_p on second call to
loop_optimize.
* loop.c (loop_optimize, scan_loop, strength_reduce): New argument
bct_p.
(strength_reduce): Only call analyze_loop_iterations and
insert_bct if bct_p set.
(check_dbra_loop): Fix typo.
(insert_bct): Use word_mode instead of SImode.
(instrument_loop_bct): Likewise. Do not delete iteration count
condition code generation insn. Initialize iteration count before
loop start.
* rtl.h (loop_optimize): Update prototype.
* ginclude/va-ppc.h (va_arg): longlong types in overflow area are
not doubleword aligned.
* rs6000.c (optimization_options): New function.
(secondary_reload_class): Only call true_regnum for PSEUDO_REGs.
* rs6000.h (OPTIMIZATION_OPTIONS): Define.
(REG_ALLOC_ORDER): Allocate highest numbered condition registers
first; cr1 can be used for FP record condition insns.
Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com> Fri Sep 18 09:44:55 1998 Nick Clifton <nickc@cygnus.com>
* config/m32r/m32r.h (m32r_block_immediate_operand): Add to * config/m32r/m32r.h (m32r_block_immediate_operand): Add to
......
...@@ -347,6 +347,20 @@ rs6000_override_options (default_cpu) ...@@ -347,6 +347,20 @@ rs6000_override_options (default_cpu)
SUBTARGET_OVERRIDE_OPTIONS; SUBTARGET_OVERRIDE_OPTIONS;
#endif #endif
} }
/* Hook for adjusting default optimization flags; the OPTIMIZATION_OPTIONS
   macro expands to a call of this function with (LEVEL, SIZE).

   LEVEL is compared against 1 below (presumably the -O level; confirm
   against the caller).  SIZE is marked ATTRIBUTE_UNUSED and is not read.

   The whole body is currently compiled out by `#if 0', so as written this
   function does nothing.  If that guard were removed and HAIFA were
   defined, a LEVEL of 1 or more would set flag_branch_on_count_reg.  */
void
optimization_options (level, size)
int level;
int size ATTRIBUTE_UNUSED;
{
#if 0
#ifdef HAIFA
/* When optimizing, enable use of BCT instruction. */
if (level >= 1)
flag_branch_on_count_reg = 1;
#endif /* HAIFA */
#endif /* 0 */
}
/* Do anything needed at the start of the asm file. */ /* Do anything needed at the start of the asm file. */
...@@ -1305,14 +1319,18 @@ function_arg_padding (mode, type) ...@@ -1305,14 +1319,18 @@ function_arg_padding (mode, type)
Windows NT wants anything >= 8 bytes to be double word aligned. Windows NT wants anything >= 8 bytes to be double word aligned.
V.4 wants long longs to be double word aligned. */ V.4 wants long longs to be double word aligned.
FP emulation: double precision passed, returned, and same alignment
as long long. */
int int
function_arg_boundary (mode, type) function_arg_boundary (mode, type)
enum machine_mode mode; enum machine_mode mode;
tree type; tree type;
{ {
if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS) && mode == DImode) if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_SOLARIS)
&& ((mode == DImode) || (TARGET_SOFT_FLOAT && mode == DFmode)))
return 64; return 64;
if (DEFAULT_ABI != ABI_NT || TARGET_64BIT) if (DEFAULT_ABI != ABI_NT || TARGET_64BIT)
...@@ -2188,10 +2206,7 @@ secondary_reload_class (class, mode, in) ...@@ -2188,10 +2206,7 @@ secondary_reload_class (class, mode, in)
enum machine_mode mode ATTRIBUTE_UNUSED; enum machine_mode mode ATTRIBUTE_UNUSED;
rtx in; rtx in;
{ {
int regno = true_regnum (in); int regno;
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
/* We can not copy a symbolic operand directly into anything other than /* We can not copy a symbolic operand directly into anything other than
BASE_REGS for TARGET_ELF. So indicate that a register from BASE_REGS BASE_REGS for TARGET_ELF. So indicate that a register from BASE_REGS
...@@ -2203,6 +2218,25 @@ secondary_reload_class (class, mode, in) ...@@ -2203,6 +2218,25 @@ secondary_reload_class (class, mode, in)
|| GET_CODE (in) == CONST)) || GET_CODE (in) == CONST))
return BASE_REGS; return BASE_REGS;
if (GET_CODE (in) == REG)
{
regno = REGNO (in);
if (regno >= FIRST_PSEUDO_REGISTER)
{
regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
}
}
else if (GET_CODE (in) == SUBREG)
{
regno = true_regnum (in);
if (regno >= FIRST_PSEUDO_REGISTER)
regno = -1;
}
else
regno = -1;
/* We can place anything into GENERAL_REGS and can put GENERAL_REGS /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
into anything. */ into anything. */
if (class == GENERAL_REGS || class == BASE_REGS if (class == GENERAL_REGS || class == BASE_REGS
......
...@@ -473,10 +473,17 @@ extern int rs6000_debug_arg; /* debug argument handling */ ...@@ -473,10 +473,17 @@ extern int rs6000_debug_arg; /* debug argument handling */
defined, is executed once just after all the command options have defined, is executed once just after all the command options have
been parsed. been parsed.
Don't use this macro to turn on various extra optimizations for
`-O'. That is what `OPTIMIZATION_OPTIONS' is for.
On the RS/6000 this is used to define the target cpu type. */ On the RS/6000 this is used to define the target cpu type. */
#define OVERRIDE_OPTIONS rs6000_override_options (TARGET_CPU_DEFAULT) #define OVERRIDE_OPTIONS rs6000_override_options (TARGET_CPU_DEFAULT)
/* Define this to change the optimizations performed by default. */
#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) optimization_options(LEVEL,SIZE)
/* Show we can debug even without a frame pointer. */ /* Show we can debug even without a frame pointer. */
#define CAN_DEBUG_WITHOUT_FP #define CAN_DEBUG_WITHOUT_FP
...@@ -712,9 +719,10 @@ extern int rs6000_debug_arg; /* debug argument handling */ ...@@ -712,9 +719,10 @@ extern int rs6000_debug_arg; /* debug argument handling */
fp13 - fp2 (not saved; incoming fp arg registers) fp13 - fp2 (not saved; incoming fp arg registers)
fp1 (not saved; return value) fp1 (not saved; return value)
fp31 - fp14 (saved; order given to save least number) fp31 - fp14 (saved; order given to save least number)
cr1, cr6, cr7 (not saved or special) cr7, cr6 (not saved or special)
cr1 (not saved, but used for FP operations)
cr0 (not saved, but used for arithmetic operations) cr0 (not saved, but used for arithmetic operations)
cr2, cr3, cr4 (saved) cr4, cr3, cr2 (saved)
r0 (not saved; cannot be base reg) r0 (not saved; cannot be base reg)
r9 (not saved; best for TImode) r9 (not saved; best for TImode)
r11, r10, r8-r4 (not saved; highest used first to make less conflict) r11, r10, r8-r4 (not saved; highest used first to make less conflict)
...@@ -732,7 +740,7 @@ extern int rs6000_debug_arg; /* debug argument handling */ ...@@ -732,7 +740,7 @@ extern int rs6000_debug_arg; /* debug argument handling */
33, \ 33, \
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
50, 49, 48, 47, 46, \ 50, 49, 48, 47, 46, \
69, 74, 75, 68, 70, 71, 72, \ 75, 74, 69, 68, 72, 71, 70, \
0, \ 0, \
9, 11, 10, 8, 7, 6, 5, 4, \ 9, 11, 10, 8, 7, 6, 5, 4, \
3, \ 3, \
......
...@@ -158,9 +158,6 @@ __extension__ (*({ \ ...@@ -158,9 +158,6 @@ __extension__ (*({ \
} \ } \
else \ else \
{ \ { \
if (__va_longlong_p(TYPE) && ((long)__va_overflow(AP) & 4) != 0) \
__va_overflow(AP) += 4; \
\
__ptr = (TYPE *) (void *) (__va_overflow(AP)); \ __ptr = (TYPE *) (void *) (__va_overflow(AP)); \
__va_overflow(AP) += __va_size (TYPE) * sizeof (long); \ __va_overflow(AP) += __va_size (TYPE) * sizeof (long); \
} \ } \
......
...@@ -310,7 +310,7 @@ static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type, ...@@ -310,7 +310,7 @@ static void count_loop_regs_set PROTO((rtx, rtx, varray_type, varray_type,
int *, int)); int *, int));
static void note_addr_stored PROTO((rtx, rtx)); static void note_addr_stored PROTO((rtx, rtx));
static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx)); static int loop_reg_used_before_p PROTO((rtx, rtx, rtx, rtx, rtx));
static void scan_loop PROTO((rtx, rtx, int)); static void scan_loop PROTO((rtx, rtx, int, int));
#if 0 #if 0
static void replace_call_address PROTO((rtx, rtx, rtx)); static void replace_call_address PROTO((rtx, rtx, rtx));
#endif #endif
...@@ -324,7 +324,7 @@ static int rtx_equal_for_loop_p PROTO((rtx, rtx, struct movable *)); ...@@ -324,7 +324,7 @@ static int rtx_equal_for_loop_p PROTO((rtx, rtx, struct movable *));
static void add_label_notes PROTO((rtx, rtx)); static void add_label_notes PROTO((rtx, rtx));
static void move_movables PROTO((struct movable *, int, int, rtx, rtx, int)); static void move_movables PROTO((struct movable *, int, int, rtx, rtx, int));
static int count_nonfixed_reads PROTO((rtx)); static int count_nonfixed_reads PROTO((rtx));
static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int)); static void strength_reduce PROTO((rtx, rtx, rtx, int, rtx, rtx, int, int));
static void find_single_use_in_loop PROTO((rtx, rtx, varray_type)); static void find_single_use_in_loop PROTO((rtx, rtx, varray_type));
static int valid_initial_value_p PROTO((rtx, rtx, int, rtx)); static int valid_initial_value_p PROTO((rtx, rtx, int, rtx));
static void find_mem_givs PROTO((rtx, rtx, int, rtx, rtx)); static void find_mem_givs PROTO((rtx, rtx, int, rtx, rtx));
...@@ -440,11 +440,11 @@ init_loop () ...@@ -440,11 +440,11 @@ init_loop ()
(or 0 if none should be output). */ (or 0 if none should be output). */
void void
loop_optimize (f, dumpfile, unroll_p) loop_optimize (f, dumpfile, unroll_p, bct_p)
/* f is the first instruction of a chain of insns for one function */ /* f is the first instruction of a chain of insns for one function */
rtx f; rtx f;
FILE *dumpfile; FILE *dumpfile;
int unroll_p; int unroll_p, bct_p;
{ {
register rtx insn; register rtx insn;
register int i; register int i;
...@@ -589,7 +589,7 @@ loop_optimize (f, dumpfile, unroll_p) ...@@ -589,7 +589,7 @@ loop_optimize (f, dumpfile, unroll_p)
for (i = max_loop_num-1; i >= 0; i--) for (i = max_loop_num-1; i >= 0; i--)
if (! loop_invalid[i] && loop_number_loop_ends[i]) if (! loop_invalid[i] && loop_number_loop_ends[i])
scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i], scan_loop (loop_number_loop_starts[i], loop_number_loop_ends[i],
unroll_p); unroll_p, bct_p);
/* If debugging and unrolling loops, we must replicate the tree nodes /* If debugging and unrolling loops, we must replicate the tree nodes
corresponding to the blocks inside the loop, so that the original one corresponding to the blocks inside the loop, so that the original one
...@@ -643,9 +643,9 @@ next_insn_in_loop (insn, start, end, loop_top) ...@@ -643,9 +643,9 @@ next_insn_in_loop (insn, start, end, loop_top)
write, then we can also mark the memory read as invariant. */ write, then we can also mark the memory read as invariant. */
static void static void
scan_loop (loop_start, end, unroll_p) scan_loop (loop_start, end, unroll_p, bct_p)
rtx loop_start, end; rtx loop_start, end;
int unroll_p; int unroll_p, bct_p;
{ {
register int i; register int i;
rtx p; rtx p;
...@@ -1185,7 +1185,7 @@ scan_loop (loop_start, end, unroll_p) ...@@ -1185,7 +1185,7 @@ scan_loop (loop_start, end, unroll_p)
{ {
the_movables = movables; the_movables = movables;
strength_reduce (scan_start, end, loop_top, strength_reduce (scan_start, end, loop_top,
insn_count, loop_start, end, unroll_p); insn_count, loop_start, end, unroll_p, bct_p);
} }
VARRAY_FREE (n_times_set); VARRAY_FREE (n_times_set);
...@@ -3579,14 +3579,14 @@ static rtx addr_placeholder; ...@@ -3579,14 +3579,14 @@ static rtx addr_placeholder;
static void static void
strength_reduce (scan_start, end, loop_top, insn_count, strength_reduce (scan_start, end, loop_top, insn_count,
loop_start, loop_end, unroll_p) loop_start, loop_end, unroll_p, bct_p)
rtx scan_start; rtx scan_start;
rtx end; rtx end;
rtx loop_top; rtx loop_top;
int insn_count; int insn_count;
rtx loop_start; rtx loop_start;
rtx loop_end; rtx loop_end;
int unroll_p; int unroll_p, bct_p;
{ {
rtx p; rtx p;
rtx set; rtx set;
...@@ -4106,7 +4106,7 @@ strength_reduce (scan_start, end, loop_top, insn_count, ...@@ -4106,7 +4106,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
the loop. Unrolling may update part of this information, and the the loop. Unrolling may update part of this information, and the
correct data will be used for generating the BCT. */ correct data will be used for generating the BCT. */
#ifdef HAVE_decrement_and_branch_on_count #ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count) if (HAVE_decrement_and_branch_on_count && bct_p)
analyze_loop_iterations (loop_start, loop_end); analyze_loop_iterations (loop_start, loop_end);
#endif #endif
#endif /* HAIFA */ #endif /* HAIFA */
...@@ -4613,7 +4613,7 @@ strength_reduce (scan_start, end, loop_top, insn_count, ...@@ -4613,7 +4613,7 @@ strength_reduce (scan_start, end, loop_top, insn_count,
#ifdef HAIFA #ifdef HAIFA
/* instrument the loop with bct insn */ /* instrument the loop with bct insn */
#ifdef HAVE_decrement_and_branch_on_count #ifdef HAVE_decrement_and_branch_on_count
if (HAVE_decrement_and_branch_on_count) if (HAVE_decrement_and_branch_on_count && bct_p)
insert_bct (loop_start, loop_end); insert_bct (loop_start, loop_end);
#endif #endif
#endif /* HAIFA */ #endif /* HAIFA */
...@@ -6981,7 +6981,7 @@ check_dbra_loop (loop_end, insn_count, loop_start) ...@@ -6981,7 +6981,7 @@ check_dbra_loop (loop_end, insn_count, loop_start)
/* If we have a decrement_and_branch_on_count, prefer /* If we have a decrement_and_branch_on_count, prefer
the NE test, since this will allow that instruction to the NE test, since this will allow that instruction to
be generated. */ be generated. */
#if ! defined (HAVE_decrement_and_branch_on_zero) && defined (HAVE_decrement_and_branch_on_count) #if ! defined (HAVE_decrement_and_branch_until_zero) && defined (HAVE_decrement_and_branch_on_count)
&& (add_val != 1 || ! vtop) && (add_val != 1 || ! vtop)
#endif #endif
&& GET_CODE (comparison_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT
...@@ -8189,7 +8189,7 @@ insert_bct (loop_start, loop_end) ...@@ -8189,7 +8189,7 @@ insert_bct (loop_start, loop_end)
/* the only machine mode we work with - is the integer of the size that the /* the only machine mode we work with - is the integer of the size that the
machine has */ machine has */
enum machine_mode loop_var_mode = SImode; enum machine_mode loop_var_mode = word_mode;
int loop_num = uid_loop_num [INSN_UID (loop_start)]; int loop_num = uid_loop_num [INSN_UID (loop_start)];
...@@ -8284,7 +8284,8 @@ insert_bct (loop_start, loop_end) ...@@ -8284,7 +8284,8 @@ insert_bct (loop_start, loop_end)
/* try to instrument the loop. */ /* try to instrument the loop. */
/* Handle the simpler case, where the bounds are known at compile time. */ /* Handle the simpler case, where the bounds are known at compile time. */
if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT) if (GET_CODE (initial_value) == CONST_INT
&& GET_CODE (comparison_value) == CONST_INT)
{ {
int n_iterations; int n_iterations;
int increment_value_abs = INTVAL (increment) * increment_direction; int increment_value_abs = INTVAL (increment) * increment_direction;
...@@ -8459,15 +8460,15 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations) ...@@ -8459,15 +8460,15 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
rtx start_label; rtx start_label;
rtx sequence; rtx sequence;
enum machine_mode loop_var_mode = SImode; enum machine_mode loop_var_mode = word_mode;
if (HAVE_decrement_and_branch_on_count) if (HAVE_decrement_and_branch_on_count)
{ {
if (loop_dump_stream) if (loop_dump_stream)
fprintf (loop_dump_stream, "Loop: Inserting BCT\n"); fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
/* eliminate the check on the old variable */ /* Discard original jump to continue loop. Original compare result
delete_insn (PREV_INSN (loop_end)); may still be live, so it cannot be discarded explicitly. */
delete_insn (PREV_INSN (loop_end)); delete_insn (PREV_INSN (loop_end));
/* insert the label which will delimit the start of the loop */ /* insert the label which will delimit the start of the loop */
...@@ -8488,12 +8489,13 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations) ...@@ -8488,12 +8489,13 @@ instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
sequence = gen_sequence (); sequence = gen_sequence ();
end_sequence (); end_sequence ();
emit_insn_after (sequence, loop_start); emit_insn_before (sequence, loop_start);
/* insert new comparison on the count register instead of the /* insert new comparison on the count register instead of the
old one, generating the needed BCT pattern (that will be old one, generating the needed BCT pattern (that will be
later recognized by assembly generation phase). */ later recognized by assembly generation phase). */
emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label), emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2,
start_label),
loop_end); loop_end);
LABEL_NUSES (start_label)++; LABEL_NUSES (start_label)++;
} }
...@@ -8935,4 +8937,3 @@ replace_label (x, data) ...@@ -8935,4 +8937,3 @@ replace_label (x, data)
return 0; return 0;
} }
...@@ -1350,7 +1350,7 @@ extern void print_inline_rtx PROTO ((FILE *, rtx, int)); ...@@ -1350,7 +1350,7 @@ extern void print_inline_rtx PROTO ((FILE *, rtx, int));
/* In loop.c */ /* In loop.c */
extern void init_loop PROTO ((void)); extern void init_loop PROTO ((void));
#ifdef BUFSIZ #ifdef BUFSIZ
extern void loop_optimize PROTO ((rtx, FILE *, int)); extern void loop_optimize PROTO ((rtx, FILE *, int, int));
#endif #endif
extern void record_excess_regs PROTO ((rtx, rtx, rtx *)); extern void record_excess_regs PROTO ((rtx, rtx, rtx *));
......
...@@ -3519,7 +3519,7 @@ rest_of_compilation (decl) ...@@ -3519,7 +3519,7 @@ rest_of_compilation (decl)
{ {
/* We only want to perform unrolling once. */ /* We only want to perform unrolling once. */
loop_optimize (insns, rtl_dump_file, 0); loop_optimize (insns, rtl_dump_file, 0, 0);
/* The first call to loop_optimize makes some instructions /* The first call to loop_optimize makes some instructions
...@@ -3532,7 +3532,7 @@ rest_of_compilation (decl) ...@@ -3532,7 +3532,7 @@ rest_of_compilation (decl)
analysis code depends on this information. */ analysis code depends on this information. */
reg_scan (insns, max_reg_num (), 1); reg_scan (insns, max_reg_num (), 1);
} }
loop_optimize (insns, rtl_dump_file, flag_unroll_loops); loop_optimize (insns, rtl_dump_file, flag_unroll_loops, 1);
}); });
/* Dump rtl code after loop opt, if we are doing that. */ /* Dump rtl code after loop opt, if we are doing that. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment