Commit 0511851c by Mark Mitchell Committed by Mark Mitchell

gcse.c (struct null_pointer_info): New type.

	* gcse.c (struct null_pointer_info): New type.
	(get_bitmap_width): New function.
	(current_block): Remove.
	(nonnull_local): Likewise.
	(nonnull_killed): Likewise.
	(invalidate_nonnull_info): Take a null_pointer_info as input.
	(delete_null_pointer_checks_1): New function.
	(delete_null_pointer_checks): Use it.

From-SVN: r30384
parent 98903742
Wed Nov 3 14:51:59 1999 Mark P. Mitchell <mark@codesourcery.com> Wed Nov 3 14:51:59 1999 Mark P. Mitchell <mark@codesourcery.com>
* gcse.c (struct null_pointer_info): New type.
(get_bitmap_width): New function.
(current_block): Remove.
(nonnull_local): Likewise.
(nonnull_killed): Likewise.
(invalidate_nonnull_info): Take a null_pointer_info as input.
(delete_null_pointer_checks_1): New function.
(delete_null_pointer_checks): Use it.
* haifa-sched.c (find_rgns): Replace uses of alloca with xmalloc. * haifa-sched.c (find_rgns): Replace uses of alloca with xmalloc.
(split_edges): Likewise. (split_edges): Likewise.
(schedule_block): Likewise. (schedule_block): Likewise.
......
...@@ -510,6 +510,18 @@ static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out; ...@@ -510,6 +510,18 @@ static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;
/* for available exprs */ /* for available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out; static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;
/* Objects of this type are passed around by the null-pointer check
removal routines. */
struct null_pointer_info {
/* The basic block being processed. */
int current_block;
/* The first register to be handled in this pass. */
int min_reg;
/* One greater than the last register to be handled in this pass. */
int max_reg;
sbitmap *nonnull_local;
sbitmap *nonnull_killed;
};
static void compute_can_copy PROTO ((void)); static void compute_can_copy PROTO ((void));
...@@ -520,6 +532,7 @@ static void alloc_gcse_mem PROTO ((rtx)); ...@@ -520,6 +532,7 @@ static void alloc_gcse_mem PROTO ((rtx));
static void free_gcse_mem PROTO ((void)); static void free_gcse_mem PROTO ((void));
static void alloc_reg_set_mem PROTO ((int)); static void alloc_reg_set_mem PROTO ((int));
static void free_reg_set_mem PROTO ((void)); static void free_reg_set_mem PROTO ((void));
static int get_bitmap_width PROTO ((int, int, int));
static void record_one_set PROTO ((int, rtx)); static void record_one_set PROTO ((int, rtx));
static void record_set_info PROTO ((rtx, rtx, void *)); static void record_set_info PROTO ((rtx, rtx, void *));
static void compute_sets PROTO ((rtx)); static void compute_sets PROTO ((rtx));
...@@ -622,6 +635,9 @@ static int handle_avail_expr PROTO ((rtx, struct expr *)); ...@@ -622,6 +635,9 @@ static int handle_avail_expr PROTO ((rtx, struct expr *));
static int classic_gcse PROTO ((void)); static int classic_gcse PROTO ((void));
static int one_classic_gcse_pass PROTO ((int)); static int one_classic_gcse_pass PROTO ((int));
static void invalidate_nonnull_info PROTO ((rtx, rtx, void *)); static void invalidate_nonnull_info PROTO ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *,
sbitmap *, sbitmap *,
struct null_pointer_info *));
static rtx process_insert_insn PROTO ((struct expr *)); static rtx process_insert_insn PROTO ((struct expr *));
static int pre_edge_insert PROTO ((struct edge_list *, struct expr **)); static int pre_edge_insert PROTO ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PROTO ((struct occr *, struct expr *, int, int, char *)); static int expr_reaches_here_p_work PROTO ((struct occr *, struct expr *, int, int, char *));
...@@ -948,6 +964,50 @@ free_gcse_mem () ...@@ -948,6 +964,50 @@ free_gcse_mem ()
free (mem_set_in_block); free (mem_set_in_block);
} }
/* Many of the global optimization algorithms work by solving dataflow
equations for various expressions. Initially, some local value is
computed for each expression in each block. Then, the values
across the various blocks are combined (by following flow graph
edges) to arrive at global values. Conceptually, each set of
equations is independent. We may therefore solve all the equations
in parallel, solve them one at a time, or pick any intermediate
approach.
When you're going to need N two-dimensional bitmaps, each X (say,
the number of blocks) by Y (say, the number of expressions), call
this function. It's not important what X and Y represent; only
that Y correspond to the things that can be done in parallel. This
function will return an appropriate chunking factor C; you should
solve C sets of equations in parallel. By going through this
function, we can easily trade space against time; by solving fewer
equations in parallel we use less space. */
static int
get_bitmap_width (n, x, y)
int n;
int x;
int y;
{
/* It's not really worth figuring out *exactly* how much memory will
be used by a particular choice. The important thing is to get
something approximately right. */
size_t max_bitmap_memory = 10 * 1024 * 1024;
/* The number of bytes we'd use for a single column of minimum
width. */
size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);
/* Often, it's reasonable just to solve all the equations in
parallel. */
if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
return y;
/* Otherwise, pick the largest width we can, without going over the
limit. */
return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
/ column_size);
}
/* Compute the local properties of each recorded expression. /* Compute the local properties of each recorded expression.
Local properties are those that are defined by the block, irrespective Local properties are those that are defined by the block, irrespective
...@@ -4923,23 +4983,19 @@ compute_transpout () ...@@ -4923,23 +4983,19 @@ compute_transpout ()
/* Removal of useless null pointer checks */ /* Removal of useless null pointer checks */
/* These need to be file static for communication between
invalidate_nonnull_info and delete_null_pointer_checks. */
static int current_block;
static sbitmap *nonnull_local;
static sbitmap *nonnull_killed;
/* Called via note_stores. X is set by SETTER. If X is a register we must /* Called via note_stores. X is set by SETTER. If X is a register we must
invalidate nonnull_local and set nonnull_killed. invalidate nonnull_local and set nonnull_killed. DATA is really a
`null_pointer_info *'.
We ignore hard registers. */ We ignore hard registers. */
static void static void
invalidate_nonnull_info (x, setter, data) invalidate_nonnull_info (x, setter, data)
rtx x; rtx x;
rtx setter ATTRIBUTE_UNUSED; rtx setter ATTRIBUTE_UNUSED;
void *data ATTRIBUTE_UNUSED; void *data;
{ {
int offset, regno; int offset, regno;
struct null_pointer_info* npi = (struct null_pointer_info *) data;
offset = 0; offset = 0;
while (GET_CODE (x) == SUBREG) while (GET_CODE (x) == SUBREG)
...@@ -4947,89 +5003,34 @@ invalidate_nonnull_info (x, setter, data) ...@@ -4947,89 +5003,34 @@ invalidate_nonnull_info (x, setter, data)
/* Ignore anything that is not a register or is a hard register. */ /* Ignore anything that is not a register or is a hard register. */
if (GET_CODE (x) != REG if (GET_CODE (x) != REG
|| REGNO (x) < FIRST_PSEUDO_REGISTER) || REGNO (x) < npi->min_reg
|| REGNO (x) >= npi->max_reg)
return; return;
regno = REGNO (x); regno = REGNO (x) - npi->min_reg;
RESET_BIT (nonnull_local[current_block], regno); RESET_BIT (npi->nonnull_local[npi->current_block], regno);
SET_BIT (nonnull_killed[current_block], regno); SET_BIT (npi->nonnull_killed[npi->current_block], regno);
} }
/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated /* Do null-pointer check elimination for the registers indicated in
at compile time. NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
they are not our responsibility to free. */
This is conceptually similar to global constant/copy propagation and
classic global CSE (it even uses the same dataflow equations as cprop).
If a register is used as memory address with the form (mem (reg)), then we
know that REG can not be zero at that point in the program. Any instruction
which sets REG "kills" this property.
So, if every path leading to a conditional branch has an available memory
reference of that form, then we know the register can not have the value
zero at the conditional branch.
So we merely need to compute the local properies and propagate that data
around the cfg, then optimize where possible.
We run this pass two times. Once before CSE, then again after CSE. This static void
has proven to be the most profitable approach. It is rare for new delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
optimization opportunities of this nature to appear after the first CSE nonnull_avout, npi)
pass. int_list_ptr *s_preds;
int *block_reg;
This could probably be integrated with global cprop with a little work. */ sbitmap *nonnull_avin;
sbitmap *nonnull_avout;
void struct null_pointer_info *npi;
delete_null_pointer_checks (f)
rtx f;
{ {
int_list_ptr *s_preds, *s_succs;
int *num_preds, *num_succs;
int changed, bb; int changed, bb;
sbitmap *nonnull_avin, *nonnull_avout; int current_block;
sbitmap *nonnull_local = npi->nonnull_local;
sbitmap *nonnull_killed = npi->nonnull_killed;
/* First break the program into basic blocks. */
find_basic_blocks (f, max_reg_num (), NULL, 1);
/* If we have only a single block, then there's nothing to do. */
if (n_basic_blocks <= 1)
{
/* Free storage allocated by find_basic_blocks. */
free_basic_block_vars (0);
return;
}
/* Trying to perform global optimizations on flow graphs which have
a high connectivity will take a long time and is unlikely to be
particularly useful.
In normal circumstances a cfg should have about twice has many edges
as blocks. But we do not want to punish small functions which have
a couple switch statements. So we require a relatively large number
of basic blocks and the ratio of edges to blocks to be high. */
if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
{
/* Free storage allocated by find_basic_blocks. */
free_basic_block_vars (0);
return;
}
/* We need predecessor/successor lists as well as pred/succ counts for
each basic block. */
s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
/* Allocate bitmaps to hold local and global properties. */
nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ());
/* Compute local properties, nonnull and killed. A register will have /* Compute local properties, nonnull and killed. A register will have
the nonnull property if at the end of the current block its value is the nonnull property if at the end of the current block its value is
known to be nonnull. The killed property indicates that somewhere in known to be nonnull. The killed property indicates that somewhere in
...@@ -5044,6 +5045,9 @@ delete_null_pointer_checks (f) ...@@ -5044,6 +5045,9 @@ delete_null_pointer_checks (f)
{ {
rtx insn, stop_insn; rtx insn, stop_insn;
/* Set the current block for invalidate_nonnull_info. */
npi->current_block = current_block;
/* Scan each insn in the basic block looking for memory references and /* Scan each insn in the basic block looking for memory references and
register sets. */ register sets. */
stop_insn = NEXT_INSN (BLOCK_END (current_block)); stop_insn = NEXT_INSN (BLOCK_END (current_block));
...@@ -5052,6 +5056,7 @@ delete_null_pointer_checks (f) ...@@ -5052,6 +5056,7 @@ delete_null_pointer_checks (f)
insn = NEXT_INSN (insn)) insn = NEXT_INSN (insn))
{ {
rtx set; rtx set;
rtx reg;
/* Ignore anything that is not a normal insn. */ /* Ignore anything that is not a normal insn. */
if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
...@@ -5063,7 +5068,7 @@ delete_null_pointer_checks (f) ...@@ -5063,7 +5068,7 @@ delete_null_pointer_checks (f)
set = single_set (insn); set = single_set (insn);
if (!set) if (!set)
{ {
note_stores (PATTERN (insn), invalidate_nonnull_info, NULL); note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
continue; continue;
} }
...@@ -5071,22 +5076,24 @@ delete_null_pointer_checks (f) ...@@ -5071,22 +5076,24 @@ delete_null_pointer_checks (f)
in case it uses its address register as a dest (which kills in case it uses its address register as a dest (which kills
the nonnull property). */ the nonnull property). */
if (GET_CODE (SET_SRC (set)) == MEM if (GET_CODE (SET_SRC (set)) == MEM
&& GET_CODE (XEXP (SET_SRC (set), 0)) == REG && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
&& REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER) && REGNO (reg) >= npi->min_reg
&& REGNO (reg) < npi->max_reg)
SET_BIT (nonnull_local[current_block], SET_BIT (nonnull_local[current_block],
REGNO (XEXP (SET_SRC (set), 0))); REGNO (reg) - npi->min_reg);
/* Now invalidate stuff clobbered by this insn. */ /* Now invalidate stuff clobbered by this insn. */
note_stores (PATTERN (insn), invalidate_nonnull_info, NULL); note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
/* And handle stores, we do these last since any sets in INSN can /* And handle stores, we do these last since any sets in INSN can
not kill the nonnull property if it is derived from a MEM not kill the nonnull property if it is derived from a MEM
appearing in a SET_DEST. */ appearing in a SET_DEST. */
if (GET_CODE (SET_DEST (set)) == MEM if (GET_CODE (SET_DEST (set)) == MEM
&& GET_CODE (XEXP (SET_DEST (set), 0)) == REG && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
&& REGNO (XEXP (SET_DEST (set), 0)) >= FIRST_PSEUDO_REGISTER) && REGNO (reg) >= npi->min_reg
&& REGNO (reg) < npi->max_reg)
SET_BIT (nonnull_local[current_block], SET_BIT (nonnull_local[current_block],
REGNO (XEXP (SET_DEST (set), 0))); REGNO (reg) - npi->min_reg);
} }
} }
...@@ -5117,33 +5124,21 @@ delete_null_pointer_checks (f) ...@@ -5117,33 +5124,21 @@ delete_null_pointer_checks (f)
for (bb = 0; bb < n_basic_blocks; bb++) for (bb = 0; bb < n_basic_blocks; bb++)
{ {
rtx last_insn = BLOCK_END (bb); rtx last_insn = BLOCK_END (bb);
rtx condition, earliest, reg; rtx condition, earliest;
int compare_and_branch; int compare_and_branch;
/* We only want conditional branches. */ /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
if (GET_CODE (last_insn) != JUMP_INSN since BLOCK_REG[BB] is zero if this block did not end with a
|| !condjump_p (last_insn) comparison against zero, this condition works. */
|| simplejump_p (last_insn)) if (block_reg[bb] < npi->min_reg
|| block_reg[bb] >= npi->max_reg)
continue; continue;
/* LAST_INSN is a conditional jump. Get its condition. */ /* LAST_INSN is a conditional jump. Get its condition. */
condition = get_condition (last_insn, &earliest); condition = get_condition (last_insn, &earliest);
/* If we were unable to get the condition, or it is not a equality
comparison against zero then there's nothing we can do. */
if (!condition
|| (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
|| GET_CODE (XEXP (condition, 1)) != CONST_INT
|| XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0))))
continue;
/* We must be checking a register against zero. */
reg = XEXP (condition, 0);
if (GET_CODE (reg) != REG)
continue;
/* Is the register known to have a nonzero value? */ /* Is the register known to have a nonzero value? */
if (!TEST_BIT (nonnull_avout[bb], REGNO (reg))) if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
continue; continue;
/* Try to compute whether the compare/branch at the loop end is one or /* Try to compute whether the compare/branch at the loop end is one or
...@@ -5170,6 +5165,139 @@ delete_null_pointer_checks (f) ...@@ -5170,6 +5165,139 @@ delete_null_pointer_checks (f)
delete_insn (last_insn); delete_insn (last_insn);
if (compare_and_branch == 2) if (compare_and_branch == 2)
delete_insn (earliest); delete_insn (earliest);
/* Don't check this block again. (Note that BLOCK_END is
invalid here; we deleted the last instruction in the
block.) */
block_reg[bb] = 0;
}
}
/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
at compile time.
This is conceptually similar to global constant/copy propagation and
classic global CSE (it even uses the same dataflow equations as cprop).
If a register is used as memory address with the form (mem (reg)), then we
know that REG can not be zero at that point in the program. Any instruction
which sets REG "kills" this property.
So, if every path leading to a conditional branch has an available memory
reference of that form, then we know the register can not have the value
zero at the conditional branch.
So we merely need to compute the local properies and propagate that data
around the cfg, then optimize where possible.
We run this pass two times. Once before CSE, then again after CSE. This
has proven to be the most profitable approach. It is rare for new
optimization opportunities of this nature to appear after the first CSE
pass.
This could probably be integrated with global cprop with a little work. */
void
delete_null_pointer_checks (f)
rtx f;
{
int_list_ptr *s_preds, *s_succs;
int *num_preds, *num_succs;
sbitmap *nonnull_avin, *nonnull_avout;
int *block_reg;
int bb;
int reg;
int regs_per_pass;
int max_reg;
struct null_pointer_info npi;
/* First break the program into basic blocks. */
find_basic_blocks (f, max_reg_num (), NULL, 1);
/* If we have only a single block, then there's nothing to do. */
if (n_basic_blocks <= 1)
{
/* Free storage allocated by find_basic_blocks. */
free_basic_block_vars (0);
return;
}
/* Trying to perform global optimizations on flow graphs which have
a high connectivity will take a long time and is unlikely to be
particularly useful.
In normal circumstances a cfg should have about twice has many edges
as blocks. But we do not want to punish small functions which have
a couple switch statements. So we require a relatively large number
of basic blocks and the ratio of edges to blocks to be high. */
if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
{
/* Free storage allocated by find_basic_blocks. */
free_basic_block_vars (0);
return;
}
/* We need predecessor/successor lists as well as pred/succ counts for
each basic block. */
s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr));
num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int));
num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int));
compute_preds_succs (s_preds, s_succs, num_preds, num_succs);
/* We need four bitmaps, each with a bit for each register in each
basic block. */
max_reg = max_reg_num ();
regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);
/* Allocate bitmaps to hold local and global properties. */
npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
/* Go through the basic blocks, seeing whether or not each block
ends with a conditional branch whose condition is a comparison
against zero. Record the register compared in BLOCK_REG. */
block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int));
for (bb = 0; bb < n_basic_blocks; bb++)
{
rtx last_insn = BLOCK_END (bb);
rtx condition, earliest, reg;
/* We only want conditional branches. */
if (GET_CODE (last_insn) != JUMP_INSN
|| !condjump_p (last_insn)
|| simplejump_p (last_insn))
continue;
/* LAST_INSN is a conditional jump. Get its condition. */
condition = get_condition (last_insn, &earliest);
/* If we were unable to get the condition, or it is not a equality
comparison against zero then there's nothing we can do. */
if (!condition
|| (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
|| GET_CODE (XEXP (condition, 1)) != CONST_INT
|| (XEXP (condition, 1)
!= CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
continue;
/* We must be checking a register against zero. */
reg = XEXP (condition, 0);
if (GET_CODE (reg) != REG)
continue;
block_reg[bb] = REGNO (reg);
}
/* Go through the algorithm for each block of registers. */
for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
{
npi.min_reg = reg;
npi.max_reg = MIN (reg + regs_per_pass, max_reg);
delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin,
nonnull_avout, &npi);
} }
/* Free storage allocated by find_basic_blocks. */ /* Free storage allocated by find_basic_blocks. */
...@@ -5181,9 +5309,12 @@ delete_null_pointer_checks (f) ...@@ -5181,9 +5309,12 @@ delete_null_pointer_checks (f)
free (num_preds); free (num_preds);
free (num_succs); free (num_succs);
/* Free the table of registers compared at the end of every block. */
free (block_reg);
/* Free bitmaps. */ /* Free bitmaps. */
free (nonnull_local); free (npi.nonnull_local);
free (nonnull_killed); free (npi.nonnull_killed);
free (nonnull_avin); free (nonnull_avin);
free (nonnull_avout); free (nonnull_avout);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment