/* Partial redundancy elimination / Hoisting for RTL.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.

*/
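
/* A purely illustrative sketch of the last TODO item above (the register
   numbers are made up, not taken from any actual test case):

	(set (mem:SI (reg 60)) (reg 70))	;; store
	...					;; nothing clobbers (mem:SI (reg 60))
	(set (reg 70) (mem:SI (reg 60)))	;; load from the same address

   The store writes back exactly the value the load would read (the source
   of the store is the destination of the load), so treating the store as
   killing the load only costs load motion opportunities.  */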

/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "toplev.h"

#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "except.h"
#include "ggc.h"
#include "params.h"
#include "cselib.h"
#include "intl.h"
#include "obstack.h"
#include "timevar.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "df.h"
#include "dbgcnt.h"
#include "target.h"
#include "gcse.h"

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by classic GCSE.

   Two passes of copy/constant propagation are done around PRE or hoisting
   because the first one enables more GCSE and the second one helps to clean
   up the copies that PRE and HOIST create.  This is needed more for PRE than
   for HOIST because code hoisting will try to use an existing register
   containing the common subexpression rather than create a new one.  This is
   harder to do for PRE because of the code motion (which HOIST doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
   This allows PRE to hoist expressions that are expressed in multiple insns,
   such as complex address calculations (e.g. for PIC code, or loads with a
   high part and a low part).

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   This study was done before expressions in REG_EQUAL notes were added as
   candidate expressions for optimization, and before the GIMPLE optimizers
   were added.  Probably, running multiple passes is even less efficient now
   than at the time when the study was conducted.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much, since it computes the reaching defs of
   each register in each block and thus can try to use an existing
   register.  */
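
/* For illustration only (the register numbers below are made up, not taken
   from any actual dump), a partially redundant expression looks roughly
   like this, where bb1 and bb2 are both predecessors of bb3:

	bb1:  (set (reg 100) (plus (reg 50) (reg 51)))
	bb2:  ;; no computation of (plus (reg 50) (reg 51))
	bb3:  (set (reg 101) (plus (reg 50) (reg 51)))	;; redundant via bb1

   After PRE, assuming (reg 102) is the new reaching_reg pseudo chosen for
   the expression:

	bb1:  (set (reg 100) (plus (reg 50) (reg 51)))
	      (set (reg 102) (reg 100))			;; copy to reaching_reg
	bb2:  (set (reg 102) (plus (reg 50) (reg 51)))	;; inserted computation
	bb3:  (set (reg 101) (reg 102))			;; redundancy removed

   The follow-up CPROP pass then cleans up most of these copies.  */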

/* GCSE global vars.  */

struct target_gcse default_target_gcse;
#if SWITCHABLE_TARGET
struct target_gcse *this_target_gcse = &default_target_gcse;
#endif

/* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
int flag_rerun_cse_after_global_opts;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

struct reg_use {rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  int max_distance;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

typedef struct occr *occr_t;
DEF_VEC_P (occr_t);
DEF_VEC_ALLOC_P (occr_t, heap);

/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};

/* Expression hash table.  */
static struct hash_table_d expr_hash_table;

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by
   anything except themselves, i.e., loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance (all stores copy the same value to the reaching reg used
   for the loads).  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.
   Store Motion uses this structure as an expression table to track stores
   which look interesting, and might be moveable towards the exit block.  */

struct ls_expr
{
  struct expr * expr;		/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  rtx loads;			/* INSN list of loads seen.  */
  rtx stores;			/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};

/* Head of the list of load/store memory refs.  */
static struct ls_expr * pre_ldst_mems = NULL;

/* Hashtable for the load/store memory refs.  */
static htab_t pre_ldst_table = NULL;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* Array, indexed by basic block number for a list of insns which modify
   memory within that block.  */
static VEC (rtx,heap) **modify_mem_list;
static bitmap modify_mem_list_set;

typedef struct modify_pair_s
{
  rtx dest;			/* A MEM.  */
  rtx dest_addr;		/* The canonical address of `dest'.  */
} modify_pair;

DEF_VEC_O(modify_pair);
DEF_VEC_ALLOC_O(modify_pair,heap);

/* This array parallels modify_mem_list, except that it stores MEMs
   being set and their canonicalized memory addresses.  */
static VEC (modify_pair,heap) **canon_modify_mem_list;

/* Bitmap indexed by block numbers to record which blocks contain
   function calls.  */
static bitmap blocks_with_calls;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;

/* Doing code hoisting.  */
static bool doing_code_hoisting_p = false;

/* For available exprs */
static sbitmap *ae_kill;

static void compute_can_copy (void);
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
static void *gcse_alloc (unsigned long);
static void alloc_gcse_mem (void);
static void free_gcse_mem (void);
static void hash_scan_insn (rtx, struct hash_table_d *);
static void hash_scan_set (rtx, rtx, struct hash_table_d *);
static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
static void hash_scan_call (rtx, rtx, struct hash_table_d *);
static int want_to_gcse_p (rtx, int *);
static int oprs_unchanged_p (const_rtx, const_rtx, int);
static int oprs_anticipatable_p (const_rtx, const_rtx);
static int oprs_available_p (const_rtx, const_rtx);
static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
				  struct hash_table_d *);
static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
static int expr_equiv_p (const_rtx, const_rtx);
static void record_last_reg_set_info (rtx, int);
static void record_last_mem_set_info (rtx);
static void record_last_set_info (rtx, const_rtx, void *);
static void compute_hash_table (struct hash_table_d *);
static void alloc_hash_table (struct hash_table_d *);
static void free_hash_table (struct hash_table_d *);
static void compute_hash_table_work (struct hash_table_d *);
static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
static void compute_transp (const_rtx, int, sbitmap *);
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
				      struct hash_table_d *);
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
static void canon_list_insert (rtx, const_rtx, void *);
static void alloc_pre_mem (int, int);
static void free_pre_mem (void);
static void compute_pre_data (void);
static int pre_expr_reaches_here_p (basic_block, struct expr *,
				    basic_block);
static void insert_insn_end_basic_block (struct expr *, basic_block);
static void pre_insert_copy_insn (struct expr *, rtx);
static void pre_insert_copies (void);
static int pre_delete (void);
static int pre_gcse (void);
static int one_pre_gcse_pass (void);
static void add_label_notes (rtx, rtx);
static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
				      int, int *);
static int hoist_code (void);
static int one_code_hoisting_pass (void);
static rtx process_insert_insn (struct expr *);
static int pre_edge_insert (struct edge_list *, struct expr **);
static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
					 basic_block, char *);
static struct ls_expr * ldst_entry (rtx);
static void free_ldst_entry (struct ls_expr *);
static void free_ldst_mems (void);
static void print_ldst_list (FILE *);
static struct ls_expr * find_rtx_in_ldst (rtx);
static inline struct ls_expr * first_ls_expr (void);
static inline struct ls_expr * next_ls_expr (struct ls_expr *);
static int simple_mem (const_rtx);
static void invalidate_any_buried_refs (rtx);
static void compute_ld_motion_mems (void);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct expr *);
static void clear_modify_mem_tables (void);
static void free_modify_mem_tables (void);
static rtx gcse_emit_move_after (rtx, rtx, rtx);
static bool is_too_expensive (const char *);

#define GNEW(T)			((T *) gmalloc (sizeof (T)))
#define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)		((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))

#define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))

/* Misc. utilities.  */

#define can_copy \
  (this_target_gcse->x_can_copy)
#define can_copy_init_p \
  (this_target_gcse->x_can_copy_init_p)

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy[i] = 1;
#endif
      }
    else
      can_copy[i] = 1;

  end_sequence ();
}

/* Returns whether the mode supports reg/reg copy operations.  */

bool
can_copy_p (enum machine_mode mode)
{
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = true;
    }

  return can_copy[mode] != 0;
}


/* Cover function to xmalloc to record bytes allocated.  */

static void *
gmalloc (size_t size)
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xcalloc to record bytes allocated.  */

static void *
gcalloc (size_t nelem, size_t elsize)
{
  bytes_used += nelem * elsize;
  return xcalloc (nelem, elsize);
}

/* Cover function to obstack_alloc.  */

static void *
gcse_alloc (unsigned long size)
{
  bytes_used += size;
  return obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the reg/memory set tracking tables.
   This is called at the start of each pass.  */

static void
alloc_gcse_mem (void)
{
  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = ALLOC_REG_SET (NULL);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  */
  modify_mem_list = GCNEWVEC (VEC (rtx,heap) *, last_basic_block);
  canon_modify_mem_list = GCNEWVEC (VEC (modify_pair,heap) *,
				    last_basic_block);
  modify_mem_list_set = BITMAP_ALLOC (NULL);
  blocks_with_calls = BITMAP_ALLOC (NULL);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem (void)
{
  FREE_REG_SET (reg_set_bitmap);

  free_modify_mem_tables ();
  BITMAP_FREE (modify_mem_list_set);
  BITMAP_FREE (blocks_with_calls);
}

/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and expression would contain the same value if the computation
   was moved to the beginning of the block.

   We call this routine for pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   TABLE controls which hash table to look at.  */
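
/* A small hypothetical example of the three properties: if a block computes
   (plus (reg 50) (reg 51)) and only afterwards sets (reg 50), the expression
   is locally anticipatable (ANTLOC) in that block but not computed (COMP),
   since its value would differ at the end of the block.  A block that sets
   (reg 50) before computing the expression gives COMP but not ANTLOC.  Any
   block that modifies (reg 50) or (reg 51) at all is not transparent (TRANSP)
   for the expression.  */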

static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
			  struct hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      sbitmap_vector_ones (transp, last_basic_block);
    }

  if (comp)
    sbitmap_vector_zero (comp, last_basic_block);
  if (antloc)
    sbitmap_vector_zero (antloc, last_basic_block);

  for (i = 0; i < table->size; i++)
    {
      struct expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to nonzero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to nonzero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}

/* Hash table support.  */

struct reg_avail_info
{
  basic_block last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static basic_block current_bb;


/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (rtx x, int *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use rtx_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constant leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return 0;

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_FIXED:
    case CONST_VECTOR:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  int max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = rtx_cost (x, SET, 0);

	  if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
	    {
	      max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
	      if (max_distance == 0)
		return 0;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x);
    }
}

/* Used internally by can_assign_to_reg_without_clobbers_p.  */

static GTY(()) rtx test_insn;

/* Return true if we can assign X to a pseudo register such that the
   resulting insn does not result in clobbering a hard register as a
   side-effect.

   Additionally, if the target requires it, check that the resulting insn
   can be copied.  If it cannot, this means that X is special and probably
   has hidden side-effects we don't want to mess with.

   This function is typically used by code motion passes, to verify
   that it is safe to insert an insn without worrying about clobbering
   maybe live hard regs.  */

bool
can_assign_to_reg_without_clobbers_p (rtx x)
{
  int num_clobbers = 0;
  int icode;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, GET_MODE (x)))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (VOIDmode,
				      gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
  if (icode < 0)
    return false;

  if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
    return false;

  if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
    return false;

  return true;
}

/* Return nonzero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

	if (info->last_bb != current_bb)
	  return 1;
	if (avail_p)
	  return info->last_set < DF_INSN_LUID (insn);
	else
	  return info->first_set >= DF_INSN_LUID (insn);
      }

    case MEM:
      if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
				  x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_FIXED:
    case CONST_VECTOR:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
   conflict between two memory references.  */
static int gcse_mems_conflict_p;

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  A memory reference for a load instruction,
   mems_conflict_for_gcse_p will see if a memory store conflicts with
   this memory load.  */
static const_rtx gcse_mem_operand;

/* DEST is the output of an instruction.  If it is a memory reference, and
   possibly conflicts with the load found in gcse_mem_operand, then set
   gcse_mems_conflict_p to a nonzero value.  */

static void
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
			  void *data ATTRIBUTE_UNUSED)
{
  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */

  if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
    {
      if (!find_rtx_in_ldst (dest))
	gcse_mems_conflict_p = 1;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
		       rtx_addr_varies_p))
    gcse_mems_conflict_p = 1;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the LUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */

static int
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, int avail_p)
{
  VEC (rtx,heap) *list = modify_mem_list[bb->index];
  rtx setter;
  unsigned ix;

  /* If this is a readonly then we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return 0;

  FOR_EACH_VEC_ELT_REVERSE (rtx, list, ix, setter)
    {
      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && DF_INSN_LUID (setter) < uid_limit)
	  || (! avail_p
	      && DF_INSN_LUID (setter) > uid_limit))
	continue;

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return 1;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.

	 The note_stores interface is pretty limited, so we have to
	 communicate via global variables.  Yuk.  */
      gcse_mem_operand = x;
      gcse_mems_conflict_p = 0;
      note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
      if (gcse_mems_conflict_p)
	return 1;
    }
  return 0;
}

/* Return nonzero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return nonzero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 1);
}

/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.  HASH_TABLE_SIZE is
   the current size of the hash table to be probed.  */

static unsigned int
hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
	   int hash_table_size)
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_rtx (x, mode, do_not_record_p,
		   NULL,  /*have_reg_qty=*/false);
  return hash % hash_table_size;
}

/* Return nonzero if exp1 is equivalent to exp2.  */

static int
expr_equiv_p (const_rtx x, const_rtx y)
{
  return exp_equiv_p (x, y, 0, true);
}

/* Insert expression X in INSN in the hash TABLE.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is nonzero if X is an anticipatable expression.
   AVAIL_P is nonzero if X is an available expression.

   MAX_DISTANCE is the maximum distance in instructions this expression can
   be moved.  */

static void
insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
		      int avail_p, int max_distance, struct hash_table_d *table)
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = table->table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = GOBNEW (struct expr);
      bytes_used += sizeof (struct expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      if (antic_occr
	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct occr);
	  bytes_used += sizeof (struct occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct occr);
	  bytes_used += sizeof (struct occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}

/* Scan pattern PAT of INSN and add an entry to the hash TABLE.  */

static void
hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      int max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && want_to_gcse_p (XEXP (note, 0), NULL))
	src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src, &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (pat)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes, and if the argument slot is used somewhere
	     explicitly, it means the address of the parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  int antic_p = oprs_anticipatable_p (src, insn)
			&& !multiple_sets (insn);
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  int avail_p = (oprs_available_p (src, insn)
			 && ! JUMP_P (insn));

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}
    }
  /* In case of a store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
      {
        unsigned int regno = REGNO (src);
	int max_distance = 0;

	/* Only record sets of pseudo-regs in the hash table.  */
        if (regno >= FIRST_PSEUDO_REGISTER
	   /* Don't GCSE something if we can't do a reg/reg copy.  */
	   && can_copy_p (GET_MODE (src))
	   /* GCSE commonly inserts instruction after the insn.  We can't
	      do that easily for EH edges so disable GCSE on these for now.  */
	   && !can_throw_internal (insn)
	   /* Is SET_DEST something we want to gcse?  */
	   && want_to_gcse_p (dest, &max_distance)
	   /* Don't CSE a nop.  */
	   && ! set_noop_p (pat)
	   /* Don't GCSE if it has attached REG_EQUIV note.
	      At this point only function parameters should have
	      REG_EQUIV notes, and if the argument slot is used somewhere
	      explicitly, it means the address of the parameter has been taken,
	      so we should not extend the lifetime of the pseudo.  */
	   && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	       || ! MEM_P (XEXP (note, 0))))
             {
               /* Stores are never anticipatable.  */
               int antic_p = 0;
	       /* An expression is not available if its operands are
	          subsequently modified, including this insn.  It's also not
	          available if this is a branch, because we can't insert
	          a set after the branch.  */
               int avail_p = oprs_available_p (dest, insn)
			     && ! JUMP_P (insn);

	       /* Record the memory expression (DEST) in the hash table.  */
	       insert_expr_in_table (dest, GET_MODE (dest), insn,
				     antic_p, avail_p, max_distance, table);
             }
      }
}

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
		   struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
		struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

/* Process INSN and add hash table entries as appropriate.

   Only available expressions that set a single pseudo-reg are recorded.

   Single sets in a PARALLEL could be handled, but it's an extra complication
   that isn't dealt with right now.  The trick is handling the CLOBBERs that
   are also in the PARALLEL.  Later.

   TABLE is the hash table to be updated.  */

static void
hash_scan_insn (rtx insn, struct hash_table_d *table)
{
  rtx pat = PATTERN (insn);
  int i;

  /* Pick out the sets of INSN and for other forms of instructions record
     what's been modified.  */

  if (GET_CODE (pat) == SET)
    hash_scan_set (pat, insn, table);
  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
	rtx x = XVECEXP (pat, 0, i);

	if (GET_CODE (x) == SET)
	  hash_scan_set (x, insn, table);
	else if (GET_CODE (x) == CLOBBER)
	  hash_scan_clobber (x, insn, table);
	else if (GET_CODE (x) == CALL)
	  hash_scan_call (x, insn, table);
      }

  else if (GET_CODE (pat) == CLOBBER)
    hash_scan_clobber (pat, insn, table);
  else if (GET_CODE (pat) == CALL)
    hash_scan_call (pat, insn, table);
}

static void
dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct expr **flat_table;
  unsigned int *hash_val;
  struct expr *expr;

  flat_table = XCNEWVEC (struct expr *, table->n_elems);
  hash_val = XNEWVEC (unsigned int, table->n_elems);

  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
	   name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
		 expr->bitmap_index, hash_val[i], expr->max_distance);
	print_rtl (file, expr->expr);
	fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}

/* Record register first/last/block set information for REGNO in INSN.

   first_set records the first place in the block where the register
   is set and is used to compute "anticipatability".

   last_set records the last place in the block where the register
   is set and is used to compute "availability".

   last_bb records the block for which first_set and last_set are
   valid, as a quick test to invalidate them.  */

static void
record_last_reg_set_info (rtx insn, int regno)
{
  struct reg_avail_info *info = &reg_avail_info[regno];
  int luid = DF_INSN_LUID (insn);

  info->last_set = luid;
  if (info->last_bb != current_bb)
    {
      info->last_bb = current_bb;
      info->first_set = luid;
    }
}


/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
   Note we store a pair of elements in the list, so they have to be
   taken off pairwise.  */

static void
canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
		   void * v_insn)
{
  rtx dest_addr, insn;
  int bb;
  modify_pair *pair;

  while (GET_CODE (dest) == SUBREG
      || GET_CODE (dest) == ZERO_EXTRACT
      || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with a load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */

  if (! MEM_P (dest))
    return;

  dest_addr = get_addr (XEXP (dest, 0));
  dest_addr = canon_rtx (dest_addr);
  insn = (rtx) v_insn;
  bb = BLOCK_FOR_INSN (insn)->index;

  pair = VEC_safe_push (modify_pair, heap, canon_modify_mem_list[bb], NULL);
  pair->dest = dest;
  pair->dest_addr = dest_addr;
}

/* Record memory modification information for INSN.  We do not actually care
   about the memory location(s) that are set, or even how they are set (consider
   a CALL_INSN).  We merely need to record which insns modify memory.  */

static void
record_last_mem_set_info (rtx insn)
{
  int bb = BLOCK_FOR_INSN (insn)->index;

  /* load_killed_in_block_p will handle the case of calls clobbering
     everything.  */
  VEC_safe_push (rtx, heap, modify_mem_list[bb], insn);
  bitmap_set_bit (modify_mem_list_set, bb);

  if (CALL_P (insn))
    bitmap_set_bit (blocks_with_calls, bb);
  else
    note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
}

/* Called from compute_hash_table via note_stores to handle one
   SET or CLOBBER in an insn.  DATA is really the instruction in which
   the SET is taking place.  */

static void
record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
{
  rtx last_set_insn = (rtx) data;

  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (REG_P (dest))
    record_last_reg_set_info (last_set_insn, REGNO (dest));
  else if (MEM_P (dest)
	   /* Ignore pushes, they clobber nothing.  */
	   && ! push_operand (dest, GET_MODE (dest)))
    record_last_mem_set_info (last_set_insn);
}

/* Top level function to create an expression hash table.

   Expression entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform GCSE on,
   - none of the operands are subsequently modified in the block

   Currently src must be a pseudo-reg or a const_int.

   TABLE is the table computed.  */

static void
compute_hash_table_work (struct hash_table_d *table)
{
  int i;

  /* Clear out anything left over in the memory-modification tables.  */
  clear_modify_mem_tables ();
  /* Some working arrays used to track first and last set in each block.  */
  reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());

  for (i = 0; i < max_reg_num (); ++i)
    reg_avail_info[i].last_bb = NULL;

  FOR_EACH_BB (current_bb)
    {
      rtx insn;
      unsigned int regno;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.  */
      FOR_BB_INSNS (current_bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (CALL_P (insn))
	    {
	      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
		if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
		  record_last_reg_set_info (insn, regno);

	      if (! RTL_CONST_OR_PURE_CALL_P (insn))
		record_last_mem_set_info (insn);
	    }

	  note_stores (PATTERN (insn), record_last_set_info, insn);
	}

      /* The next pass builds the hash table.  */
1557
      FOR_BB_INSNS (current_bb, insn)
1558
	if (NONDEBUG_INSN_P (insn))
Steven Bosscher committed
1559
	  hash_scan_insn (insn, table);
1560 1561
    }

  free (reg_avail_info);
  reg_avail_info = NULL;
}

/* Allocate space for the set/expr hash TABLE.
   The current number of insns in the function is used to determine
   the number of buckets to use.  */

static void
alloc_hash_table (struct hash_table_d *table)
{
  int n;

  n = get_max_insn_count ();

  table->size = n / 4;
  if (table->size < 11)
    table->size = 11;

  /* Attempt to maintain efficient use of hash table.
     Making it an odd number is simplest for now.
     ??? Later take some measurements.  */
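
  /* For example, with roughly 4000 insns in the function this gives
     4000 / 4 = 1000 buckets, bumped to 1001 by the "|= 1" below; a
     tiny function gets the 11-bucket minimum.  The numbers are only
     meant to illustrate the arithmetic.  */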
  table->size |= 1;
  n = table->size * sizeof (struct expr *);
  table->table = GNEWVAR (struct expr *, n);
}

1588
/* Free things allocated by alloc_hash_table.  */
1589 1590

static void
1591
free_hash_table (struct hash_table_d *table)
1592
{
1593
  free (table->table);
1594 1595
}

1596
/* Compute the expression hash table TABLE.  */
1597 1598

static void
1599
compute_hash_table (struct hash_table_d *table)
1600 1601
{
  /* Initialize count of number of entries in hash table.  */
1602
  table->n_elems = 0;
1603
  memset (table->table, 0, table->size * sizeof (struct expr *));
1604

1605
  compute_hash_table_work (table);
1606 1607 1608 1609
}

/* Expression tracking support.  */

1610 1611 1612
/* Clear canon_modify_mem_list and modify_mem_list tables.  */
static void
clear_modify_mem_tables (void)
1613
{
1614 1615
  unsigned i;
  bitmap_iterator bi;
1616

1617
  EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1618
    {
1619
      VEC_free (rtx, heap, modify_mem_list[i]);
1620
      VEC_free (modify_pair, heap, canon_modify_mem_list[i]);
1621
    }
1622 1623
  bitmap_clear (modify_mem_list_set);
  bitmap_clear (blocks_with_calls);
1624 1625
}

1626
/* Release memory used by modify_mem_list_set.  */
1627

1628 1629
static void
free_modify_mem_tables (void)
1630
{
1631 1632 1633 1634 1635
  clear_modify_mem_tables ();
  free (modify_mem_list);
  free (canon_modify_mem_list);
  modify_mem_list = 0;
  canon_modify_mem_list = 0;
1636 1637
}


/* For each block, compute whether X is transparent.  X is either an
   expression or an assignment [though we don't care which, for this context
   an assignment is treated as an expression].  For each block where an
   element of X is modified, reset the INDX bit in BMAP.  */
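
/* For instance, if X is (plus (reg 100) (const_int 4)) and some insn in
   block B sets reg 100, then X is not transparent in B and its INDX bit
   is reset in B's bitmap.  Likewise a MEM is not transparent in any
   block containing a call, or a store that may alias it according to
   canon_true_dependence.  The register numbers above are illustrative.  */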

static void
compute_transp (const_rtx x, int indx, sbitmap *bmap)
1646
{
1647 1648 1649
  int i, j;
  enum rtx_code code;
  const char *fmt;
1650

1651 1652 1653
  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:
1654

1655 1656
  if (x == 0)
    return;
1657

1658 1659
  code = GET_CODE (x);
  switch (code)
1660
    {
1661
    case REG:
1662
	{
1663 1664 1665 1666 1667
	  df_ref def;
	  for (def = DF_REG_DEF_CHAIN (REGNO (x));
	       def;
	       def = DF_REF_NEXT_REG (def))
	    RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
1668
	}
1669

1670
      return;
1671

1672 1673
    case MEM:
      if (! MEM_READONLY_P (x))
1674
	{
1675 1676
	  bitmap_iterator bi;
	  unsigned bb_index;
1677

1678 1679 1680 1681 1682 1683
	  /* First handle all the blocks with calls.  We don't need to
	     do any list walking for them.  */
	  EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
	    {
	      RESET_BIT (bmap[bb_index], indx);
	    }
1684

1685 1686 1687 1688 1689 1690
	    /* Now iterate over the blocks which have memory modifications
	       but which do not have any calls.  */
	    EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
					    blocks_with_calls,
					    0, bb_index, bi)
	      {
1691
		VEC (modify_pair,heap) *list
1692 1693 1694
		  = canon_modify_mem_list[bb_index];
		modify_pair *pair;
		unsigned ix;
1695

1696
		FOR_EACH_VEC_ELT_REVERSE (modify_pair, list, ix, pair)
1697
		  {
1698 1699
		    rtx dest = pair->dest;
		    rtx dest_addr = pair->dest_addr;
1700

1701 1702
		    if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
					       x, NULL_RTX, rtx_addr_varies_p))
1703
		      RESET_BIT (bmap[bb_index], indx);
1704 1705
	          }
	      }
1706 1707
	}

1708 1709
      x = XEXP (x, 0);
      goto repeat;
1710

1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722
    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_FIXED:
    case CONST_VECTOR:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return;
1723

1724 1725 1726
    default:
      break;
    }
1727

1728
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1729
    {
1730
      if (fmt[i] == 'e')
1731
	{
1732 1733 1734 1735 1736 1737 1738 1739 1740 1741
	  /* If we are about to do the last recursive call
	     needed at this level, change it into iteration.
	     This function is called enough to be worth it.  */
	  if (i == 0)
	    {
	      x = XEXP (x, i);
	      goto repeat;
	    }

	  compute_transp (XEXP (x, i), indx, bmap);
1742
	}
1743 1744 1745
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  compute_transp (XVECEXP (x, i, j), indx, bmap);
1746 1747
    }
}
1748

1749

1750
/* Compute PRE+LCM working variables.  */
1751 1752 1753

/* Local properties of expressions.  */
/* Nonzero for expressions that are transparent in the block.  */
1754
static sbitmap *transp;
1755

1756 1757
/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;
1758

1759 1760
/* Nonzero for expressions that are locally anticipatable in the block.  */
static sbitmap *antloc;
1761

1762 1763 1764
/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;
1765

1766 1767
/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;
1768

1769 1770 1771 1772 1773 1774 1775 1776 1777
/* Nonzero for expressions which should be inserted on a specific edge.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.  */
static sbitmap *pre_delete_map;

/* Contains the edge_list returned by pre_edge_lcm.  */
static struct edge_list *edge_list;

1778
/* Allocate vars used for PRE analysis.  */
1779 1780

static void
1781
alloc_pre_mem (int n_blocks, int n_exprs)
1782
{
1783 1784 1785
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1786

1787 1788 1789 1790 1791
  pre_optimal = NULL;
  pre_redundant = NULL;
  pre_insert_map = NULL;
  pre_delete_map = NULL;
  ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1792

1793
  /* pre_insert and pre_delete are allocated later.  */
1794 1795
}

1796
/* Free vars used for PRE analysis.  */
1797 1798

static void
1799
free_pre_mem (void)
1800
{
1801 1802
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
1803 1804

  /* ANTLOC and AE_KILL are freed just after pre_edge_lcm finishes.  */
1805

1806
  if (pre_optimal)
1807
    sbitmap_vector_free (pre_optimal);
1808
  if (pre_redundant)
1809
    sbitmap_vector_free (pre_redundant);
1810
  if (pre_insert_map)
1811
    sbitmap_vector_free (pre_insert_map);
1812
  if (pre_delete_map)
1813
    sbitmap_vector_free (pre_delete_map);
1814

1815
  transp = comp = NULL;
1816
  pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1817 1818
}

/* Remove certain expressions from the anticipatable and transparent
   sets of basic blocks that have an incoming abnormal edge.
   For PRE remove potentially trapping expressions to avoid placing
   them on abnormal edges.  For hoisting remove memory references that
   can be clobbered by calls.  */
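
/* For example, a division such as (div:SI (reg 100) (reg 101)) may trap,
   so for PRE it must not remain anticipatable in a block entered by an
   abnormal edge; we could not safely insert it on that edge.  Similarly,
   for hoisting, a MEM that a call might clobber must not float above the
   call ending the predecessor block.  Both examples are merely
   illustrative.  */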

static void
1826
prune_expressions (bool pre_p)
1827
{
1828
  sbitmap prune_exprs;
1829
  unsigned int ui;
1830
  basic_block bb;
1831

1832 1833
  prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
  sbitmap_zero (prune_exprs);
1834
  for (ui = 0; ui < expr_hash_table.size; ui++)
1835 1836
    {
      struct expr *e;
1837
      for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
1838 1839 1840 1841 1842 1843 1844
	{
	  /* Note potentially trapping expressions.  */
	  if (may_trap_p (e->expr))
	    {
	      SET_BIT (prune_exprs, e->bitmap_index);
	      continue;
	    }
1845

	  if (!pre_p && MEM_P (e->expr))
	    /* Note memory references that can be clobbered by a call.
	       We do not split abnormal edges in hoisting, so if a memory
	       reference were hoisted along an abnormal edge, it would be
	       placed /before/ the call.  Therefore, only constant memory
	       references can be hoisted along abnormal edges.  */
	    {
	      if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF
		  && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0)))
		continue;
1857

1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871
	      if (MEM_READONLY_P (e->expr)
		  && !MEM_VOLATILE_P (e->expr)
		  && MEM_NOTRAP_P (e->expr))
		/* Constant memory reference, e.g., a PIC address.  */
		continue;

	      /* ??? Optimally, we would use interprocedural alias
		 analysis to determine if this mem is actually killed
		 by this call.  */

	      SET_BIT (prune_exprs, e->bitmap_index);
	    }
	}
    }
1872

1873
  FOR_EACH_BB (bb)
1874
    {
1875
      edge e;
1876
      edge_iterator ei;
1877 1878

      /* If the current block is the destination of an abnormal edge, we
1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instruction that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 hoist_expr_reaches_here_p where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
1893
      FOR_EACH_EDGE (e, ei, bb->preds)
1894 1895
	if ((e->flags & EDGE_ABNORMAL)
	    && (pre_p || CALL_P (BB_END (e->src))))
1896
	  {
1897 1898 1899 1900
	    sbitmap_difference (antloc[bb->index],
				antloc[bb->index], prune_exprs);
	    sbitmap_difference (transp[bb->index],
				transp[bb->index], prune_exprs);
1901 1902
	    break;
	  }
1903 1904 1905 1906 1907
    }

  sbitmap_free (prune_exprs);
}

/* It may be necessary to insert a large number of insns on edges to
   make the existing occurrences of expressions fully redundant.  This
   routine examines the set of insertions and deletions and if the ratio
   of insertions to deletions is too high for a particular expression, then
   the expression is removed from the insertion/deletion sets. 

   N_ELEMS is the number of elements in the hash table.  */
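
/* For example (with made-up numbers): if expression #7 would have to be
   inserted on 25 edges but doing so only lets us delete one existing
   evaluation, the insertion/deletion ratio is 25, and if that exceeds
   MAX_GCSE_INSERTION_RATIO the expression is dropped from both
   PRE_INSERT_MAP and PRE_DELETE_MAP, i.e. it is left alone.  */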

static void
prune_insertions_deletions (int n_elems)
{
  sbitmap_iterator sbi;
  sbitmap prune_exprs;

  /* We always use I to iterate over blocks/edges and J to iterate over
     expressions.  */
  unsigned int i, j;

  /* Counts for the number of times an expression needs to be inserted and
     number of times an expression can be removed as a result.  */
  int *insertions = GCNEWVEC (int, n_elems);
  int *deletions = GCNEWVEC (int, n_elems);

  /* Set of expressions which require too many insertions relative to
     the number of deletions achieved.  We will prune these out of the
     insertion/deletion sets.  */
  prune_exprs = sbitmap_alloc (n_elems);
  sbitmap_zero (prune_exprs);

  /* Iterate over the edges counting the number of times each expression
     needs to be inserted.  */
  for (i = 0; i < (unsigned) n_edges; i++)
    {
      EXECUTE_IF_SET_IN_SBITMAP (pre_insert_map[i], 0, j, sbi)
	insertions[j]++;
    }

  /* Similarly for deletions, but those occur in blocks rather than on
     edges.  */
  for (i = 0; i < (unsigned) last_basic_block; i++)
    {
      EXECUTE_IF_SET_IN_SBITMAP (pre_delete_map[i], 0, j, sbi)
	deletions[j]++;
    }

  /* Now that we have accurate counts, iterate over the elements in the
     hash table and see if any need too many insertions relative to the
     number of evaluations that can be removed.  If so, mark them in
     PRUNE_EXPRS.  */
  for (j = 0; j < (unsigned) n_elems; j++)
    if (deletions[j]
	&& ((unsigned) insertions[j] / deletions[j]) > MAX_GCSE_INSERTION_RATIO)
      SET_BIT (prune_exprs, j);

  /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS.  */
  EXECUTE_IF_SET_IN_SBITMAP (prune_exprs, 0, j, sbi)
    {
      for (i = 0; i < (unsigned) n_edges; i++)
	RESET_BIT (pre_insert_map[i], j);

      for (i = 0; i < (unsigned) last_basic_block; i++)
	RESET_BIT (pre_delete_map[i], j);
    }

  sbitmap_free (prune_exprs);
  free (insertions);
  free (deletions);
}

/* Top level routine to do the dataflow analysis needed by PRE.  */

static void
compute_pre_data (void)
{
  basic_block bb;

  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (true);
  sbitmap_vector_zero (ae_kill, last_basic_block);

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)
  */
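
  /* That is, an expression is killed in BB exactly when it is neither
     transparent in BB (one of its operands is modified there) nor
     computed in BB.  For instance, an expression reading reg 100 is
     killed in any block that clobbers reg 100, unless that block also
     recomputes the expression and leaves it available at its end.
     (Register number illustrative.)  */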

  FOR_EACH_BB (bb)
    {
      sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
      sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
    }

  edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
			    ae_kill, &pre_insert_map, &pre_delete_map);
  sbitmap_vector_free (antloc);
  antloc = NULL;
  sbitmap_vector_free (ae_kill);
  ae_kill = NULL;

  prune_insertions_deletions (expr_hash_table.n_elems);
}

/* PRE utilities */

/* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
   block BB.

   VISITED is a pointer to a working buffer for tracking which BB's have
   been visited.  It is NULL for the top-level call.

   We treat reaching expressions that go through blocks containing the same
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
   2 as not reaching.  The intent is to improve the probability of finding
   only one reaching expression and to reduce register lifetimes by picking
   the closest such expression.  */

static int
2025
pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
2026
{
2027
  edge pred;
2028
  edge_iterator ei;

2030
  FOR_EACH_EDGE (pred, ei, bb->preds)
2031
    {
2032
      basic_block pred_bb = pred->src;
2033

2034
      if (pred->src == ENTRY_BLOCK_PTR
	  /* Has this predecessor already been visited?  */
	  || visited[pred_bb->index])
2037 2038
	;/* Nothing to do.  */

2039
      /* Does this predecessor generate this expression?  */
2040
      else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
2041 2042 2043 2044
	{
	  /* Is this the occurrence we're looking for?
	     Note that there's only one generating occurrence per block
	     so we just need to check the block number.  */
2045
	  if (occr_bb == pred_bb)
2046
	    return 1;
2047

2048
	  visited[pred_bb->index] = 1;
2049 2050
	}
      /* Ignore this predecessor if it kills the expression.  */
2051 2052
      else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
	visited[pred_bb->index] = 1;
2053

2054 2055
      /* Neither gen nor kill.  */
      else
	{
2057
	  visited[pred_bb->index] = 1;
2058
	  if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
2059
	    return 1;
	}
2061 2062 2063 2064 2065
    }

  /* All paths have been checked.  */
  return 0;
}
2066 2067

/* The wrapper for pre_expr_reaches_here_p_work that ensures that any
   memory allocated for that function is returned.  */
2069 2070

static int
2071
pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
2072 2073
{
  int rval;
2074
  char *visited = XCNEWVEC (char, last_basic_block);
2075

  rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
2077 2078

  free (visited);
2079
  return rval;
2080
}
2081

2082 2083

/* Given an expr, generate RTL which we can insert at the end of a BB,
2084
   or on an edge.  Set the block number of any insns generated to
2085 2086 2087
   the value of BB.  */

static rtx
2088
process_insert_insn (struct expr *expr)
2089 2090
{
  rtx reg = expr->reaching_reg;
2091 2092
  rtx exp = copy_rtx (expr->expr);
  rtx pat;
2093 2094

  start_sequence ();
2095 2096 2097 2098 2099 2100 2101 2102 2103

  /* If the expression is something that's an operand, like a constant,
     just copy it to a register.  */
  if (general_operand (exp, GET_MODE (reg)))
    emit_move_insn (reg, exp);

  /* Otherwise, make a new insn to compute this expression and make sure the
     insn will be recognized (this also adds any needed CLOBBERs).  Copy the
     expression to make sure we don't have any sharing issues.  */
2104 2105 2106 2107
  else
    {
      rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));

2108 2109
      if (insn_invalid_p (insn))
	gcc_unreachable ();
2110
    }

2112

2113
  pat = get_insns ();
2114 2115 2116 2117
  end_sequence ();

  return pat;
}
2118

/* Add EXPR to the end of basic block BB.

   This is used by both PRE and code hoisting.  */
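
/* Roughly three cases are handled below: if BB ends in a jump, or in an
   insn that may throw when BB has multiple or abnormal successors, the
   new insns go just before that insn, with extra care for tablejumps and
   cc0 setters; if BB ends in a call with such successors, they go before
   the first load of a parameter register; otherwise they are simply
   emitted after the last insn of BB.  This is a plain-language summary
   of the code that follows, not a separate specification.  */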

static void
2124
insert_insn_end_basic_block (struct expr *expr, basic_block bb)
2125
{
2126
  rtx insn = BB_END (bb);
2127 2128 2129
  rtx new_insn;
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
2130
  rtx pat, pat_end;
2131

2132
  pat = process_insert_insn (expr);
2133
  gcc_assert (pat && INSN_P (pat));
2134 2135 2136 2137

  pat_end = pat;
  while (NEXT_INSN (pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (pat_end);
2138 2139

  /* If the last insn is a jump, insert EXPR in front [taking care to
     handle cc0, etc. properly].  Similarly we need to take care of
     trapping instructions in the presence of non-call exceptions.  */
2142

2143
  if (JUMP_P (insn)
2144
      || (NONJUMP_INSN_P (insn)
2145 2146
	  && (!single_succ_p (bb)
	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
2147
    {
#ifdef HAVE_cc0
2149
      rtx note;
#endif
2151 2152 2153 2154 2155 2156

      /* If this is a jump table, then we can't insert stuff here.  Since
	 we know the previous real insn must be the tablejump, we insert
	 the new instruction just before the tablejump.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
2157
	insn = prev_active_insn (insn);
2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168

#ifdef HAVE_cc0
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
	 if cc0 isn't set.  */
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
      if (note)
	insn = XEXP (note, 0);
      else
	{
	  rtx maybe_cc0_setter = prev_nonnote_insn (insn);
	  if (maybe_cc0_setter
2169
	      && INSN_P (maybe_cc0_setter)
2170 2171 2172 2173 2174
	      && sets_cc0_p (PATTERN (maybe_cc0_setter)))
	    insn = maybe_cc0_setter;
	}
#endif
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
2175
      new_insn = emit_insn_before_noloc (pat, insn, bb);
2176
    }
2177

2178 2179
  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
2180
  else if (CALL_P (insn)
2181 2182
	   && (!single_succ_p (bb)
	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
2183
    {
2184 2185 2186
      /* Keeping in mind targets with small register classes and parameters
         in registers, we search backward and place the instructions before
	 the first parameter is loaded.  Do this for all targets for
	 consistency, and on the presumption that we'll get better code
	 elsewhere as well.  */
2188 2189 2190 2191

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
2192
      insn = find_first_parameter_load (insn, BB_HEAD (bb));
2193

2194 2195 2196 2197 2198 2199 2200
      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
2201
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
2202
      while (LABEL_P (insn)
2203
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
2204
	insn = NEXT_INSN (insn);
2205

2206
      new_insn = emit_insn_before_noloc (pat, insn, bb);
2207 2208
    }
  else
2209
    new_insn = emit_insn_after_noloc (pat, insn, bb);
2210

2211
  while (1)
2212
    {
2213
      if (INSN_P (pat))
2214
	add_label_notes (PATTERN (pat), new_insn);
2215 2216 2217
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (pat);
2218
    }
2219

2220 2221
  gcse_create_count++;

2222
  if (dump_file)
2223
    {
2224
      fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
2225
	       bb->index, INSN_UID (new_insn));
2226
      fprintf (dump_file, "copying expression %d to reg %d\n",
2227
	       expr->bitmap_index, regno);
2228 2229 2230
    }
}

/* Insert partially redundant expressions on edges in the CFG to make
   the expressions fully redundant.  */
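
/* A small illustrative picture: if block B3 computes a+b and has two
   predecessors B1 and B2 of which only B1 also computes a+b, then a+b is
   partially redundant in B3.  LCM directs us to insert an evaluation of
   a+b on the edge B2->B3; after that the occurrence in B3 is fully
   redundant and pre_delete can replace it with a copy from the reaching
   register.  The block names are of course made up.  */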

static int
pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
2236
{
2237
  int e, i, j, num_edges, set_size, did_insert = 0;
2238 2239
  sbitmap *inserted;

2240 2241
  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */
2242

2243 2244
  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
2245
  inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
2246
  sbitmap_vector_zero (inserted, num_edges);
2247

2248
  for (e = 0; e < num_edges; e++)
2249 2250
    {
      int indx;
2251
      basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
2252 2253

      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
2254
	{
2255
	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
2256

2257
	  for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
2258 2259 2260 2261
	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
	      {
		struct expr *expr = index_map[j];
		struct occr *occr;
2262

2263
		/* Now look at each deleted occurrence of this expression.  */
2264 2265 2266 2267 2268
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

2269
		    /* Insert this expression on this edge if it would
2270
		       reach the deleted occurrence in BB.  */
2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282
		    if (!TEST_BIT (inserted[e], j))
		      {
			rtx insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block. There are several alternatives
			   detailed in Morgan's book P277 (sec 10.5) for
			   handling this situation.  This one is easiest for
			   now.  */

2283
			if (eg->flags & EDGE_ABNORMAL)
2284
			  insert_insn_end_basic_block (index_map[j], bb);
2285 2286 2287 2288 2289 2290
			else
			  {
			    insn = process_insert_insn (index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

2291
			if (dump_file)
2292
			  {
2293
			    fprintf (dump_file, "PRE: edge (%d,%d), ",
2294 2295
				     bb->index,
				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
2296
			    fprintf (dump_file, "copy expression %d\n",
2297 2298 2299
				     expr->bitmap_index);
			  }

2300
			update_ld_motion_stores (expr);
2301 2302 2303 2304 2305 2306
			SET_BIT (inserted[e], j);
			did_insert = 1;
			gcse_create_count++;
		      }
		  }
	      }
2307 2308
	}
    }
2309

2310
  sbitmap_vector_free (inserted);
2311
  return did_insert;
2312 2313
}

2314
/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
2315 2316 2317 2318 2319 2320
   Given "old_reg <- expr" (INSN), instead of adding after it
     reaching_reg <- old_reg
   it's better to do the following:
     reaching_reg <- expr
     old_reg      <- reaching_reg
   because this way copy propagation can discover additional PRE
2321 2322 2323 2324 2325 2326 2327
   opportunities.  But if this fails, we try the old way.
   When "expr" is a store, i.e.
   given "MEM <- old_reg", instead of adding after it
     reaching_reg <- old_reg
   it's better to add it before as follows:
     reaching_reg <- old_reg
     MEM          <- reaching_reg.  */
2328 2329

static void
2330
pre_insert_copy_insn (struct expr *expr, rtx insn)
2331 2332 2333 2334
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
2335
  rtx pat = PATTERN (insn);
2336
  rtx set, first_set, new_insn;
2337
  rtx old_reg;
2338
  int i;
2339

2340
  /* This block matches the logic in hash_scan_insn.  */
2341
  switch (GET_CODE (pat))
2342
    {
2343 2344 2345 2346 2347
    case SET:
      set = pat;
      break;

    case PARALLEL:
2348 2349
      /* Search through the parallel looking for the set whose
	 source was the expression that we're interested in.  */
2350
      first_set = NULL_RTX;
2351 2352 2353 2354
      set = NULL_RTX;
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx x = XVECEXP (pat, 0, i);
2355
	  if (GET_CODE (x) == SET)
2356
	    {
2357 2358 2359 2360 2361 2362 2363 2364 2365 2366
	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
		 may not find an equivalent expression, but in this
		 case the PARALLEL will have a single set.  */
	      if (first_set == NULL_RTX)
		first_set = x;
	      if (expr_equiv_p (SET_SRC (x), expr->expr))
	        {
	          set = x;
	          break;
	        }
2367 2368
	    }
	}
2369 2370 2371 2372

      gcc_assert (first_set);
      if (set == NULL_RTX)
        set = first_set;
2373 2374 2375 2376
      break;

    default:
      gcc_unreachable ();
2377
    }
2378

2379
  if (REG_P (SET_DEST (set)))
2380
    {
2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392
      old_reg = SET_DEST (set);
      /* Check if we can modify the set destination in the original insn.  */
      if (validate_change (insn, &SET_DEST (set), reg, 0))
        {
          new_insn = gen_move_insn (old_reg, reg);
          new_insn = emit_insn_after (new_insn, insn);
        }
      else
        {
          new_insn = gen_move_insn (reg, old_reg);
          new_insn = emit_insn_after (new_insn, insn);
        }
2393
    }
2394
  else /* This is possible only in case of a store to memory.  */
2395
    {
2396
      old_reg = SET_SRC (set);
2397
      new_insn = gen_move_insn (reg, old_reg);
2398 2399 2400 2401 2402 2403

      /* Check if we can modify the set source in the original insn.  */
      if (validate_change (insn, &SET_SRC (set), reg, 0))
        new_insn = emit_insn_before (new_insn, insn);
      else
        new_insn = emit_insn_after (new_insn, insn);
2404
    }
2405 2406 2407

  gcse_create_count++;

2408 2409
  if (dump_file)
    fprintf (dump_file,
2410
	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
2411
	      BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
2412
	      INSN_UID (insn), regno);
2413 2414 2415 2416 2417 2418
}

/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
2419
pre_insert_copies (void)
2420
{
2421
  unsigned int i, added_copy;
2422 2423 2424
  struct expr *expr;
  struct occr *occr;
  struct occr *avail;
2425

2426 2427 2428 2429 2430 2431
  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

2432 2433
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2434 2435 2436 2437 2438 2439 2440 2441
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;
2442

2443
	/* Set when we add a copy for that expression.  */
2444
	added_copy = 0;
2445 2446 2447 2448 2449

	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;
2450

2451 2452 2453
	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx insn = avail->insn;
2454

2455 2456 2457
		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;
2458

2459
		/* Don't handle this one if it's a redundant one.  */
2460
		if (INSN_DELETED_P (insn))
2461
		  continue;
2462

2463
		/* Or if the expression doesn't reach the deleted one.  */
2464
		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
2465 2466
					       expr,
					       BLOCK_FOR_INSN (occr->insn)))
2467
		  continue;
2468

2469 2470
                added_copy = 1;

2471 2472 2473 2474 2475
		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }
2476

2477
	  if (added_copy)
2478
            update_ld_motion_stores (expr);
2479
      }
2480 2481
}

/* Emit move from SRC to DEST noting the equivalence with expression computed
   in INSN.  */
static rtx
gcse_emit_move_after (rtx src, rtx dest, rtx insn)
2486
{
2487
  rtx new_rtx;
2488
  rtx set = single_set (insn), set2;
2489 2490 2491 2492 2493 2494
  rtx note;
  rtx eqv;

  /* This should never fail since we're creating a reg->reg copy
     we've verified to be valid.  */

2495
  new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2496

2497
  /* Note the equivalence for local CSE pass.  */
2498
  set2 = single_set (new_rtx);
2499
  if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2500
    return new_rtx;
2501 2502 2503 2504 2505
  if ((note = find_reg_equal_equiv_note (insn)))
    eqv = XEXP (note, 0);
  else
    eqv = SET_SRC (set);

2506
  set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2507

2508
  return new_rtx;
2509 2510
}

/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.  It is left to later passes
   (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.

   Returns nonzero if a change is made.  */
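
/* Schematically, for a deleted occurrence the transformation is

     before:  (set (reg 100) (plus (reg 101) (reg 102)))
     after:   (set (reg 100) (reg 200))    with a REG_EQUAL note
                                           for (plus (reg 101) (reg 102))

   where reg 200 is EXPR->reaching_reg; the insertion pass guarantees that
   reg 200 holds the value of the expression on every path into this
   block.  The register numbers are illustrative.  */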

static int
2519
pre_delete (void)
2520
{
2521
  unsigned int i;
2522
  int changed;
2523 2524
  struct expr *expr;
  struct occr *occr;
2525

2526
  changed = 0;
2527
  for (i = 0; i < expr_hash_table.size; i++)
2528 2529 2530
    for (expr = expr_hash_table.table[i];
	 expr != NULL;
	 expr = expr->next_same_hash)
2531 2532
      {
	int indx = expr->bitmap_index;
2533

2534 2535
	/* We only need to search antic_occr since we require
	   ANTLOC != 0.  */
2536

2537 2538 2539 2540
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx insn = occr->insn;
	    rtx set;
2541
	    basic_block bb = BLOCK_FOR_INSN (insn);
2542

2543 2544
	    /* We only delete insns that have a single_set.  */
	    if (TEST_BIT (pre_delete_map[bb->index], indx)
2545 2546
		&& (set = single_set (insn)) != 0
                && dbg_cnt (pre_insn))
2547 2548 2549 2550 2551
	      {
		/* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2553

2554
		gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
2555 2556 2557 2558
		delete_insn (insn);
		occr->deleted_p = 1;
		changed = 1;
		gcse_subst_count++;
2559

2560
		if (dump_file)
2561
		  {
2562
		    fprintf (dump_file,
2563 2564
			     "PRE: redundant insn %d (expression %d) in ",
			       INSN_UID (insn), indx);
2565
		    fprintf (dump_file, "bb %d, reaching reg is %d\n",
2566
			     bb->index, REGNO (expr->reaching_reg));
2567 2568 2569 2570
		  }
	      }
	  }
      }
2571 2572 2573 2574 2575 2576 2577 2578

  return changed;
}

/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
   lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
   Compiler Design and Implementation.
2582

2583 2584 2585 2586 2587
   ??? A new pseudo reg is created to hold the reaching expression.  The nice
   thing about the classical approach is that it would try to use an existing
   reg.  If the register can't be adequately optimized [i.e. we introduce
   reload problems], one could add a pass here to propagate the new register
   through the block.
2588

2589 2590 2591 2592
   ??? We don't handle single sets in PARALLELs because we're [currently] not
   able to copy the rest of the parallel when we insert copies to create full
   redundancies from partial redundancies.  However, there's no reason why we
   can't handle PARALLELs in the cases where there are no partial
2593 2594 2595
   redundancies.  */

static int
2596
pre_gcse (void)
2597
{
2598 2599
  unsigned int i;
  int did_insert, changed;
2600
  struct expr **index_map;
2601
  struct expr *expr;
2602 2603 2604 2605

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

2606
  index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2607 2608
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2609
      index_map[expr->bitmap_index] = expr;
2610 2611 2612 2613 2614

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */
2615

2616
  changed = pre_delete ();
2617
  did_insert = pre_edge_insert (edge_list, index_map);
2618

2619
  /* In other places with reaching expressions, copy the expression to the
2620
     specially allocated pseudo-reg that reaches the redundant expr.  */
2621
  pre_insert_copies ();
2622 2623 2624 2625 2626
  if (did_insert)
    {
      commit_edge_insertions ();
      changed = 1;
    }
2627

2628
  free (index_map);
2629 2630 2631 2632 2633
  return changed;
}

/* Top level routine to perform one PRE GCSE pass.

2634
   Return nonzero if a change was made.  */
2635 2636

static int
2637
one_pre_gcse_pass (void)
2638 2639 2640 2641 2642 2643
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655
  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
      || is_too_expensive (_("PRE disabled")))
    return 0;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

2656
  alloc_hash_table (&expr_hash_table);
2657
  add_noreturn_fake_exit_edges ();
2658 2659 2660
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

2661
  compute_hash_table (&expr_hash_table);
2662
  trim_ld_motion_mems ();
2663 2664
  if (dump_file)
    dump_hash_table (dump_file, "Expression", &expr_hash_table);
2665

2666
  if (expr_hash_table.n_elems > 0)
2667
    {
2668
      alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
2669 2670
      compute_pre_data ();
      changed |= pre_gcse ();
2671
      free_edge_list (edge_list);
2672 2673
      free_pre_mem ();
    }
2674

2675
  free_ldst_mems ();
2676
  remove_fake_exit_edges ();
2677
  free_hash_table (&expr_hash_table);
2678

2679 2680 2681 2682 2683 2684
  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

2685
  if (dump_file)
2686
    {
2687 2688
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks, bytes_used);
2689
      fprintf (dump_file, "%d substs, %d insns created\n",
2690
	       gcse_subst_count, gcse_create_count);
2691 2692 2693 2694
    }

  return changed;
}
2695

/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
   to INSN.  If such notes are added to an insn which references a
   CODE_LABEL, the LABEL_NUSES count is incremented.  We have to add
   these notes because the following loop optimization pass requires
   them.  */
2701 2702 2703

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
2704
   necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes.  */
2705 2706

static void
2707
add_label_notes (rtx x, rtx insn)
2708 2709 2710
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
2711
  const char *fmt;
2712 2713 2714

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
2715
      /* This code used to ignore labels that referred to dispatch tables to
2716
	 avoid flow generating (slightly) worse code.
2717

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */
2720

2721 2722 2723 2724
      /* There's no reason for current users to emit jump-insns with
	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
	 notes.  */
      gcc_assert (!JUMP_P (insn));
2725 2726
      add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));

2727 2728 2729
      if (LABEL_P (XEXP (x, 0)))
	LABEL_NUSES (XEXP (x, 0))++;

2730 2731 2732
      return;
    }

2733
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2734 2735 2736 2737 2738 2739 2740 2741
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), insn);
    }
}
2742

2743 2744 2745 2746 2747 2748 2749
/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging; doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */

/* Allocate vars used for code hoisting analysis.  */

static void
2759
alloc_code_hoist_mem (int n_blocks, int n_exprs)
2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
}

/* Free vars used for code hoisting analysis.  */

static void
2772
free_code_hoist_mem (void)
2773
{
2774 2775 2776
  sbitmap_vector_free (antloc);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
2777

2778 2779
  sbitmap_vector_free (hoist_vbein);
  sbitmap_vector_free (hoist_vbeout);
2780

2781
  free_dominance_info (CDI_DOMINATORS);
2782 2783 2784 2785 2786 2787 2788 2789
}

/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  */
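
/* The fixed point computed below is, roughly,

     VBEOUT(bb) = (intersection of VBEIN(s) over successors s) | COMP(bb)
     VBEIN(bb)  = ANTLOC(bb) | (VBEOUT(bb) & TRANSP(bb))

   i.e. an expression is very busy on exit from BB if every successor
   evaluates it before killing it, or BB itself computes it and it is
   still available at BB's end; and it is very busy on entry if BB
   evaluates it locally, or it is very busy on exit and BB does not kill
   it.  This is only a summary of the code below.  */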

static void
compute_code_hoist_vbeinout (void)
2791
{
2792 2793
  int changed, passes;
  basic_block bb;
2794

2795 2796
  sbitmap_vector_zero (hoist_vbeout, last_basic_block);
  sbitmap_vector_zero (hoist_vbein, last_basic_block);
2797 2798 2799

  passes = 0;
  changed = 1;
2800

2801 2802 2803
  while (changed)
    {
      changed = 0;
2804

2805 2806
      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
2807
      FOR_EACH_BB_REVERSE (bb)
2808
	{
2809
	  if (bb->next_bb != EXIT_BLOCK_PTR)
2810 2811 2812 2813 2814 2815 2816 2817 2818
	    {
	      sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
					     hoist_vbein, bb->index);

	      /* Include expressions in VBEout that are calculated
		 in BB and available at its end.  */
	      sbitmap_a_or_b (hoist_vbeout[bb->index],
			      hoist_vbeout[bb->index], comp[bb->index]);
	    }
2819 2820 2821 2822 2823

	  changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
					      antloc[bb->index],
					      hoist_vbeout[bb->index],
					      transp[bb->index]);
2824
	}
2825

2826 2827 2828
      passes++;
    }

2829
  if (dump_file)
2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840
    {
      fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);

      FOR_EACH_BB (bb)
        {
	  fprintf (dump_file, "vbein (%d): ", bb->index);
	  dump_sbitmap_file (dump_file, hoist_vbein[bb->index]);
	  fprintf (dump_file, "vbeout(%d): ", bb->index);
	  dump_sbitmap_file (dump_file, hoist_vbeout[bb->index]);
	}
    }
2841 2842 2843 2844 2845
}

/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
2846
compute_code_hoist_data (void)
2847
{
2848
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
2849
  prune_expressions (false);
2850
  compute_code_hoist_vbeinout ();
2851
  calculate_dominance_info (CDI_DOMINATORS);
2852 2853
  if (dump_file)
    fprintf (dump_file, "\n");
2854 2855 2856 2857
}

/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it were placed at the end of EXPR_BB.
   Stop the search if the expression would need to be moved more
   than DISTANCE instructions.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */
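
/* The DISTANCE argument is a rough budget, in non-debug insns, for how
   far the expression may travel, and BB_SIZE gives the insn count of
   each block.  For example, if EXPR may move at most 20 insns and BB
   contains 15 of them, only a budget of 5 remains when walking into BB's
   predecessors; once the budget is exhausted the search stops and the
   occurrence is not hoisted this far.  The numbers are illustrative.  */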

static int
hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
			   char *visited, int distance, int *bb_size)
2874 2875
{
  edge pred;
2876
  edge_iterator ei;
2877
  int visited_allocated_locally = 0;
2878

2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889
  /* Terminate the search if distance, for which EXPR is allowed to move,
     is exhausted.  */
  if (distance > 0)
    {
      distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    gcc_assert (distance == 0);
2890 2891 2892

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
2894
      visited = XCNEWVEC (char, last_basic_block);
2895 2896
    }

2897
  FOR_EACH_EDGE (pred, ei, bb->preds)
2898
    {
2899
      basic_block pred_bb = pred->src;
2900 2901 2902

      if (pred->src == ENTRY_BLOCK_PTR)
	break;
2903 2904
      else if (pred_bb == expr_bb)
	continue;
2905
      else if (visited[pred_bb->index])
2906
	continue;
2907

2908
      else if (! TEST_BIT (transp[pred_bb->index], expr_index))
2909
	break;
2910

2911 2912 2913
      /* Not killed.  */
      else
	{
2914
	  visited[pred_bb->index] = 1;
2915 2916
	  if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
					   visited, distance, bb_size))
2917 2918 2919
	    break;
	}
    }
2920
  if (visited_allocated_locally)
2921
    free (visited);
2922

2923 2924 2925
  return (pred == NULL);
}

/* Find the occurrence in list OCCR that lies in basic block BB.  */
static struct occr *
find_occr_in_bb (struct occr *occr, basic_block bb)
{
  /* Find the right occurrence of this expression.  */
  while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
    occr = occr->next;

  return occr;
}

2937
/* Actually perform code hoisting.  */
2938

2939
static int
2940
hoist_code (void)
2941
{
2942
  basic_block bb, dominated;
2943 2944
  VEC (basic_block, heap) *dom_tree_walk;
  unsigned int dom_tree_walk_index;
2945
  VEC (basic_block, heap) *domby;
2946
  unsigned int i,j;
2947
  struct expr **index_map;
2948
  struct expr *expr;
2949 2950
  int *to_bb_head;
  int *bb_size;
2951
  int changed = 0;
2952 2953 2954 2955

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

2956
  index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
2957 2958
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
2959
      index_map[expr->bitmap_index] = expr;
2960

2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973
  /* Calculate sizes of basic blocks and note how far
     each instruction is from the start of its block.  We then use this
     data to restrict distance an expression can travel.  */

  to_bb_head = XCNEWVEC (int, get_max_uid ());
  bb_size = XCNEWVEC (int, last_basic_block);

  FOR_EACH_BB (bb)
    {
      rtx insn;
      int to_head;

      to_head = 0;
2974
      FOR_BB_INSNS (bb, insn)
2975 2976 2977 2978 2979 2980 2981 2982 2983 2984
	{
	  /* Don't count debug instructions to avoid them affecting
	     decision choices.  */
	  if (NONDEBUG_INSN_P (insn))
	    to_bb_head[INSN_UID (insn)] = to_head++;
	}

      bb_size[bb->index] = to_head;
    }

2985 2986 2987 2988 2989 2990 2991
  gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR->succs) == 1
	      && (EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest
		  == ENTRY_BLOCK_PTR->next_bb));

  dom_tree_walk = get_all_dominated_blocks (CDI_DOMINATORS,
					    ENTRY_BLOCK_PTR->next_bb);

2992 2993
  /* Walk over each basic block looking for potentially hoistable
     expressions, nothing gets hoisted from the entry block.  */
2994
  FOR_EACH_VEC_ELT (basic_block, dom_tree_walk, dom_tree_walk_index, bb)
2995
    {
2996 2997 2998 2999
      domby = get_dominated_to_depth (CDI_DOMINATORS, bb, MAX_HOIST_DEPTH);

      if (VEC_length (basic_block, domby) == 0)
	continue;
3000 3001 3002

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
3003
      for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
3004
	{
3005
	  if (TEST_BIT (hoist_vbeout[bb->index], i))
3006
	    {
3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021
	      /* Current expression.  */
	      struct expr *expr = index_map[i];
	      /* Number of occurrences of EXPR that can be hoisted to BB.  */
	      int hoistable = 0;
	      /* Basic blocks that have occurrences reachable from BB.  */
	      bitmap_head _from_bbs, *from_bbs = &_from_bbs;
	      /* Occurrences reachable from BB.  */
	      VEC (occr_t, heap) *occrs_to_hoist = NULL;
	      /* We want to insert the expression into BB only once, so
		 note when we've inserted it.  */
	      int insn_inserted_p;
	      occr_t occr;

	      bitmap_initialize (from_bbs, 0);

3022 3023 3024
	      /* If an expression is computed in BB and is available at end of
		 BB, hoist all occurrences dominated by BB to BB.  */
	      if (TEST_BIT (comp[bb->index], i))
3025 3026 3027 3028 3029 3030 3031
		{
		  occr = find_occr_in_bb (expr->antic_occr, bb);

		  if (occr)
		    {
		      /* An occurrence might already have been deleted
			 while processing a dominator of BB.  */
3032
		      if (!occr->deleted_p)
3033 3034 3035 3036 3037 3038 3039 3040
			{
			  gcc_assert (NONDEBUG_INSN_P (occr->insn));
			  hoistable++;
			}
		    }
		  else
		    hoistable++;
		}
3041

3042 3043 3044
	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
3045
	      FOR_EACH_VEC_ELT (basic_block, domby, j, dominated)
3046
		{
3047 3048
		  int max_distance;

3049
		  /* Ignore self dominance.  */
3050
		  if (bb == dominated)
3051 3052 3053 3054
		    continue;
		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
3055
		  if (!TEST_BIT (antloc[dominated->index], i))
3056 3057
		    continue;

3058 3059
		  occr = find_occr_in_bb (expr->antic_occr, dominated);
		  gcc_assert (occr);
3060

3061 3062 3063
		  /* An occurrence might already have been deleted
		     while processing a dominator of BB.  */
		  if (occr->deleted_p)
3064
		    continue;
3065 3066 3067 3068 3069 3070 3071 3072 3073
		  gcc_assert (NONDEBUG_INSN_P (occr->insn));

		  max_distance = expr->max_distance;
		  if (max_distance > 0)
		    /* Adjust MAX_DISTANCE to account for the fact that
		       OCCR won't have to travel all of DOMINATED, but
		       only part of it.  */
		    max_distance += (bb_size[dominated->index]
				     - to_bb_head[INSN_UID (occr->insn)]);
3074

3075
		  /* Note if the expression would reach the dominated block
3076
		     unimpared if it was placed at the end of BB.
3077 3078 3079

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
3080 3081
		  if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
						 max_distance, bb_size))
3082 3083 3084 3085 3086 3087
		    {
		      hoistable++;
		      VEC_safe_push (occr_t, heap,
				     occrs_to_hoist, occr);
		      bitmap_set_bit (from_bbs, dominated->index);
		    }
3088 3089
		}

3090
	      /* If we found more than one hoistable occurrence of this
3091
		 expression, then note it in the vector of expressions to
3092 3093 3094 3095 3096 3097
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however experiments have shown that
		 the vast majority of hoistable expressions are only movable
3098
		 from two successors, so raising this threshold is likely
3099
		 to nullify any benefit we get from code hoisting.  */
3100
	      if (hoistable > 1 && dbg_cnt (hoist_insn))
3101
		{
3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116
		  /* If (hoistable != VEC_length), then there is
		     an occurrence of EXPR in BB itself.  Don't waste
		     time looking for LCA in this case.  */
		  if ((unsigned) hoistable
		      == VEC_length (occr_t, occrs_to_hoist))
		    {
		      basic_block lca;

		      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
							      from_bbs);
		      if (lca != bb)
			/* Punt, it's better to hoist these occurrences to
			   LCA.  */
			VEC_free (occr_t, heap, occrs_to_hoist);
		    }
3117
		}
3118 3119 3120
	      else
		/* Punt, no point hoisting a single occurrence.  */
		VEC_free (occr_t, heap, occrs_to_hoist);
3121

3122
	      insn_inserted_p = 0;
3123

3124 3125
	      /* Walk through the occurrences of the I'th expression that we
		 want to hoist to BB and make the transformations.  */
3126
	      FOR_EACH_VEC_ELT (occr_t, occrs_to_hoist, j, occr)
3127
		{
3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155
		  rtx insn;
		  rtx set;

		  gcc_assert (!occr->deleted_p);

		  insn = occr->insn;
		  set = single_set (insn);
		  gcc_assert (set);

		  /* Create a pseudo-reg to store the result of reaching
		     expressions into.  Get the mode for the new pseudo
		     from the mode of the original destination pseudo.

		     It is important to use new pseudos whenever we
		     emit a set.  This will allow reload to use
		     rematerialization for such registers.  */
		  if (!insn_inserted_p)
		    expr->reaching_reg
		      = gen_reg_rtx_and_attrs (SET_DEST (set));

		  gcse_emit_move_after (expr->reaching_reg, SET_DEST (set),
					insn);
		  delete_insn (insn);
		  occr->deleted_p = 1;
		  changed = 1;
		  gcse_subst_count++;

		  if (!insn_inserted_p)
3156
		    {
3157 3158
		      insert_insn_end_basic_block (expr, bb);
		      insn_inserted_p = 1;
3159 3160
		    }
		}
3161 3162 3163

	      VEC_free (occr_t, heap, occrs_to_hoist);
	      bitmap_clear (from_bbs);
3164 3165
	    }
	}
3166
      VEC_free (basic_block, heap, domby);
3167
    }
3168

3169
  VEC_free (basic_block, heap, dom_tree_walk);
3170 3171
  free (bb_size);
  free (to_bb_head);
  free (index_map);
3173 3174

  return changed;
3175 3176 3177 3178
}

/* Top level routine to perform one code hoisting (aka unification) pass

3179
   Return nonzero if a change was made.  */
3180 3181

static int
3182
one_code_hoisting_pass (void)
3183 3184 3185
{
  int changed = 0;

3186 3187 3188 3189 3190 3191 3192 3193
  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
      || is_too_expensive (_("GCSE disabled")))
    return 0;

3194 3195
  doing_code_hoisting_p = true;

3196 3197 3198 3199 3200 3201 3202
  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

3203
  alloc_hash_table (&expr_hash_table);
3204
  compute_hash_table (&expr_hash_table);
3205 3206
  if (dump_file)
    dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
3207

3208
  if (expr_hash_table.n_elems > 0)
3209
    {
3210
      alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
3211
      compute_code_hoist_data ();
3212
      changed = hoist_code ();
3213 3214
      free_code_hoist_mem ();
    }
3215

3216
  free_hash_table (&expr_hash_table);
3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229
  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks, bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }
3230

3231 3232
  doing_code_hoisting_p = false;

3233 3234
  return changed;
}
3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249

/*  Here we provide the things required to do store motion towards
    the exit.  In order for this to be effective, gcse also needed to
    be taught how to move a load when it is killed only by a store to itself.

	    int i;
	    float a[10];

	    void foo(float scale)
	    {
	      for (i=0; i<10; i++)
		a[i] *= scale;
	    }

    'i' is both loaded and stored to in the loop. Normally, gcse cannot move
    the load out since it's live around the loop, and stored at the bottom
    of the loop.

      The 'Load Motion' referred to and implemented in this file is
    an enhancement to gcse which, when using edge-based LCM, recognizes
    this situation and allows gcse to move the load out of the loop.

      Once gcse has hoisted the load, store motion can then push this
    load towards the exit, and we end up with no loads or stores of 'i'
    in the loop.  */
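
/* Illustration only: conceptually, after load motion and store motion the
   example above becomes

	    void foo (float scale)
	    {
	      int i_reg;
	      for (i_reg = 0; i_reg < 10; i_reg++)
		a[i_reg] *= scale;
	      i = i_reg;
	    }

   where i_reg stands for the pseudo register that carries the value of 'i'
   inside the loop, and the store of 'i' has been pushed to the loop exit.  */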

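/* Callbacks for the hashtable of ls_expr entries (pre_ldst_table).
   Entries are hashed by their MEM pattern and compared with
   expr_equiv_p.  */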
static hashval_t
pre_ldst_expr_hash (const void *p)
{
  int do_not_record_p = 0;
  const struct ls_expr *const x = (const struct ls_expr *) p;
  return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
}

static int
pre_ldst_expr_eq (const void *p1, const void *p2)
{
  const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
    *const ptr2 = (const struct ls_expr *) p2;
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
}

/* This will search the ldst list for a matching expression. If it
   doesn't find one, we create one and initialize it.  */

static struct ls_expr *
ldst_entry (rtx x)
{
  int do_not_record_p = 0;
  struct ls_expr * ptr;
  unsigned int hash;
  void **slot;
  struct ls_expr e;

  hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
		   NULL,  /*have_reg_qty=*/false);

  e.pattern = x;
  slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
  if (*slot)
    return (struct ls_expr *)*slot;

  ptr = XNEW (struct ls_expr);

  ptr->next         = pre_ldst_mems;
  ptr->expr         = NULL;
  ptr->pattern      = x;
  ptr->pattern_regs = NULL_RTX;
  ptr->loads        = NULL_RTX;
  ptr->stores       = NULL_RTX;
  ptr->reaching_reg = NULL_RTX;
  ptr->invalid      = 0;
  ptr->index        = 0;
  ptr->hash_index   = hash;
  pre_ldst_mems     = ptr;
  *slot = ptr;

  return ptr;
}

/* Free up an individual ldst entry.  */

static void
free_ldst_entry (struct ls_expr * ptr)
{
  free_INSN_LIST_list (& ptr->loads);
  free_INSN_LIST_list (& ptr->stores);

  free (ptr);
}

/* Free up all memory associated with the ldst list.  */

static void
free_ldst_mems (void)
{
  if (pre_ldst_table)
    htab_delete (pre_ldst_table);
  pre_ldst_table = NULL;

  while (pre_ldst_mems)
    {
      struct ls_expr * tmp = pre_ldst_mems;

      pre_ldst_mems = pre_ldst_mems->next;

      free_ldst_entry (tmp);
    }

  pre_ldst_mems = NULL;
}

/* Dump debugging info about the ldst list.  */

static void
print_ldst_list (FILE * file)
{
  struct ls_expr * ptr;

  fprintf (file, "LDST list: \n");

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      fprintf (file, "  Pattern (%3d): ", ptr->index);

      print_rtl (file, ptr->pattern);

      fprintf (file, "\n	 Loads : ");

      if (ptr->loads)
	print_rtl (file, ptr->loads);
      else
	fprintf (file, "(nil)");

      fprintf (file, "\n	Stores : ");

      if (ptr->stores)
	print_rtl (file, ptr->stores);
      else
	fprintf (file, "(nil)");

      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
}

/* Returns 1 if X is in the list of ldst only expressions.  */

static struct ls_expr *
find_rtx_in_ldst (rtx x)
{
  struct ls_expr e;
  void **slot;
  if (!pre_ldst_table)
    return NULL;
  e.pattern = x;
  slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
  if (!slot || ((struct ls_expr *)*slot)->invalid)
    return NULL;
  return (struct ls_expr *) *slot;
}

/* Return first item in the list.  */

static inline struct ls_expr *
first_ls_expr (void)
{
  return pre_ldst_mems;
}

/* Return the next item in the list after the specified one.  */

static inline struct ls_expr *
next_ls_expr (struct ls_expr * ptr)
{
  return ptr->next;
}

/* Load Motion for loads which only kill themselves.  */

/* Return true if x is a simple MEM operation, with no registers or
   side effects. These are the types of loads we consider for the
   ld_motion list, otherwise we let the usual aliasing take care of it.  */
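/* For example (illustrative only), a load such as (mem:SI (symbol_ref "i"))
   is considered simple, whereas volatile or BLKmode MEMs, MEMs that may trap
   while non-call exceptions are enabled, MEMs with side effects or a use of
   the stack pointer, and float-mode MEMs under -ffloat-store are all
   rejected below.  */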

static int
simple_mem (const_rtx x)
{
  if (! MEM_P (x))
    return 0;

  if (MEM_VOLATILE_P (x))
    return 0;

  if (GET_MODE (x) == BLKmode)
    return 0;

  /* If we are handling exceptions, we must be careful with memory references
     that may trap.  If we are not, the behavior is undefined, so we may just
     continue.  */
  if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
    return 0;

  if (side_effects_p (x))
    return 0;

  /* Do not consider function arguments passed on stack.  */
  if (reg_mentioned_p (stack_pointer_rtx, x))
    return 0;

  if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
    return 0;

  return 1;
}

/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads since the aliasing code will allow all entries in the
   ld_motion list to not alias themselves.  If we miss a load, we will get
   the wrong value since gcse might common it and we won't know to
   fix it up.  */

static void
invalidate_any_buried_refs (rtx x)
{
  const char * fmt;
  int i, j;
  struct ls_expr * ptr;

  /* Invalidate it in the list.  */
  if (MEM_P (x) && simple_mem (x))
    {
      ptr = ldst_entry (x);
      ptr->invalid = 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	invalidate_any_buried_refs (XEXP (x, i));
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  invalidate_any_buried_refs (XVECEXP (x, i, j));
    }
}

/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no side effects
   and no registers in the expression.  For a MEM destination, we also
   check that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  If there are any uses/defs
   which don't match this criteria, they are invalidated and trimmed out
   later.  */
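/* For instance (illustrative only; register numbers are arbitrary), an insn
   with the pattern (set (reg:SI 100) (mem:SI (symbol_ref "i"))) is recorded
   as a load of that MEM, and (set (mem:SI (symbol_ref "i")) (reg:SI 101)) as
   a store, while any other occurrence of a simple MEM in an insn merely
   invalidates the corresponding entry.  */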

static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx insn;

  pre_ldst_mems = NULL;
  pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
				pre_ldst_expr_eq, NULL);

  FOR_EACH_BB (bb)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (NONDEBUG_INSN_P (insn))
	    {
	      if (GET_CODE (PATTERN (insn)) == SET)
		{
		  rtx src = SET_SRC (PATTERN (insn));
		  rtx dest = SET_DEST (PATTERN (insn));

		  /* Check for a simple LOAD...  */
		  if (MEM_P (src) && simple_mem (src))
		    {
		      ptr = ldst_entry (src);
		      if (REG_P (dest))
			ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
		      else
			ptr->invalid = 1;
		    }
		  else
		    {
		      /* Make sure there isn't a buried load somewhere.  */
		      invalidate_any_buried_refs (src);
		    }

		  /* Check for stores. Don't worry about aliased ones, they
		     will block any movement we might do later. We only care
		     about this exact pattern since those are the only
		     circumstances in which we will ignore the aliasing info.  */
		  if (MEM_P (dest) && simple_mem (dest))
		    {
		      ptr = ldst_entry (dest);

		      if (! MEM_P (src)
			  && GET_CODE (src) != ASM_OPERANDS
			  /* Check for REG manually since want_to_gcse_p
			     returns 0 for all REGs.  */
			  && can_assign_to_reg_without_clobbers_p (src))
			ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
		      else
			ptr->invalid = 1;
		    }
		}
	      else
		invalidate_any_buried_refs (PATTERN (insn));
	    }
	}
    }
}

/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems (void)
{
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
	{
	  /* Delete if we cannot find this mem in the expression list.  */
	  unsigned int hash = ptr->hash_index % expr_hash_table.size;

	  for (expr = expr_hash_table.table[hash];
	       expr != NULL;
	       expr = expr->next_same_hash)
	    if (expr_equiv_p (expr->expr, ptr->pattern))
	      break;
	}
      else
	expr = (struct expr *) 0;

      if (expr)
	{
	  /* Set the expression field if we are keeping it.  */
	  ptr->expr = expr;
	  last = & ptr->next;
	  ptr = ptr->next;
	}
      else
	{
	  *last = ptr->next;
	  htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
	  free_ldst_entry (ptr);
	  ptr = * last;
	}
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (dump_file);
}

/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */
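/* For example (illustration only; register numbers are arbitrary), a
   recorded store

	(set (mem:SI (symbol_ref "i")) (plus:SI (reg:SI 100) (const_int 1)))

   becomes

	(set (reg:SI 200) (plus:SI (reg:SI 100) (const_int 1)))
	(set (mem:SI (symbol_ref "i")) (reg:SI 200))

   where (reg:SI 200) is EXPR->reaching_reg.  */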

static void
update_ld_motion_stores (struct expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx list = mem_ptr->stores;

      for ( ; list != NULL_RTX; list = XEXP (list, 1))
	{
	  rtx insn = XEXP (list, 0);
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;
	  rtx copy;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "PRE:  store updated with reaching reg ");
	      print_rtl (dump_file, expr->reaching_reg);
	      fprintf (dump_file, ":\n	");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (dump_file, "\n");
	    }

	  copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}

/* Return true if the graph is too expensive to optimize. PASS is the
   optimization about to be performed.  */

static bool
is_too_expensive (const char *pass)
{
  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many
     edges as blocks.  But we do not want to punish small functions
     which have a couple of switch statements.  Rather than simply
     thresholding the number of blocks, we use something with a more
     graceful degradation.  */
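  /* As a rough illustration, a function with 1000 basic blocks is only
     rejected here once its cfg has more than 20000 + 4 * 1000 = 24000
     edges.  */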
  if (n_edges > 20000 + n_basic_blocks * 4)
    {
      warning (OPT_Wdisabled_optimization,
	       "%s: %d basic blocks and %d edges/basic block",
	       pass, n_basic_blocks, n_edges / n_basic_blocks);

      return true;
    }

  /* If allocating memory for the dataflow bitmaps would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_reg_num ())
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      warning (OPT_Wdisabled_optimization,
	       "%s: %d basic blocks and %d registers",
	       pass, n_basic_blocks, max_reg_num ());

      return true;
    }

  return false;
}


/* All the passes implemented in this file.  Each pass has its
   own gate and execute function, and at the end of the file a
   pass definition for passes.c.

   We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

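/* Gate for the RTL PRE pass: run only if GCSE is enabled, the function does
   not call setjmp, we are optimizing the function for speed, and the debug
   counter allows it.  */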
static bool
gate_rtl_pre (void)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    && optimize_function_for_speed_p (cfun)
    && dbg_cnt (pre);
}

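/* Run one PRE pass: delete unreachable blocks, refresh the dataflow
   information, run one_pre_gcse_pass, and clean up the cfg and request a
   CSE rerun if anything changed.  */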
static unsigned int
execute_rtl_pre (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_pre_gcse_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

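/* Gate for the RTL code hoisting pass: like gate_rtl_pre, but only when
   optimizing the function for size (see the comment below).  */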
static bool
gate_rtl_hoist (void)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

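/* Run one code hoisting pass; the structure mirrors execute_rtl_pre
   above.  */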
static unsigned int
execute_rtl_hoist (void)
{
  int changed;
  delete_unreachable_blocks ();
  df_analyze ();
  changed = one_code_hoisting_pass ();
  flag_rerun_cse_after_global_opts |= changed;
  if (changed)
    cleanup_cfg (0);
  return 0;
}

struct rtl_opt_pass pass_rtl_pre =
{
 {
  RTL_PASS,
  "rtl pre",                            /* name */
  gate_rtl_pre,                         /* gate */
  execute_rtl_pre,    			/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_PRE,                               /* tv_id */
  PROP_cfglayout,                       /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_rtl_hoist =
{
 {
  RTL_PASS,
  "hoist",                              /* name */
  gate_rtl_hoist,                       /* gate */
  execute_rtl_hoist,  			/* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_HOIST,                             /* tv_id */
  PROP_cfglayout,                       /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow | TODO_ggc_collect   /* todo_flags_finish */
 }
};

#include "gt-gcse.h"