tree-ssa-loop-im.c 71.1 KB
Newer Older
1
/* Loop invariant motion.
2 3
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
   Free Software Foundation, Inc.
H.J. Lu committed
4

5
This file is part of GCC.
H.J. Lu committed
6

7 8
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
9
Free Software Foundation; either version 3, or (at your option) any
10
later version.
H.J. Lu committed
11

12 13 14 15
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
H.J. Lu committed
16

17
You should have received a copy of the GNU General Public License
18 19
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 21 22 23 24 25 26 27

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
28
#include "gimple-pretty-print.h"
29 30 31 32 33 34
#include "tree-flow.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"
35
#include "hashtab.h"
36 37
#include "tree-affine.h"
#include "pointer-set.h"
38
#include "tree-ssa-propagate.h"
39

40 41 42 43 44 45 46 47 48 49 50
/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

51
   Where COND and INV are invariants, but evaluating INV may trap or be
52 53 54 55 56 57 58 59 60 61
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */

62 63 64 65 66
/* A type for the list of statements that have to be moved in order to be able
   to hoist an invariant computation.  */

struct depend
{
  gimple stmt;			/* A statement this one depends on.  */
  struct depend *next;		/* Next element of the singly-linked list.  */
};

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in that the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of that we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for that we are sure
				   the statement is executed if the loop
				   is entered.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  struct depend *depends;	/* List of statements that must be also hoisted
				   out of the loop when this statement is
				   hoisted; i.e. those that define the operands
				   of the statement and are inside of the
				   MAX_LOOP loop.  */
};

/* Maps statements to their lim_aux_data.  */

static struct pointer_map_t *lim_aux_data_map;
99

100
/* Description of a memory reference location.  */

typedef struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  gimple stmt;			/* The statement in that it occurs.  */
} *mem_ref_loc_p;

DEF_VEC_P(mem_ref_loc_p);
DEF_VEC_ALLOC_P(mem_ref_loc_p, heap);

/* The list of memory reference locations in a loop.  */

typedef struct mem_ref_locs
{
  VEC (mem_ref_loc_p, heap) *locs;	/* All locations of one reference
					   within one loop.  */
} *mem_ref_locs_p;

DEF_VEC_P(mem_ref_locs_p);
DEF_VEC_ALLOC_P(mem_ref_locs_p, heap);
120

121 122 123
/* Description of a memory reference.  */

typedef struct mem_ref
{
  tree mem;			/* The memory itself.  */
  unsigned id;			/* ID assigned to the memory reference
				   (its index in memory_accesses.refs_list)  */
  hashval_t hash;		/* Its hash value.  */
  bitmap stored;		/* The set of loops in that this memory location
				   is stored to.  */
  VEC (mem_ref_locs_p, heap) *accesses_in_loop;
				/* The locations of the accesses.  Vector
				   indexed by the loop number.  */

  /* The following sets are computed on demand.  We keep both set and
     its complement, so that we know whether the information was
     already computed or not.  */
  bitmap indep_loop;		/* The set of loops in that the memory
				   reference is independent, meaning:
				   If it is stored in the loop, this store
				     is independent on all other loads and
				     stores.
				   If it is only loaded, then it is independent
				     on all stores in the loop.  */
  bitmap dep_loop;		/* The complement of INDEP_LOOP.  */

  bitmap indep_ref;		/* The set of memory references on that
				   this reference is independent.  */
  bitmap dep_ref;		/* The complement of INDEP_REF.  */
} *mem_ref_p;

DEF_VEC_P(mem_ref_p);
DEF_VEC_ALLOC_P(mem_ref_p, heap);

DEF_VEC_P(bitmap);
DEF_VEC_ALLOC_P(bitmap, heap);

DEF_VEC_P(htab_t);
DEF_VEC_ALLOC_P(htab_t, heap);

/* Description of memory accesses in loops.  */

static struct
{
  /* The hash table of memory references accessed in loops.  */
  htab_t refs;

  /* The list of memory references.  */
  VEC (mem_ref_p, heap) *refs_list;

  /* The set of memory references accessed in each loop.  */
  VEC (bitmap, heap) *refs_in_loop;

  /* The set of memory references accessed in each loop, including
     subloops.  */
  VEC (bitmap, heap) *all_refs_in_loop;

  /* The set of memory references stored in each loop, including
     subloops.  */
  VEC (bitmap, heap) *all_refs_stored_in_loop;

  /* Cache for expanding memory addresses.  */
  struct pointer_map_t *ttae_cache;
} memory_accesses;

/* Obstack for the bitmaps in the above data structures.  */
static bitmap_obstack lim_bitmap_obstack;

189
/* Forward declaration; defined later in this file.  */
static bool ref_indep_loop_p (struct loop *, mem_ref_p);

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  Stored in the basic block's AUX field.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
#define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))

/* Whether the reference was analyzable.  */
#define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)

202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
/* Allocates zeroed auxiliary data for STMT, registers it in
   LIM_AUX_DATA_MAP and returns it.  */

static struct lim_aux_data *
init_lim_data (gimple stmt)
{
  void **p = pointer_map_insert (lim_aux_data_map, stmt);

  *p = XCNEW (struct lim_aux_data);
  return (struct lim_aux_data *) *p;
}

/* Returns the auxiliary data recorded for STMT, or NULL if there
   is none.  */

static struct lim_aux_data *
get_lim_data (gimple stmt)
{
  void **p = pointer_map_contains (lim_aux_data_map, stmt);
  if (!p)
    return NULL;

  return (struct lim_aux_data *) *p;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  struct depend *dep, *next;

  for (dep = data->depends; dep; dep = next)
    {
      next = dep->next;
      free (dep);
    }
  free (data);
}

/* Frees the auxiliary data recorded for STMT (if any) and clears its
   entry in LIM_AUX_DATA_MAP.  */

static void
clear_lim_data (gimple stmt)
{
  void **p = pointer_map_contains (lim_aux_data_map, stmt);
  if (!p)
    return;

  free_lim_aux_data ((struct lim_aux_data *) *p);
  *p = NULL;
}

247 248 249
/* Calls CBCK for each index in memory reference ADDR_P.  There are two
   kinds of situations handled; in each of these cases, the memory reference
   and DATA are passed to the callback:

   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
   pass the pointer to the index to the callback.

   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
   pointer to addr to the callback.

   If the callback returns false, the whole search stops and false is returned.
   Otherwise the function returns true after traversing through the whole
   reference *ADDR_P.  */

bool
for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
{
  tree *nxt, *idx;

  /* Walk down the reference tree towards its base.  */
  for (; ; addr_p = nxt)
    {
      switch (TREE_CODE (*addr_p))
	{
	case SSA_NAME:
	  return cbck (*addr_p, addr_p, data);

	case MEM_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  return cbck (*addr_p, nxt, data);

	case BIT_FIELD_REF:
	case VIEW_CONVERT_EXPR:
	case REALPART_EXPR:
	case IMAGPART_EXPR:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case COMPONENT_REF:
	  /* If the component has varying offset, it behaves like index
	     as well.  */
	  idx = &TREE_OPERAND (*addr_p, 2);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;

	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case ARRAY_REF:
	case ARRAY_RANGE_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
	    return false;
	  break;

	/* Bases and constants terminate the walk with no indices.  */
	case VAR_DECL:
	case PARM_DECL:
	case STRING_CST:
	case RESULT_DECL:
	case VECTOR_CST:
	case COMPLEX_CST:
	case INTEGER_CST:
	case REAL_CST:
	case FIXED_CST:
	case CONSTRUCTOR:
	  return true;

	case ADDR_EXPR:
	  gcc_assert (is_gimple_min_invariant (*addr_p));
	  return true;

	case TARGET_MEM_REF:
	  /* All three address components may contain SSA names to visit.  */
	  idx = &TMR_BASE (*addr_p);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;
	  idx = &TMR_INDEX (*addr_p);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;
	  idx = &TMR_INDEX2 (*addr_p);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;
	  return true;

	default:
	  gcc_unreachable ();
	}
    }
}

/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid making
   it executed if it would not be executed in the original program (e.g.
   because it may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (gimple stmt)
{
  tree lhs;
  enum move_pos ret = MOVE_POSSIBLE;

  if (flag_unswitch_loops
      && gimple_code (stmt) == GIMPLE_COND)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  /* Degenerate PHIs with at most two arguments and a non-virtual,
     non-abnormal result may be moved.  */
  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_phi_num_args (stmt) <= 2
      && !virtual_operand_p (gimple_phi_result (stmt))
      && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
    return MOVE_POSSIBLE;

  if (gimple_get_lhs (stmt) == NULL_TREE)
    return MOVE_IMPOSSIBLE;

  /* Statements that define memory cannot be moved.  */
  if (gimple_vdef (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || gimple_has_side_effects (stmt)
      || stmt_could_throw_p (stmt))
    return MOVE_IMPOSSIBLE;

  if (is_gimple_call (stmt))
    {
      /* While pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      ret = MOVE_PRESERVE_EXECUTION;
      lhs = gimple_call_lhs (stmt);
    }
  else if (is_gimple_assign (stmt))
    lhs = gimple_assign_lhs (stmt);
  else
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || gimple_could_trap_p (stmt))
    return MOVE_PRESERVE_EXECUTION;

  /* Non local loads in a transaction cannot be hoisted out.  Well,
     unless the load happens on every path out of the loop, but we
     don't take this into account yet.  */
  if (flag_tm
      && gimple_in_transaction (stmt)
      && gimple_assign_single_p (stmt))
    {
      tree rhs = gimple_assign_rhs1 (stmt);
      if (DECL_P (rhs) && is_global_var (rhs))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot hoist conditional load of ");
	      print_generic_expr (dump_file, rhs, TDF_SLIM);
	      fprintf (dump_file, " because it is in a transaction.\n");
	    }
	  return MOVE_IMPOSSIBLE;
	}
    }

  return ret;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to that we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in that the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  gimple def_stmt;
  basic_block def_bb;
  struct loop *max_loop;
  struct lim_aux_data *lim_data;

  if (!def)
    return superloop_at_depth (loop, 1);

  if (TREE_CODE (def) != SSA_NAME)
    {
      /* A gimple invariant is invariant everywhere; the outermost
	 superloop of LOOP is the answer.  */
      gcc_assert (is_gimple_min_invariant (def));
      return superloop_at_depth (loop, 1);
    }

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = gimple_bb (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  /* If the defining statement itself can be hoisted, DEF becomes
     invariant already one level outside its maximal loop.  */
  lim_data = get_lim_data (def_stmt);
  if (lim_data != NULL && lim_data->max_loop != NULL)
    max_loop = find_common_loop (max_loop,
				 loop_outer (lim_data->max_loop));
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in that value of DEF is invariant
   and record this in DATA->max_loop field.  If DEF itself is defined inside
   this loop as well (i.e. we need to hoist it out of the loop if we want
   to hoist the statement represented by DATA), record the statement in that
   DEF is defined to the DATA->depends list.  Additionally if ADD_COST is true,
   add the cost of the computation of DEF to the DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return TRUE.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  gimple def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = gimple_bb (def_stmt);
  struct loop *max_loop;
  struct depend *dep;
  struct lim_aux_data *def_data;

  /* DEF without a defining block (e.g. a default definition) constrains
     nothing.  */
  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  def_data = get_lim_data (def_stmt);
  if (!def_data)
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += def_data->cost;

  /* Prepend DEF_STMT to the dependency list of DATA.  */
  dep = XNEW (struct depend);
  dep->stmt = def_stmt;
  dep->next = data->depends;
  data->depends = dep;

  return true;
}

529 530
/* Returns an estimate for a cost of statement STMT.  The values here
   are just ad-hoc constants, similar to costs for inlining.  */

static unsigned
stmt_cost (gimple stmt)
{
  /* Always try to create possibilities for unswitching.  */
  if (gimple_code (stmt) == GIMPLE_COND
      || gimple_code (stmt) == GIMPLE_PHI)
    return LIM_EXPENSIVE;

  /* We should be hoisting calls if possible.  */
  if (is_gimple_call (stmt))
    {
      tree fndecl;

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl
	  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
	  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
	return 0;

      return LIM_EXPENSIVE;
    }

  /* Hoisting memory references out should almost surely be a win.  */
  if (gimple_references_memory_p (stmt))
    return LIM_EXPENSIVE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return 1;

  switch (gimple_assign_rhs_code (stmt))
    {
    case MULT_EXPR:
    case WIDEN_MULT_EXPR:
    case WIDEN_MULT_PLUS_EXPR:
    case WIDEN_MULT_MINUS_EXPR:
    case DOT_PROD_EXPR:
    case FMA_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      return LIM_EXPENSIVE;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case WIDEN_LSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      /* Shifts and rotates are usually expensive.  */
      return LIM_EXPENSIVE;

    case CONSTRUCTOR:
      /* Make vector construction cost proportional to the number
         of elements.  */
      return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));

    case SSA_NAME:
    case PAREN_EXPR:
      /* Whether or not something is wrapped inside a PAREN_EXPR
         should not change move cost.  Nor should an intermediate
	 unpropagated SSA name copy.  */
      return 0;

    default:
      return 1;
    }
}

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
/* Finds the outermost loop between OUTER and LOOP in that the memory reference
   REF is independent.  If REF is not independent in LOOP, NULL is returned
   instead.  */

static struct loop *
outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
{
  struct loop *aloop;

  if (bitmap_bit_p (ref->stored, loop->num))
    return NULL;

  /* Walk inwards from OUTER towards LOOP, returning the first loop in
     which REF is neither stored nor dependent.  */
  for (aloop = outer;
       aloop != loop;
       aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
    if (!bitmap_bit_p (ref->stored, aloop->num)
	&& ref_indep_loop_p (aloop, ref))
      return aloop;

  if (ref_indep_loop_p (loop, ref))
    return loop;
  else
    return NULL;
}

/* If there is a simple load or store to a memory reference in STMT, returns
   the location of the memory reference, and sets IS_STORE according to whether
   it is a store or load.  Otherwise, returns NULL.  */

static tree *
simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
{
  tree *lhs, *rhs;

  /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns.  */
  if (!gimple_assign_single_p (stmt))
    return NULL;

  lhs = gimple_assign_lhs_ptr (stmt);
  rhs = gimple_assign_rhs1_ptr (stmt);

  if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
    {
      /* A load: the SSA lhs is set from memory (the vuse).  */
      *is_store = false;
      return rhs;
    }
  else if (gimple_vdef (stmt)
	   && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
    {
      /* A store: memory is defined (the vdef) from a register or
	 invariant.  */
      *is_store = true;
      return lhs;
    }
  else
    return NULL;
}

/* Returns the memory reference contained in STMT.  */

static mem_ref_p
mem_ref_in_stmt (gimple stmt)
{
  bool store;
  tree *mem = simple_mem_ref_in_stmt (stmt, &store);
  hashval_t hash;
  mem_ref_p ref;

  if (!mem)
    return NULL;
  /* Only loads are expected here.  */
  gcc_assert (!store);

  /* Look the reference up in the table built by the analysis phase.  */
  hash = iterative_hash_expr (*mem, 0);
  ref = (mem_ref_p) htab_find_with_hash (memory_accesses.refs, *mem, hash);

  gcc_assert (ref != NULL);
  return ref;
}

686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
/* From a controlling predicate in DOM determine the arguments from
   the PHI node PHI that are chosen if the predicate evaluates to
   true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
   they are non-NULL.  Returns true if the arguments can be determined,
   else return false.  */

static bool
extract_true_false_args_from_phi (basic_block dom, gimple phi,
				  tree *true_arg_p, tree *false_arg_p)
{
  basic_block bb = gimple_bb (phi);
  edge true_edge, false_edge, tem;
  tree arg0 = NULL_TREE, arg1 = NULL_TREE;

  /* We have to verify that one edge into the PHI node is dominated
     by the true edge of the predicate block and the other edge
     dominated by the false edge.  This ensures that the PHI argument
     we are going to take is completely determined by the path we
     take from the predicate block.
     We can only use BB dominance checks below if the destination of
     the true/false edges are dominated by their edge, thus only
     have a single predecessor.  */
  extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
  tem = EDGE_PRED (bb, 0);
  if (tem == true_edge
      || (single_pred_p (true_edge->dest)
	  && (tem->src == true_edge->dest
	      || dominated_by_p (CDI_DOMINATORS,
				 tem->src, true_edge->dest))))
    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
  else if (tem == false_edge
	   || (single_pred_p (false_edge->dest)
	       && (tem->src == false_edge->dest
		   || dominated_by_p (CDI_DOMINATORS,
				      tem->src, false_edge->dest))))
    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
  else
    return false;
  /* Classify the second predecessor edge the same way.  */
  tem = EDGE_PRED (bb, 1);
  if (tem == true_edge
      || (single_pred_p (true_edge->dest)
	  && (tem->src == true_edge->dest
	      || dominated_by_p (CDI_DOMINATORS,
				 tem->src, true_edge->dest))))
    arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
  else if (tem == false_edge
	   || (single_pred_p (false_edge->dest)
	       && (tem->src == false_edge->dest
		   || dominated_by_p (CDI_DOMINATORS,
				      tem->src, false_edge->dest))))
    arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
  else
    return false;
  /* Both edges must have been classified, and to different sides.  */
  if (!arg0 || !arg1)
    return false;

  if (true_arg_p)
    *true_arg_p = arg0;
  if (false_arg_p)
    *false_arg_p = arg1;

  return true;
}

750 751 752 753 754 755
/* Determine the outermost loop to that it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in that the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
   we preserve the fact whether STMT is executed.  It also fills other related
   information to LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (gimple stmt, bool must_preserve_exec)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = get_lim_data (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      use_operand_p use_p;
      unsigned min_cost = UINT_MAX;
      unsigned total_cost = 0;
      struct lim_aux_data *def_data;

      /* We will end up promoting dependencies to be unconditionally
	 evaluated.  For this reason the PHI cost (and thus the
	 cost we remove from the loop by doing the invariant motion)
	 is that of the cheapest PHI argument dependency chain.  */
      FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
	{
	  val = USE_FROM_PTR (use_p);
	  if (TREE_CODE (val) != SSA_NAME)
	    continue;
	  if (!add_dependency (val, lim_data, loop, false))
	    return false;
	  def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	  if (def_data)
	    {
	      min_cost = MIN (min_cost, def_data->cost);
	      total_cost += def_data->cost;
	    }
	}

      lim_data->cost += min_cost;

      if (gimple_phi_num_args (stmt) > 1)
	{
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple cond;
	  if (gsi_end_p (gsi_last_bb (dom)))
	    return false;
	  cond = gsi_stmt (gsi_last_bb (dom));
	  if (gimple_code (cond) != GIMPLE_COND)
	    return false;
	  /* Verify that this is an extended form of a diamond and
	     the PHI arguments are completely controlled by the
	     predicate in DOM.  */
	  if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
	    return false;

	  /* Fold in dependencies and cost of the condition.  */
	  FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	      def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
	      if (def_data)
		total_cost += def_data->cost;
	    }

	  /* We want to avoid unconditionally executing very expensive
	     operations.  As costs for our dependencies cannot be
	     negative just claim we are not invariant for this case.
	     We also are not sure whether the control-flow inside the
	     loop will vanish.  */
	  if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
	      && !(min_cost != 0
		   && total_cost / min_cost <= 2))
	    return false;

	  /* Assume that the control-flow in the loop will vanish.
	     ???  We should verify this and not artificially increase
	     the cost if that is not the case.  */
	  lim_data->cost += stmt_cost (stmt);
	}

      return true;
    }
  else
    FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
      if (!add_dependency (val, lim_data, loop, true))
	return false;

  /* For a load also require independence on the stores in the target
     loop, or treat the virtual operand as an ordinary dependency.  */
  if (gimple_vuse (stmt))
    {
      mem_ref_p ref = mem_ref_in_stmt (stmt);

      if (ref)
	{
	  lim_data->max_loop
		  = outermost_indep_loop (lim_data->max_loop, loop, ref);
	  if (!lim_data->max_loop)
	    return false;
	}
      else
	{
	  if ((val = gimple_vuse (stmt)) != NULL_TREE)
	    {
	      if (!add_dependency (val, lim_data, loop, false))
		return false;
	    }
	}
    }

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
  struct depend *dep;
  struct lim_aux_data *lim_data;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  lim_data = get_lim_data (stmt);
  if (lim_data != NULL && lim_data->tgt_loop != NULL)
    stmt_loop = find_common_loop (stmt_loop,
				  loop_outer (lim_data->tgt_loop));
  /* Already hoisted far enough; nothing to do.  */
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (level == lim_data->max_loop
	      || flow_loop_nested_p (lim_data->max_loop, level));

  /* Record the new target and propagate it recursively to the
     statements STMT depends on.  */
  lim_data->tgt_loop = level;
  for (dep = lim_data->depends; dep; dep = dep->next)
    set_level (dep->stmt, orig_loop, level);
}

/* Determines an outermost loop from that we want to hoist the statement STMT.
   For now we chose the outermost possible loop.  TODO -- use profiling
   information to set it more sanely.  */

static void
set_profitable_level (gimple stmt)
{
  set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
}

916
/* Returns true if STMT is a call that has side effects.  */
917 918

static bool
919
nonpure_call_p (gimple stmt)
920
{
921
  if (gimple_code (stmt) != GIMPLE_CALL)
922 923
    return false;

924
  return gimple_has_side_effects (stmt);
925 926
}

927 928
/* Rewrite a/b to a*(1/b).  Return the invariant stmt to process.  */

static gimple
rewrite_reciprocal (gimple_stmt_iterator *bsi)
{
  gimple stmt, stmt1, stmt2;
  tree name, lhs, type;
  tree real_one;
  gimple_stmt_iterator gsi;

  stmt = gsi_stmt (*bsi);
  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);

  real_one = build_one_cst (type);

  /* reciptmp = 1 / b -- the (invariant) reciprocal.  */
  name = make_temp_ssa_name (type, NULL, "reciptmp");
  stmt1 = gimple_build_assign_with_ops (RDIV_EXPR, name, real_one,
					gimple_assign_rhs2 (stmt));

  /* lhs = a * reciptmp.  */
  stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
					gimple_assign_rhs1 (stmt));

  /* Replace division stmt with reciprocal and multiply stmts.
     The multiply stmt is not invariant, so update iterator
     and avoid rescanning.  */
  gsi = *bsi;
  gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
  gsi_replace (&gsi, stmt2, true);

  /* Continue processing with invariant reciprocal statement.  */
  return stmt1;
}

/* Check if the pattern at *BSI is a bittest of the form
   (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0.  */

964 965
static gimple
rewrite_bittest (gimple_stmt_iterator *bsi)
{
  gimple stmt = gsi_stmt (*bsi);
  gimple cond_stmt, def_stmt, mask_stmt, and_stmt;
  tree lhs, t, a, b, name;
  use_operand_p use;

  lhs = gimple_assign_lhs (stmt);

  /* The result must be an SSA name whose single use is a comparison
     against zero in a GIMPLE_COND.  */
  if (TREE_CODE (lhs) != SSA_NAME
      || !single_imm_use (lhs, &use, &cond_stmt)
      || gimple_code (cond_stmt) != GIMPLE_COND)
    return stmt;
  if (gimple_cond_lhs (cond_stmt) != lhs
      || (gimple_cond_code (cond_stmt) != NE_EXPR
	  && gimple_cond_code (cond_stmt) != EQ_EXPR)
      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
    return stmt;

  /* Look at the definition of the first operand of the BIT_AND_EXPR;
     the rhs here is TMP1 & 1.  */
  def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
    return stmt;

  /* Step over a conversion possibly inserted by fold.  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
    {
      t = gimple_assign_rhs1 (def_stmt);
      if (TREE_CODE (t) != SSA_NAME
	  || !has_single_use (t))
	return stmt;
      def_stmt = SSA_NAME_DEF_STMT (t);
      if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	return stmt;
    }

  /* The definition must be a right shift, and with all the statement
     walking we must still be within the same loop.  */
  if (gimple_assign_rhs_code (def_stmt) != RSHIFT_EXPR
      || loop_containing_stmt (def_stmt) != loop_containing_stmt (stmt))
    return stmt;

  a = gimple_assign_rhs1 (def_stmt);
  b = gimple_assign_rhs2 (def_stmt);

  /* The transformation pays off only when the shift count B is loop
     invariant while A is not.  */
  if (outermost_invariant_loop (b, loop_containing_stmt (def_stmt)) != NULL
      && outermost_invariant_loop (a, loop_containing_stmt (def_stmt)) == NULL)
    {
      gimple_stmt_iterator rsi;

      /* 1 << B  */
      t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
		       build_int_cst (TREE_TYPE (a), 1), b);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      mask_stmt = gimple_build_assign (name, t);

      /* A & (1 << B)  */
      t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
      name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
      and_stmt = gimple_build_assign (name, t);

      /* Replace the SSA_NAME that is compared against zero, adjusting
	 the type of the zero constant accordingly.  */
      SET_USE (use, name);
      gimple_cond_set_rhs (cond_stmt, build_int_cst_type (TREE_TYPE (name), 0));

      /* Don't use gsi_replace here: none of the new assignments sets
	 the variable originally set in STMT.  Move BSI to the mask
	 statement and then remove the original statement, so that we
	 get a chance to retain debug info for it.  */
      rsi = *bsi;
      gsi_insert_before (bsi, mask_stmt, GSI_NEW_STMT);
      gsi_insert_before (&rsi, and_stmt, GSI_SAME_STMT);
      gsi_remove (&rsi, true);

      return mask_stmt;
    }

  return stmt;
}


1048 1049 1050 1051 1052 1053 1054 1055 1056
/* Determine the outermost loops in that statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for walk_dominator_tree.  */

static void
determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			      basic_block bb)
{
  enum move_pos pos;
1057 1058
  gimple_stmt_iterator bsi;
  gimple stmt;
1059 1060
  /* MAYBE_NEVER is true when BB is not always executed in its loop; in
     that case statements with MOVE_PRESERVE_EXECUTION semantics must
     not be hoisted.  OUTERMOST is the outermost loop in which BB is
     known to always execute.  */
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1061
  struct lim_aux_data *lim_data;
1062

1063
  /* BB is not inside any real loop -- nothing to hoist from it.  */
  if (!loop_outer (bb->loop_father))
1064 1065 1066 1067
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1068
	     bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1069

1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106
  /* Look at PHI nodes, but only if there is at most two.
     ???  We could relax this further by post-processing the inserted
     code and transforming adjacent cond-exprs with the same predicate
     to control flow again.  */
  bsi = gsi_start_phis (bb);
  if (!gsi_end_p (bsi)
      && ((gsi_next (&bsi), gsi_end_p (bsi))
	  || (gsi_next (&bsi), gsi_end_p (bsi))))
    for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
      {
	stmt = gsi_stmt (bsi);

	pos = movement_possibility (stmt);
	if (pos == MOVE_IMPOSSIBLE)
	  continue;

	lim_data = init_lim_data (stmt);
	lim_data->always_executed_in = outermost;

	if (!determine_max_movement (stmt, false))
	  {
	    lim_data->max_loop = NULL;
	    continue;
	  }

	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    print_gimple_stmt (dump_file, stmt, 2, 0);
	    fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		     loop_depth (lim_data->max_loop),
		     lim_data->cost);
	  }

	if (lim_data->cost >= LIM_EXPENSIVE)
	  set_profitable_level (stmt);
      }

1107
  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1108
    {
1109
      stmt = gsi_stmt (bsi);
1110 1111 1112 1113 1114 1115 1116 1117 1118

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  /* A call with side effects may never return; everything after
	     it in the loop must then preserve execution.  */
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
1119 1120 1121 1122
	  /* Make sure to note always_executed_in for stores to make
	     store-motion work.  */
	  else if (stmt_makes_single_store (stmt))
	    {
1123 1124
	      struct lim_aux_data *lim_data = init_lim_data (stmt);
	      lim_data->always_executed_in = outermost;
1125
	    }
1126 1127 1128
	  continue;
	}

1129 1130 1131
      if (is_gimple_assign (stmt)
	  && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
	      == GIMPLE_BINARY_RHS))
1132
	{
1133 1134 1135 1136
	  tree op0 = gimple_assign_rhs1 (stmt);
	  tree op1 = gimple_assign_rhs2 (stmt);
	  struct loop *ol1 = outermost_invariant_loop (op1,
					loop_containing_stmt (stmt));
1137 1138 1139 1140

	  /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
	     to be hoisted out of loop, saving expensive divide.  */
	  if (pos == MOVE_POSSIBLE
1141
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1142 1143
	      && flag_unsafe_math_optimizations
	      && !flag_trapping_math
1144 1145
	      && ol1 != NULL
	      && outermost_invariant_loop (op0, ol1) == NULL)
1146 1147 1148 1149 1150 1151
	    stmt = rewrite_reciprocal (&bsi);

	  /* If the shift count is invariant, convert (A >> B) & 1 to
	     A & (1 << B) allowing the bit mask to be hoisted out of the loop
	     saving an expensive shift.  */
	  if (pos == MOVE_POSSIBLE
1152 1153 1154 1155
	      && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
	      && integer_onep (op1)
	      && TREE_CODE (op0) == SSA_NAME
	      && has_single_use (op0))
1156 1157
	    stmt = rewrite_bittest (&bsi);
	}
1158

1159 1160
      /* STMT may have been replaced by rewrite_reciprocal or
	 rewrite_bittest above; analyze the (possibly new) statement.  */
      lim_data = init_lim_data (stmt);
      lim_data->always_executed_in = outermost;
1161 1162 1163 1164 1165 1166

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
1167
	  lim_data->max_loop = NULL;
1168 1169 1170 1171 1172
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
1173
	  print_gimple_stmt (dump_file, stmt, 2, 0);
1174
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
1175 1176
		   loop_depth (lim_data->max_loop),
		   lim_data->cost);
1177 1178
	}

1179
      if (lim_data->cost >= LIM_EXPENSIVE)
1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194
	set_profitable_level (stmt);
    }
}

/* For each statement determines the outermost loop in that it is invariant,
   statements on whose motion it depends and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */

static void
determine_invariantness (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
1195
  walk_data.dom_direction = CDI_DOMINATORS;
1196
  walk_data.before_dom_children = determine_invariantness_stmt;
1197 1198 1199 1200 1201 1202 1203

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);
}

/* Hoist the statements in basic block BB out of the loops prescribed by
1204
   data stored in LIM_DATA structures associated with each statement.  Callback
1205 1206 1207
   for walk_dominator_tree.  */

static void
1208
move_computations_stmt (struct dom_walk_data *dw_data,
1209 1210 1211
			basic_block bb)
{
  struct loop *level;
1212 1213
  gimple_stmt_iterator bsi;
  gimple stmt;
1214
  unsigned cost = 0;
1215
  struct lim_aux_data *lim_data;
1216

1217
  /* Nothing was scheduled for hoisting from blocks outside of loops.  */
  if (!loop_outer (bb->loop_father))
1218 1219
    return;

1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
  /* Hoist invariant PHI nodes by materializing them as assignments on
     the target loop preheader edge.  */
  for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
    {
      gimple new_stmt;
      stmt = gsi_stmt (bsi);

      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
	{
	  gsi_next (&bsi);
	  continue;
	}

      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);

      if (!level)
	{
	  gsi_next (&bsi);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving PHI node\n");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}

      /* A degenerate single-argument PHI becomes a plain copy.  */
      if (gimple_phi_num_args (stmt) == 1)
	{
	  tree arg = PHI_ARG_DEF (stmt, 0);
	  new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
						   gimple_phi_result (stmt),
						   arg, NULL_TREE);
	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
	}
      else
	{
	  /* A two-argument PHI is rewritten as a COND_EXPR keyed on the
	     condition of the immediately dominating GIMPLE_COND.  */
	  basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
	  gimple cond = gsi_stmt (gsi_last_bb (dom));
	  tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
	  /* Get the PHI arguments corresponding to the true and false
	     edges of COND.  */
	  extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
	  gcc_assert (arg0 && arg1);
	  t = build2 (gimple_cond_code (cond), boolean_type_node,
		      gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1269 1270 1271
	  new_stmt = gimple_build_assign_with_ops (COND_EXPR,
						   gimple_phi_result (stmt),
						   t, arg0, arg1);
1272 1273 1274 1275 1276 1277 1278
	  SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
	  *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
	}
      gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
      remove_phi_node (&bsi, false);
    }

1279
  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1280
    {
1281 1282
      edge e;

1283
      stmt = gsi_stmt (bsi);
1284

1285 1286
      lim_data = get_lim_data (stmt);
      if (lim_data == NULL)
1287
	{
1288
	  gsi_next (&bsi);
1289 1290 1291
	  continue;
	}

1292 1293 1294
      cost = lim_data->cost;
      level = lim_data->tgt_loop;
      clear_lim_data (stmt);
1295 1296 1297

      if (!level)
	{
1298
	  gsi_next (&bsi);
1299 1300 1301 1302 1303
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed it here to force its operands to be moved if necessary.  */
1304
      /* Note: LIM_DATA was already cleared above, so the next iteration
	 revisits this same statement, takes the lim_data == NULL path and
	 advances the iterator -- no infinite loop despite the bare
	 continue here.  */
      if (gimple_code (stmt) == GIMPLE_COND)
1305 1306 1307 1308 1309
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
1310
	  print_gimple_stmt (dump_file, stmt, 0, 0);
1311 1312 1313
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}
1314

1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325
      e = loop_preheader_edge (level);
      gcc_assert (!gimple_vdef (stmt));
      if (gimple_vuse (stmt))
	{
	  /* The new VUSE is the one from the virtual PHI in the loop
	     header or the one already present.  */
	  gimple_stmt_iterator gsi2;
	  for (gsi2 = gsi_start_phis (e->dest);
	       !gsi_end_p (gsi2); gsi_next (&gsi2))
	    {
	      gimple phi = gsi_stmt (gsi2);
1326
	      if (virtual_operand_p (gimple_phi_result (phi)))
1327 1328 1329 1330 1331 1332
		{
		  gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
		  break;
		}
	    }
	}
1333
      gsi_remove (&bsi, false);
1334
      gsi_insert_on_edge (e, stmt);
1335 1336 1337 1338
    }
}

/* Hoist the statements out of the loops prescribed by data stored in
1339
   LIM_DATA structures associated with each statement.  */
1340

1341
static unsigned int
1342 1343 1344
move_computations (void)
{
  struct dom_walk_data walk_data;
1345
  unsigned int todo = 0;
1346 1347

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
1348
  walk_data.global_data = &todo;
1349
  walk_data.dom_direction = CDI_DOMINATORS;
1350
  walk_data.before_dom_children = move_computations_stmt;
1351 1352 1353 1354 1355

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);

1356
  gsi_commit_edge_inserts ();
1357
  if (need_ssa_update_p (cfun))
Diego Novillo committed
1358
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1359 1360

  return todo;
1361 1362 1363 1364 1365 1366 1367 1368
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = (struct loop *) data;

  /* For an array reference, the step (operand 3) and the lower bound
     (operand 2) must be invariant in LOOP as well.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      if (outermost_invariant_loop (TREE_OPERAND (ref, 3), loop) == NULL)
	return false;
      if (outermost_invariant_loop (TREE_OPERAND (ref, 2), loop) == NULL)
	return false;
    }

  /* Finally the index itself must be invariant.  */
  return outermost_invariant_loop (*index, loop) != NULL;
}

1394
/* If OP is SSA NAME, force the statement that defines it to be
1395
   moved out of the LOOP.  ORIG_LOOP is the loop in that EXPR is used.  */
1396 1397

static void
1398
force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1399
{
1400
  gimple stmt;
1401

1402 1403 1404
  if (!op
      || is_gimple_min_invariant (op))
    return;
1405

1406
  gcc_assert (TREE_CODE (op) == SSA_NAME);
H.J. Lu committed
1407

1408 1409
  stmt = SSA_NAME_DEF_STMT (op);
  if (gimple_nop_p (stmt))
1410 1411
    return;

1412
  set_level (stmt, orig_loop, loop);
1413 1414 1415
}

/* Forces statement defining invariants in REF (and *INDEX) to be moved out of
1416 1417 1418 1419 1420 1421 1422 1423
   the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback for
   for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};
1424 1425 1426 1427

static bool
force_move_till (tree ref, tree *index, void *data)
{
1428
  struct fmt_data *fmt_data = (struct fmt_data *) data;
1429 1430 1431

  if (TREE_CODE (ref) == ARRAY_REF)
    {
1432 1433
      tree step = TREE_OPERAND (ref, 3);
      tree lbound = TREE_OPERAND (ref, 2);
1434

1435 1436
      force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1437 1438
    }

1439
  force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1440 1441 1442 1443

  return true;
}

1444 1445 1446 1447 1448
/* A hash function for struct mem_ref object OBJ.  */

static hashval_t
memref_hash (const void *obj)
{
1449
  const struct mem_ref *const mem = (const struct mem_ref *) obj;
1450 1451 1452 1453 1454 1455 1456 1457 1458 1459

  return mem->hash;
}

/* An equality function for struct mem_ref object OBJ1 with
   memory reference OBJ2.  */

static int
memref_eq (const void *obj1, const void *obj2)
{
1460
  const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1461

1462
  return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475
}

/* Releases list of memory reference locations ACCS.  */

static void
free_mem_ref_locs (mem_ref_locs_p accs)
{
  unsigned ix;
  mem_ref_loc_p aloc;

  if (accs == NULL)
    return;

  /* The individual location records are owned by ACCS.  */
  FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, ix, aloc)
    free (aloc);
  VEC_free (mem_ref_loc_p, heap, accs->locs);
  free (accs);
}

/* A function to free the mem_ref object OBJ.  */

static void
1485
memref_free (struct mem_ref *mem)
1486 1487 1488 1489
{
  unsigned i;
  mem_ref_locs_p accs;

1490
  FOR_EACH_VEC_ELT (mem_ref_locs_p, mem->accesses_in_loop, i, accs)
1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
    free_mem_ref_locs (accs);
  VEC_free (mem_ref_locs_p, heap, mem->accesses_in_loop);

  free (mem);
}

/* Allocates and returns a memory reference description for MEM whose hash
   value is HASH and id is ID.  */

static mem_ref_p
mem_ref_alloc (tree mem, unsigned hash, unsigned id)
{
  mem_ref_p ref = XNEW (struct mem_ref);

  ref->mem = mem;
  ref->id = id;
  ref->hash = hash;
  ref->accesses_in_loop = NULL;
  /* The dependence bitmaps live on the pass-local obstack.  */
  ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
  ref->indep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
  ref->dep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
  ref->indep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
  ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);

  return ref;
}

/* Allocates and returns the new list of locations.  */

static mem_ref_locs_p
mem_ref_locs_alloc (void)
{
  mem_ref_locs_p accs = XNEW (struct mem_ref_locs);

  /* Start out with an empty location vector.  */
  accs->locs = NULL;
  return accs;
}

/* Records memory reference location *LOC in LOOP to the memory reference
   description REF.  The reference occurs in statement STMT.  */
1529 1530

static void
1531
record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
1532
{
1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546
  mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
  mem_ref_locs_p accs;
  bitmap ril = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);

  /* Lazily grow REF's per-loop vector so it has a slot for LOOP, and
     allocate the location list for that slot on first use.  */
  if (VEC_length (mem_ref_locs_p, ref->accesses_in_loop)
      <= (unsigned) loop->num)
    VEC_safe_grow_cleared (mem_ref_locs_p, heap, ref->accesses_in_loop,
			   loop->num + 1);
  accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num);
  if (!accs)
    {
      accs = mem_ref_locs_alloc ();
      VEC_replace (mem_ref_locs_p, ref->accesses_in_loop, loop->num, accs);
    }
1547 1548

  aref->stmt = stmt;
1549
  aref->ref = loc;
1550

1551 1552
  /* Also note in LOOP's bitmap that REF is accessed there.  */
  VEC_safe_push (mem_ref_loc_p, heap, accs->locs, aref);
  bitmap_set_bit (ril, ref->id);
1553 1554
}

1555
/* Marks reference REF as stored in LOOP.  */
1556 1557

static void
1558
mark_ref_stored (mem_ref_p ref, struct loop *loop)
1559
{
1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572
  for (;
       loop != current_loops->tree_root
       && !bitmap_bit_p (ref->stored, loop->num);
       loop = loop_outer (loop))
    bitmap_set_bit (ref->stored, loop->num);
}

/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the memory_accesses structure.  Marks
   the vops accessed through unrecognized statements there as
   well.  */

static void
1573
gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1574 1575 1576 1577 1578 1579 1580
{
  tree *mem = NULL;
  hashval_t hash;
  PTR *slot;
  mem_ref_p ref;
  bool is_stored;
  unsigned id;
1581

1582
  /* Statements without a VUSE do not access memory.  */
  if (!gimple_vuse (stmt))
1583 1584 1585 1586
    return;

  mem = simple_mem_ref_in_stmt (stmt, &is_stored);
  if (!mem)
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600
    {
      /* An unanalyzable access gets a fresh reference record keyed by
	 error_mark_node; it is never shared between statements.  */
      id = VEC_length (mem_ref_p, memory_accesses.refs_list);
      ref = mem_ref_alloc (error_mark_node, 0, id);
      VEC_safe_push (mem_ref_p, heap, memory_accesses.refs_list, ref);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      if (gimple_vdef (stmt))
	mark_ref_stored (ref, loop);
      record_mem_ref_loc (ref, loop, stmt, mem);
      return;
    }

  /* Share one mem_ref record between all equal memory references,
     looked up via the hash table.  */
  hash = iterative_hash_expr (*mem, 0);
  slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT);

  if (*slot)
    {
1607
      ref = (mem_ref_p) *slot;
1608 1609 1610
      id = ref->id;
    }
  else
1611
    {
1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622
      id = VEC_length (mem_ref_p, memory_accesses.refs_list);
      ref = mem_ref_alloc (*mem, hash, id);
      VEC_safe_push (mem_ref_p, heap, memory_accesses.refs_list, ref);
      *slot = ref;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Memory reference %u: ", id);
	  print_generic_expr (dump_file, ref->mem, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}
1623
    }
1624

1625 1626 1627 1628 1629
  if (is_stored)
    mark_ref_stored (ref, loop);

  record_mem_ref_loc (ref, loop, stmt, mem);
  return;
1630 1631
}

1632
/* Gathers memory references in loops.  */
1633 1634

static void
1635
gather_mem_refs_in_loops (void)
1636
{
1637
  gimple_stmt_iterator bsi;
1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648
  basic_block bb;
  struct loop *loop;
  loop_iterator li;
  bitmap lrefs, alrefs, alrefso;

  /* Collect references loop by loop, skipping blocks outside of any
     real loop.  */
  FOR_EACH_BB (bb)
    {
      loop = bb->loop_father;
      if (loop == current_loops->tree_root)
	continue;

1649 1650
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
1651 1652
    }

1653 1654
  /* Propagate the information about accessed memory references up
     the loop hierarchy.  */
1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      /* all_refs_in_loop accumulates the refs of the loop body plus,
	 through the innermost-first iteration order, those of all
	 contained subloops.  */
      lrefs = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);
      alrefs = VEC_index (bitmap, memory_accesses.all_refs_in_loop, loop->num);
      bitmap_ior_into (alrefs, lrefs);

      if (loop_outer (loop) == current_loops->tree_root)
	continue;

      alrefso = VEC_index (bitmap, memory_accesses.all_refs_in_loop,
			   loop_outer (loop)->num);
      bitmap_ior_into (alrefso, alrefs);
    }
}

/* Create a mapping from virtual operands to references that touch them
   in LOOP.  */

static void
create_vop_ref_mapping_loop (struct loop *loop)
{
  bitmap refs = VEC_index (bitmap, memory_accesses.refs_in_loop, loop->num);
  struct loop *sloop;
  bitmap_iterator bi;
  unsigned i;
  mem_ref_p ref;

  /* For every reference accessed directly in LOOP, record it in
     all_refs_stored_in_loop of LOOP and each enclosing loop if it is
     stored to.  */
  EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
    {
      ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
1685 1686 1687 1688 1689 1690 1691 1692 1693
      /* NOTE(review): the bitmap test below uses LOOP->num on every
	 iteration of the SLOOP walk, making the condition invariant in
	 this inner loop.  Presumably intentional (a ref stored in LOOP
	 is propagated to every enclosing loop) -- confirm it should not
	 be SLOOP->num.  */
      for (sloop = loop; sloop != current_loops->tree_root;
	   sloop = loop_outer (sloop))
	if (bitmap_bit_p (ref->stored, loop->num))
	  {
	    bitmap refs_stored
	      = VEC_index (bitmap, memory_accesses.all_refs_stored_in_loop,
			   sloop->num);
	    bitmap_set_bit (refs_stored, ref->id);
	  }
1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719
    }
}

/* For each non-clobbered virtual operand and each loop, record the memory
   references in this loop that touch the operand.  */

static void
create_vop_ref_mapping (void)
{
  loop_iterator li;
  struct loop *loop;

  FOR_EACH_LOOP (li, loop, 0)
    {
      create_vop_ref_mapping_loop (loop);
    }
}

/* Gathers information about memory accesses in the loops.  */

static void
analyze_memory_references (void)
{
  unsigned i;
  bitmap empty;

1720
  memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
1721 1722 1723 1724 1725
  memory_accesses.refs_list = NULL;
  memory_accesses.refs_in_loop = VEC_alloc (bitmap, heap,
					    number_of_loops ());
  memory_accesses.all_refs_in_loop = VEC_alloc (bitmap, heap,
						number_of_loops ());
1726 1727
  memory_accesses.all_refs_stored_in_loop = VEC_alloc (bitmap, heap,
						       number_of_loops ());
1728 1729 1730

  /* Pre-populate the per-loop vectors with empty bitmaps, indexed by
     loop number.  */
  for (i = 0; i < number_of_loops (); i++)
    {
1731
      empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1732
      VEC_quick_push (bitmap, memory_accesses.refs_in_loop, empty);
1733
      empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1734
      VEC_quick_push (bitmap, memory_accesses.all_refs_in_loop, empty);
1735
      empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1736
      VEC_quick_push (bitmap, memory_accesses.all_refs_stored_in_loop, empty);
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754
    }

  memory_accesses.ttae_cache = NULL;

  gather_mem_refs_in_loops ();
  create_vop_ref_mapping ();
}

/* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
   tree_to_aff_combination_expand.  */

static bool
mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
{
  /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
     object and their offset differ in such a way that the locations cannot
     overlap, then they cannot alias.  */
  double_int size1, size2;
1755
  aff_tree off1, off2;
1756

1757 1758
  /* Perform basic offset and type-based disambiguation.  */
  if (!refs_may_alias_p (mem1, mem2))
1759
    return false;
1760

1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772
  /* The expansion of addresses may be a bit expensive, thus we only do
     the check at -O2 and higher optimization levels.  */
  if (optimize < 2)
    return true;

  /* Compute OFF2 - OFF1 by negating OFF1 and accumulating it into
     OFF2.  */
  get_inner_reference_aff (mem1, &off1, &size1);
  get_inner_reference_aff (mem2, &off2, &size2);
  aff_combination_expand (&off1, ttae_cache);
  aff_combination_expand (&off2, ttae_cache);
  aff_combination_scale (&off1, double_int_minus_one);
  aff_combination_add (&off2, &off1);

1773
  if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809
    return false;

  /* Overlap could not be ruled out; conservatively assume aliasing.  */
  return true;
}

/* Rewrites location LOC by TMP_VAR.  */

static void
rewrite_mem_ref_loc (mem_ref_loc_p loc, tree tmp_var)
{
  gimple use_stmt = loc->stmt;

  /* Redirect the access to TMP_VAR and recompute the statement's
     operands.  */
  *loc->ref = tmp_var;
  update_stmt (use_stmt);
}

/* Adds all locations of REF in LOOP and its subloops to LOCS.  */

static void
get_all_locs_in_loop (struct loop *loop, mem_ref_p ref,
		      VEC (mem_ref_loc_p, heap) **locs)
{
  mem_ref_locs_p accs;
  unsigned i;
  mem_ref_loc_p loc;
  bitmap refs = VEC_index (bitmap, memory_accesses.all_refs_in_loop,
			   loop->num);
  struct loop *subloop;

  /* Fast check: REF does not occur anywhere in LOOP or its subloops.  */
  if (!bitmap_bit_p (refs, ref->id))
    return;

  /* REF's accesses_in_loop vector is grown lazily; LOOP may have no
     slot at all.  */
  if (VEC_length (mem_ref_locs_p, ref->accesses_in_loop)
      > (unsigned) loop->num)
    {
      accs = VEC_index (mem_ref_locs_p, ref->accesses_in_loop, loop->num);
      if (accs)
	{
1810
	  FOR_EACH_VEC_ELT (mem_ref_loc_p, accs->locs, i, loc)
1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828
	    VEC_safe_push (mem_ref_loc_p, heap, *locs, loc);
	}
    }

  /* Recurse into the immediate subloops.  */
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    get_all_locs_in_loop (subloop, ref, locs);
}

/* Rewrites all references to REF in LOOP by variable TMP_VAR.  */

static void
rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
{
  VEC (mem_ref_loc_p, heap) *locs = NULL;
  mem_ref_loc_p loc;
  unsigned ix;

  /* Collect every occurrence of REF in LOOP and its subloops, then
     redirect each of them to TMP_VAR.  */
  get_all_locs_in_loop (loop, ref, &locs);
  FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, ix, loc)
    rewrite_mem_ref_loc (loc, tmp_var);
  VEC_free (mem_ref_loc_p, heap, locs);
}

1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862
/* Buffer in which the name of the currently generated store-motion
   temporary variable is accumulated, and its current length.  */
#define MAX_LSM_NAME_LENGTH 40
static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
static int lsm_tmp_name_length;

/* Appends S to lsm_tmp_name; if the result would exceed the buffer, S
   is silently dropped.  */

static void
lsm_tmp_name_add (const char *s)
{
  int new_length = lsm_tmp_name_length + (int) strlen (s);

  if (new_length > MAX_LSM_NAME_LENGTH)
    return;

  strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
  lsm_tmp_name_length = new_length;
}

/* Stores the name for temporary variable that replaces REF to
   lsm_tmp_name.  */

static void
gen_lsm_tmp_name (tree ref)
{
  const char *name;

  /* Recursively build a name from the structure of REF, appending a
     suffix for each level of the reference.  */
  switch (TREE_CODE (ref))
    {
1863
    case MEM_REF:
1864
    case TARGET_MEM_REF:
1865 1866 1867 1868
      /* A dereference appends an underscore after the base's name.  */
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      lsm_tmp_name_add ("_");
      break;

1869 1870 1871 1872
    case ADDR_EXPR:
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      break;

1873 1874 1875 1876 1877 1878 1879 1880 1881 1882
    case BIT_FIELD_REF:
    case VIEW_CONVERT_EXPR:
    case ARRAY_RANGE_REF:
      /* These contribute nothing of their own; name the base only.  */
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      break;

    case REALPART_EXPR:
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      lsm_tmp_name_add ("_RE");
      break;
H.J. Lu committed
1883

1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895
    case IMAGPART_EXPR:
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      lsm_tmp_name_add ("_IM");
      break;

    case COMPONENT_REF:
      /* Field accesses append "_<fieldname>" ("_F" if unnamed).  */
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      lsm_tmp_name_add ("_");
      name = get_name (TREE_OPERAND (ref, 1));
      if (!name)
	name = "F";
      lsm_tmp_name_add (name);
1896
      break;
1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919

    case ARRAY_REF:
      gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
      lsm_tmp_name_add ("_I");
      break;

    case SSA_NAME:
    case VAR_DECL:
    case PARM_DECL:
      /* Leaves use the declared name ("D" if unnamed).  */
      name = get_name (ref);
      if (!name)
	name = "D";
      lsm_tmp_name_add (name);
      break;

    case STRING_CST:
      lsm_tmp_name_add ("S");
      break;

    case RESULT_DECL:
      lsm_tmp_name_add ("R");
      break;

1920 1921 1922 1923
    case INTEGER_CST:
      /* Nothing.  */
      break;

1924 1925 1926 1927 1928 1929
    default:
      gcc_unreachable ();
    }
}

/* Determines name for temporary variable that replaces REF.
1930 1931
   The name is accumulated into the lsm_tmp_name variable.
   N is added to the name of the temporary.  */
1932

1933 1934
char *
get_lsm_tmp_name (tree ref, unsigned n)
{
  char suffix[2];

  /* Build the base name from the structure of REF and tag it as a
     store-motion temporary.  */
  lsm_tmp_name_length = 0;
  gen_lsm_tmp_name (ref);
  lsm_tmp_name_add ("_lsm");

  /* Append the single-digit index N, if it is one.  */
  if (n < 10)
    {
      suffix[0] = '0' + n;
      suffix[1] = '\0';
      lsm_tmp_name_add (suffix);
    }
  return lsm_tmp_name;
}

1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079
struct prev_flag_edges {
  /* Edge to insert new flag comparison code.  */
  edge append_cond_position;

  /* Edge for fall through from previous flag comparison.  */
  edge last_cond_fallthru;
};

/* Helper function for execute_sm.  Emit code to store TMP_VAR into
   MEM along edge EX.

   The store is only done if MEM has changed.  We do this so no
   changes to MEM occur on code paths that did not originally store
   into it.

   The common case for execute_sm will transform:

     for (...) {
       if (foo)
         stuff;
       else
         MEM = TMP_VAR;
     }

   into:

     lsm = MEM;
     for (...) {
       if (foo)
         stuff;
       else
         lsm = TMP_VAR;
     }
     MEM = lsm;

  This function will generate:

     lsm = MEM;

     lsm_flag = false;
     ...
     for (...) {
       if (foo)
         stuff;
       else {
         lsm = TMP_VAR;
         lsm_flag = true;
       }
     }
     if (lsm_flag)	<--
       MEM = lsm;	<--
*/

static void
execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
{
  basic_block new_bb, then_bb, old_dest;
  bool loop_has_only_one_exit;
  edge then_old_edge, orig_ex = ex;
  gimple_stmt_iterator gsi;
  gimple stmt;
  struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;

  /* ?? Insert store after previous store if applicable.  See note
     below.  */
  if (prev_edges)
    ex = prev_edges->append_cond_position;

  loop_has_only_one_exit = single_pred_p (ex->dest);

  if (loop_has_only_one_exit)
    ex = split_block_after_labels (ex->dest);

  old_dest = ex->dest;
  new_bb = split_edge (ex);
  then_bb = create_empty_bb (new_bb);
  if (current_loops && new_bb->loop_father)
    add_bb_to_loop (then_bb, new_bb->loop_father);

  /* Build "if (flag != false)" in NEW_BB ...  */
  gsi = gsi_start_bb (new_bb);
  stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
			    NULL_TREE, NULL_TREE);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  gsi = gsi_start_bb (then_bb);
  /* Insert actual store.  */
  stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);

  make_edge (new_bb, then_bb, EDGE_TRUE_VALUE);
  make_edge (new_bb, old_dest, EDGE_FALSE_VALUE);
  then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU);

  set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);

  if (prev_edges)
    {
      basic_block prevbb = prev_edges->last_cond_fallthru->src;
      redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
      set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
      set_immediate_dominator (CDI_DOMINATORS, old_dest,
			       recompute_dominator (CDI_DOMINATORS, old_dest));
    }

  /* ?? Because stores may alias, they must happen in the exact
     sequence they originally happened.  Save the position right after
     the (_lsm) store we just created so we can continue appending after
     it and maintain the original order.  */
  {
    struct prev_flag_edges *p;

    if (orig_ex->aux)
      orig_ex->aux = NULL;
    alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
    p = (struct prev_flag_edges *) orig_ex->aux;
    p->append_cond_position = then_old_edge;
    p->last_cond_fallthru = find_edge (new_bb, old_dest);
    orig_ex->aux = (void *) p;
  }

  /* Duplicate the PHI arguments coming in over NEW_BB onto the new
     THEN_BB fallthrough edge so OLD_DEST's PHIs stay consistent.  */
  if (!loop_has_only_one_exit)
    for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple phi = gsi_stmt (gsi);
	unsigned i;

	for (i = 0; i < gimple_phi_num_args (phi); i++)
	  if (gimple_phi_arg_edge (phi, i)->src == new_bb)
	    {
	      tree arg = gimple_phi_arg_def (phi, i);
	      add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
	      update_stmt (phi);
	    }
      }
  /* Remove the original fall through edge.  This was the
     single_succ_edge (new_bb).  */
  EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
}

/* Helper function for execute_sm.  On every location where REF is
   set, set an appropriate flag indicating the store.  Returns the
   new boolean flag variable.  */

static tree
execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
{
  unsigned i;
  mem_ref_loc_p loc;
  tree flag;
  VEC (mem_ref_loc_p, heap) *locs = NULL;
  char *str = get_lsm_tmp_name (ref->mem, ~0);

  lsm_tmp_name_add ("_flag");
  flag = create_tmp_reg (boolean_type_node, str);
  get_all_locs_in_loop (loop, ref, &locs);
  FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
    {
      gimple_stmt_iterator gsi;
      gimple stmt;

      /* Only set the flag for writes.  */
      if (is_gimple_assign (loc->stmt)
	  && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
	{
	  gsi = gsi_for_stmt (loc->stmt);
	  stmt = gimple_build_assign (flag, boolean_true_node);
	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
	}
    }
  VEC_free (mem_ref_loc_p, heap, locs);
  return flag;
}

2122
/* Executes store motion of memory reference REF from LOOP.
2123 2124 2125
   Exits from the LOOP are stored in EXITS.  The initialization of the
   temporary variable is put to the preheader of the loop, and assignments
   to the reference from the temporary variable are emitted to exits.  */
2126 2127

static void
2128
execute_sm (struct loop *loop, VEC (edge, heap) *exits, mem_ref_p ref)
2129
{
2130
  tree tmp_var, store_flag;
2131
  unsigned i;
2132
  gimple load;
2133
  struct fmt_data fmt_data;
2134
  edge ex, latch_edge;
2135
  struct lim_aux_data *lim_data;
2136
  bool multi_threaded_model_p = false;
2137

2138 2139 2140
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
2141
      print_generic_expr (dump_file, ref->mem, 0);
2142 2143 2144
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

2145
  tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
2146
			      get_lsm_tmp_name (ref->mem, ~0));
2147

2148 2149
  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
2150
  for_each_index (&ref->mem, force_move_till, &fmt_data);
2151

2152
  if (block_in_transaction (loop_preheader_edge (loop)->src)
2153 2154 2155 2156 2157 2158
      || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
    multi_threaded_model_p = true;

  if (multi_threaded_model_p)
    store_flag = execute_sm_if_changed_flag_set (loop, ref);

2159
  rewrite_mem_refs (loop, ref, tmp_var);
2160

2161 2162 2163 2164 2165 2166 2167
  /* Emit the load code into the latch, so that we are sure it will
     be processed after all dependencies.  */
  latch_edge = loop_latch_edge (loop);

  /* FIXME/TODO: For the multi-threaded variant, we could avoid this
     load altogether, since the store is predicated by a flag.  We
     could, do the load only if it was originally in the loop.  */
2168 2169 2170 2171
  load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
  lim_data = init_lim_data (load);
  lim_data->max_loop = loop;
  lim_data->tgt_loop = loop;
2172
  gsi_insert_on_edge (latch_edge, load);
2173

2174
  if (multi_threaded_model_p)
2175
    {
2176 2177 2178 2179 2180
      load = gimple_build_assign (store_flag, boolean_false_node);
      lim_data = init_lim_data (load);
      lim_data->max_loop = loop;
      lim_data->tgt_loop = loop;
      gsi_insert_on_edge (latch_edge, load);
2181
    }
2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192

  /* Sink the store to every exit from the loop.  */
  FOR_EACH_VEC_ELT (edge, exits, i, ex)
    if (!multi_threaded_model_p)
      {
	gimple store;
	store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
	gsi_insert_on_edge (ex, store);
      }
    else
      execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
2193 2194
}

2195 2196
/* Hoists memory references MEM_REFS out of LOOP.  EXITS is the list of exit
   edges of the LOOP.  */
2197 2198

static void
2199 2200
hoist_memory_references (struct loop *loop, bitmap mem_refs,
			 VEC (edge, heap) *exits)
2201
{
2202 2203 2204
  mem_ref_p ref;
  unsigned  i;
  bitmap_iterator bi;
2205

2206
  EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2207
    {
2208 2209
      ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
      execute_sm (loop, exits, ref);
2210
    }
2211 2212
}

2213 2214
/* Returns true if REF is always accessed in LOOP.  If STORED_P is true
   make sure REF is always stored to in LOOP.  */
2215 2216

static bool
2217
ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2218
{
2219
  VEC (mem_ref_loc_p, heap) *locs = NULL;
2220
  unsigned i;
2221 2222 2223
  mem_ref_loc_p loc;
  bool ret = false;
  struct loop *must_exec;
2224 2225 2226
  tree base;

  base = get_base_address (ref->mem);
2227 2228
  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
2229
    base = TREE_OPERAND (base, 0);
2230

2231
  get_all_locs_in_loop (loop, ref, &locs);
2232
  FOR_EACH_VEC_ELT (mem_ref_loc_p, locs, i, loc)
2233
    {
2234
      if (!get_lim_data (loc->stmt))
2235
	continue;
2236

2237 2238 2239 2240 2241 2242 2243 2244 2245 2246
      /* If we require an always executed store make sure the statement
         stores to the reference.  */
      if (stored_p)
	{
	  tree lhs;
	  if (!gimple_get_lhs (loc->stmt))
	    continue;
	  lhs = get_base_address (gimple_get_lhs (loc->stmt));
	  if (!lhs)
	    continue;
2247 2248
	  if (INDIRECT_REF_P (lhs)
	      || TREE_CODE (lhs) == MEM_REF)
2249 2250 2251 2252 2253
	    lhs = TREE_OPERAND (lhs, 0);
	  if (lhs != base)
	    continue;
	}

2254
      must_exec = get_lim_data (loc->stmt)->always_executed_in;
2255 2256
      if (!must_exec)
	continue;
2257

2258 2259 2260 2261 2262 2263 2264 2265
      if (must_exec == loop
	  || flow_loop_nested_p (must_exec, loop))
	{
	  ret = true;
	  break;
	}
    }
  VEC_free (mem_ref_loc_p, heap, locs);
2266

2267
  return ret;
2268 2269
}

2270
/* Returns true if REF1 and REF2 are independent.  */
2271

2272 2273
static bool
refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2274
{
2275 2276 2277 2278 2279
  if (ref1 == ref2
      || bitmap_bit_p (ref1->indep_ref, ref2->id))
    return true;
  if (bitmap_bit_p (ref1->dep_ref, ref2->id))
    return false;
2280 2281 2282
  if (!MEM_ANALYZABLE (ref1)
      || !MEM_ANALYZABLE (ref2))
    return false;
2283

2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependency of refs %u and %u: ",
	     ref1->id, ref2->id);

  if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
			    &memory_accesses.ttae_cache))
    {
      bitmap_set_bit (ref1->dep_ref, ref2->id);
      bitmap_set_bit (ref2->dep_ref, ref1->id);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "dependent.\n");
      return false;
    }
  else
    {
      bitmap_set_bit (ref1->indep_ref, ref2->id);
      bitmap_set_bit (ref2->indep_ref, ref1->id);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "independent.\n");
      return true;
    }
2305 2306
}

2307 2308
/* Records the information whether REF is independent in LOOP (according
   to INDEP).  */
2309 2310

static void
2311
record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep)
2312
{
2313 2314 2315 2316 2317
  if (indep)
    bitmap_set_bit (ref->indep_loop, loop->num);
  else
    bitmap_set_bit (ref->dep_loop, loop->num);
}
2318

2319 2320
/* Returns true if REF is independent on all other memory references in
   LOOP.  */
2321

2322 2323 2324
static bool
ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
{
2325
  bitmap refs_to_check;
2326 2327 2328 2329 2330
  unsigned i;
  bitmap_iterator bi;
  bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num);
  mem_ref_p aref;

2331 2332 2333 2334 2335 2336 2337
  if (stored)
    refs_to_check = VEC_index (bitmap,
			       memory_accesses.all_refs_in_loop, loop->num);
  else
    refs_to_check = VEC_index (bitmap,
			       memory_accesses.all_refs_stored_in_loop,
			       loop->num);
2338

2339
  EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2340
    {
2341
      aref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
2342 2343
      if (!MEM_ANALYZABLE (aref)
	  || !refs_independent_p (ref, aref))
2344 2345 2346 2347 2348
	{
	  ret = false;
	  record_indep_loop (loop, aref, false);
	  break;
	}
2349 2350
    }

2351
  return ret;
2352 2353
}

2354 2355
/* Returns true if REF is independent on all other memory references in
   LOOP.  Wrapper over ref_indep_loop_p_1, caching its results.  */
2356

2357 2358
static bool
ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2359
{
2360
  bool ret;
2361

2362 2363 2364 2365
  if (bitmap_bit_p (ref->indep_loop, loop->num))
    return true;
  if (bitmap_bit_p (ref->dep_loop, loop->num))
    return false;
2366

2367
  ret = ref_indep_loop_p_1 (loop, ref);
2368

2369 2370 2371 2372 2373 2374 2375
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
	     ref->id, loop->num, ret ? "independent" : "dependent");

  record_indep_loop (loop, ref, ret);

  return ret;
2376 2377
}

2378
/* Returns true if we can perform store motion of REF from LOOP.  */
2379

2380 2381
static bool
can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2382
{
2383 2384
  tree base;

2385 2386 2387 2388
  /* Can't hoist unanalyzable refs.  */
  if (!MEM_ANALYZABLE (ref))
    return false;

2389 2390 2391
  /* Unless the reference is stored in the loop, there is nothing to do.  */
  if (!bitmap_bit_p (ref->stored, loop->num))
    return false;
2392

2393 2394 2395 2396 2397
  /* It should be movable.  */
  if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
      || TREE_THIS_VOLATILE (ref->mem)
      || !for_each_index (&ref->mem, may_move_till, loop))
    return false;
2398

2399 2400 2401 2402
  /* If it can throw fail, we do not properly update EH info.  */
  if (tree_could_throw_p (ref->mem))
    return false;

2403 2404 2405 2406 2407 2408 2409 2410
  /* If it can trap, it must be always executed in LOOP.
     Readonly memory locations may trap when storing to them, but
     tree_could_trap_p is a predicate for rvalues, so check that
     explicitly.  */
  base = get_base_address (ref->mem);
  if ((tree_could_trap_p (ref->mem)
       || (DECL_P (base) && TREE_READONLY (base)))
      && !ref_always_accessed_p (loop, ref, true))
2411
    return false;
2412

2413 2414 2415 2416
  /* And it must be independent on all other memory references
     in LOOP.  */
  if (!ref_indep_loop_p (loop, ref))
    return false;
2417

2418
  return true;
2419 2420
}

2421 2422 2423
/* Marks the references in LOOP for that store motion should be performed
   in REFS_TO_SM.  SM_EXECUTED is the set of references for that store
   motion was performed in one of the outer loops.  */
2424 2425

static void
2426
find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2427
{
2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439
  bitmap refs = VEC_index (bitmap, memory_accesses.all_refs_in_loop,
			   loop->num);
  unsigned i;
  bitmap_iterator bi;
  mem_ref_p ref;

  EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
    {
      ref = VEC_index (mem_ref_p, memory_accesses.refs_list, i);
      if (can_sm_ref_p (loop, ref))
	bitmap_set_bit (refs_to_sm, i);
    }
2440
}
2441

2442 2443 2444
/* Checks whether LOOP (with exits stored in EXITS array) is suitable
   for a store motion optimization (i.e. whether we can insert statement
   on its exits).  */
2445

2446 2447 2448
static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
		      VEC (edge, heap) *exits)
2449
{
2450 2451
  unsigned i;
  edge ex;
2452

2453
  FOR_EACH_VEC_ELT (edge, exits, i, ex)
2454
    if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2455 2456 2457
      return false;

  return true;
2458 2459
}

2460
/* Try to perform store motion for all memory references modified inside
2461 2462
   LOOP.  SM_EXECUTED is the bitmap of the memory references for that
   store motion was executed in one of the outer loops.  */
2463 2464

static void
2465
store_motion_loop (struct loop *loop, bitmap sm_executed)
2466
{
2467
  VEC (edge, heap) *exits = get_loop_exit_edges (loop);
2468 2469
  struct loop *subloop;
  bitmap sm_in_loop = BITMAP_ALLOC (NULL);
2470

2471
  if (loop_suitable_for_sm (loop, exits))
2472
    {
2473 2474
      find_refs_for_sm (loop, sm_executed, sm_in_loop);
      hoist_memory_references (loop, sm_in_loop, exits);
2475
    }
2476
  VEC_free (edge, heap, exits);
2477 2478 2479 2480 2481 2482

  bitmap_ior_into (sm_executed, sm_in_loop);
  for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
    store_motion_loop (subloop, sm_executed);
  bitmap_and_compl_into (sm_executed, sm_in_loop);
  BITMAP_FREE (sm_in_loop);
2483 2484 2485
}

/* Try to perform store motion for all memory references modified inside
2486
   loops.  */
2487 2488

static void
2489
store_motion (void)
2490 2491
{
  struct loop *loop;
2492
  bitmap sm_executed = BITMAP_ALLOC (NULL);
2493

2494 2495
  for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
    store_motion_loop (loop, sm_executed);
2496

2497
  BITMAP_FREE (sm_executed);
2498
  gsi_commit_edge_inserts ();
2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513
}

/* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block bb records the outermost loop for that execution
   of its header implies execution of bb.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  */

static void
fill_always_executed_in (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
    {
      bbs = get_loop_body_in_dom_order (loop);

      /* Walk blocks in dominance order; LAST tracks the deepest block
	 that dominates the latch and hence is always executed.  */
      for (i = 0; i < loop->num_nodes; i++)
	{
	  edge_iterator ei;
	  bb = bbs[i];

	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  if (TEST_BIT (contains_call, bb->index))
	    break;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    break;

	  if (bb->loop_father->header == bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
		break;

	      /* In a loop that is always entered we may proceed anyway.
		 But record that we entered it and stop once we leave it.  */
	      inn_loop = bb->loop_father;
	    }
	}

      /* Mark LAST and all its dominators up to the header as always
	 executed in LOOP.  */
      while (1)
	{
	  SET_ALWAYS_EXECUTED_IN (last, loop);
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);
}

2569
/* Compute the global information needed by the loop invariant motion pass.  */
2570 2571

static void
2572
tree_ssa_lim_initialize (void)
2573 2574
{
  sbitmap contains_call = sbitmap_alloc (last_basic_block);
2575
  gimple_stmt_iterator bsi;
2576 2577 2578
  struct loop *loop;
  basic_block bb;

2579 2580
  bitmap_obstack_initialize (&lim_bitmap_obstack);

2581 2582 2583
  sbitmap_zero (contains_call);
  FOR_EACH_BB (bb)
    {
2584
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2585
	{
2586
	  if (nonpure_call_p (gsi_stmt (bsi)))
2587 2588 2589
	    break;
	}

2590
      if (!gsi_end_p (bsi))
2591 2592 2593
	SET_BIT (contains_call, bb->index);
    }

2594
  for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2595 2596 2597
    fill_always_executed_in (loop, contains_call);

  sbitmap_free (contains_call);
2598 2599

  lim_aux_data_map = pointer_map_create ();
2600 2601 2602

  if (flag_tm)
    compute_transaction_bits ();
2603 2604

  alloc_aux_for_edges (0);
2605 2606 2607 2608 2609 2610 2611 2612
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;
2613
  unsigned i;
2614
  mem_ref_p ref;
2615

2616 2617
  free_aux_for_edges ();

2618
  FOR_EACH_BB (bb)
2619
    SET_ALWAYS_EXECUTED_IN (bb, NULL);
2620

2621
  bitmap_obstack_release (&lim_bitmap_obstack);
2622 2623
  pointer_map_destroy (lim_aux_data_map);

2624 2625
  htab_delete (memory_accesses.refs);

2626 2627 2628 2629
  FOR_EACH_VEC_ELT (mem_ref_p, memory_accesses.refs_list, i, ref)
    memref_free (ref);
  VEC_free (mem_ref_p, heap, memory_accesses.refs_list);

2630 2631
  VEC_free (bitmap, heap, memory_accesses.refs_in_loop);
  VEC_free (bitmap, heap, memory_accesses.all_refs_in_loop);
2632
  VEC_free (bitmap, heap, memory_accesses.all_refs_stored_in_loop);
2633 2634

  if (memory_accesses.ttae_cache)
2635
    free_affine_expand_cache (&memory_accesses.ttae_cache);
2636 2637
}

/* Moves invariants from loops.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be win regardless of the register pressure.
   Returns the TODO_* flags to be passed back to the pass manager.  */

unsigned int
tree_ssa_lim (void)
{
  unsigned int todo;

  tree_ssa_lim_initialize ();

  /* Gathers information about memory accesses in the loops.  */
  analyze_memory_references ();

  /* For each statement determine the outermost loop in that it is
     invariant and cost for computing the invariant.  */
  determine_invariantness ();

  /* Execute store motion.  Force the necessary invariants to be moved
     out of the loops as well.  */
  store_motion ();

  /* Move the expressions that are expensive enough.  */
  todo = move_computations ();

  tree_ssa_lim_finalize ();

  return todo;
}