/* Induction variable canonicalization.
   Copyright (C) 2004, 2005, 2007, 2008, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass detects the loops that iterate a constant number of times,
   adds a canonical induction variable (step -1, tested against 0)
   and replaces the exit test.  This enables the less powerful rtl
   level analysis to use this information.

   This might spoil the code in some cases (by increasing register pressure).
   Note that in the case the new variable is not needed, ivopts will get rid
   of it, so it might only be a problem when there are no other linear induction
   variables.  In that case the created optimization possibilities are likely
   to pay up.

   Additionally in case we detect that it is beneficial to unroll the
   loop completely, we do it right here to expose the optimization
   possibilities to the following passes.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
43 44
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
45 46 47 48 49 50 51 52 53
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "flags.h"
#include "tree-inline.h"
54
#include "target.h"
55

56 57 58 59
/* Specifies types of loops that may be unrolled.  Ordered from the most
   restrictive policy to the most permissive one.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};

67 68 69 70 71 72 73
/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
   is the exit edge whose condition is replaced.  The new variable counts
   down from NITER to zero with step -1, and the exit condition becomes an
   equality test against zero.  */

static void
create_canonical_iv (struct loop *loop, edge exit, tree niter)
{
  edge stay_edge;
  tree niter_type, ctrl_var;
  gimple exit_cond;
  gimple_stmt_iterator incr_pos;
  enum tree_code exit_cmp;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " iterations.\n");
    }

  exit_cond = last_stmt (exit->src);
  /* The successor of the exiting block other than EXIT stays inside
     the loop.  */
  stay_edge = (EDGE_SUCC (exit->src, 0) == exit
	       ? EDGE_SUCC (exit->src, 1)
	       : EDGE_SUCC (exit->src, 0));

  /* Note that we do not need to worry about overflows, since
     type of niter is always unsigned and all comparisons are
     just for equality/nonequality -- i.e. everything works
     with a modulo arithmetics.  */

  niter_type = TREE_TYPE (niter);
  niter = fold_build2 (PLUS_EXPR, niter_type, niter,
		       build_int_cst (niter_type, 1));
  incr_pos = gsi_last_bb (stay_edge->src);
  create_iv (niter, build_int_cst (niter_type, -1), NULL_TREE, loop,
	     &incr_pos, false, NULL, &ctrl_var);

  /* Exit when the counter reaches zero; the polarity of the comparison
     depends on which edge of the conditional leaves the loop.  */
  exit_cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  gimple_cond_set_code (exit_cond, exit_cmp);
  gimple_cond_set_lhs (exit_cond, ctrl_var);
  gimple_cond_set_rhs (exit_cond, build_int_cst (niter_type, 0));
  update_stmt (exit_cond);
}

113
/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
114 115

unsigned
116
tree_num_loop_insns (struct loop *loop, eni_weights *weights)
117 118
{
  basic_block *body = get_loop_body (loop);
119
  gimple_stmt_iterator gsi;
120
  unsigned size = 0, i;
121 122

  for (i = 0; i < loop->num_nodes; i++)
123 124
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      size += estimate_num_insns (gsi_stmt (gsi), weights);
125 126 127 128 129
  free (body);

  return size;
}

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/* Describe size of loop as detected by tree_estimate_loop_size.  All
   counts are in estimated instructions (estimate_num_insns units).  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop  (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;
};

/* Return true if OP in STMT will be constant after peeling LOOP.
   This holds for invariants, for reads from constant arrays indexed by
   values that are themselves constant after peeling, and for simple
   induction variables with invariant base and step (their value in each
   peeled copy is a known constant).  */

static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  affine_iv iv;

  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First make fast look if we see constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      /* Fixed thinko: the original tested DECL_P (base) == VAR_DECL,
	 comparing DECL_P's boolean result against the tree code
	 enumerator VAR_DECL -- always false, which disabled this folding
	 path for all decls.  The intended test is on the tree code.  */
      if ((TREE_CODE (base) == VAR_DECL
	   && const_value_known_p (base))
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}

/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
   Return results in SIZE, estimate benefits for complete unrolling exiting
   by EXIT.  EXIT may be NULL; in that case no statement is credited as the
   eliminable exit condition and no block is considered "after exit".  */

static void
tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
{
  basic_block *body = get_loop_body (loop);
  gimple_stmt_iterator gsi;
  unsigned int i;
  bool after_exit;

  size->overall = 0;
  size->eliminated_by_peeling = 0;
  size->last_iteration = 0;
  size->last_iteration_eliminated_by_peeling = 0;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
  for (i = 0; i < loop->num_nodes; i++)
    {
      /* Blocks dominated by the exit source execute only in iterations
	 before the final one, so they do not count into the size of the
	 last (partial) iteration.  */
      if (exit && body[i] != exit->src
	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
	after_exit = true;
      else
	after_exit = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);

      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  int num = estimate_num_insns (stmt, &eni_size_weights);
	  bool likely_eliminated = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  size: %3i ", num);
	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
	    }

	  /* Look for reasons why we might optimize this stmt away. */

	  /* Exit conditional.  Guard against NULL EXIT here, consistently
	     with the dominance test above -- the original dereferenced
	     EXIT unconditionally.  */
	  if (exit && body[i] == exit->src && stmt == last_stmt (exit->src))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
	        fprintf (dump_file, "   Exit condition will be eliminated.\n");
	      likely_eliminated = true;
	    }
	  /* Sets of IV variables  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
	        fprintf (dump_file, "   Induction variable computation will"
			 " be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Assignments of IV variables.  */
	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
		       				  stmt, loop)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
	        fprintf (dump_file, "   Constant expression will be folded away.\n");
	      likely_eliminated = true;
	    }
	  /* Conditionals.  */
	  else if (gimple_code (stmt) == GIMPLE_COND
		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
	        fprintf (dump_file, "   Constant conditional.\n");
	      likely_eliminated = true;
	    }

	  /* Accumulate totals; statements in blocks after the exit do not
	     run in the last iteration, so they are excluded from the
	     last_iteration counters.  */
	  size->overall += num;
	  if (likely_eliminated)
	    size->eliminated_by_peeling += num;
	  if (!after_exit)
	    {
	      size->last_iteration += num;
	      if (likely_eliminated)
		size->last_iteration_eliminated_by_peeling += num;
	    }
	}
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
    	     size->eliminated_by_peeling, size->last_iteration,
	     size->last_iteration_eliminated_by_peeling);

  free (body);
}
291

292 293 294 295 296
/* Estimate number of insns of completely unrolled loop.
   It is (NUNROLL + 1) * size of loop body with taking into account
   the fact that in last copy everything after exit conditional
   is dead and that some instructions will be eliminated after
   peeling.
297

298 299
   Loop body is likely going to simplify futher, this is difficult
   to guess, we just decrease the result by 1/3.  */
300 301

static unsigned HOST_WIDE_INT
302
estimated_unrolled_size (struct loop_size *size,
303 304
			 unsigned HOST_WIDE_INT nunroll)
{
305 306 307 308 309 310 311 312
  HOST_WIDE_INT unr_insns = ((nunroll)
  			     * (HOST_WIDE_INT) (size->overall
			     			- size->eliminated_by_peeling));
  if (!nunroll)
    unr_insns = 0;
  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;

  unr_insns = unr_insns * 2 / 3;
313 314 315 316 317 318
  if (unr_insns <= 0)
    unr_insns = 1;

  return unr_insns;
}

319
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   Returns true if the loop was unrolled (and the exit condition
   turned into a constant), false otherwise.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul)
{
  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
  gimple cond;
  struct loop_size size;

  /* Only innermost loops are handled here.  */
  if (loop->inner)
    return false;

  /* NITER must be a constant that fits in an unsigned HOST_WIDE_INT.  */
  if (!host_integerp (niter, 1))
    return false;
  n_unroll = tree_low_cst (niter, 1);

  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
  if (n_unroll > max_unroll)
    return false;

  /* For loops that iterate more than once, check the size limits;
     a loop with n_unroll == 0 exits in its first iteration and is
     always profitable to "unroll" (just kill the exit test).  */
  if (n_unroll)
    {
      if (ul == UL_SINGLE_ITER)
	return false;

      tree_estimate_loop_size (loop, exit, &size);
      ninsns = size.overall;

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* Refuse when the code would grow beyond the hard param limit.  */
      if (unr_insns > ninsns
	  && (unr_insns
	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}

      /* Under UL_NO_GROWTH, any growth at all is a reason to refuse.  */
      if (ul == UL_NO_GROWTH
	  && unr_insns > ninsns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
	  return false;
	}
    }

  /* Duplicate the loop body N_UNROLL times onto the preheader edge.  */
  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      VEC (edge, heap) *to_remove = NULL;

      initialize_original_copy_tables ();
      /* All copies except the first one are known not to exit; the
	 first copy keeps the original exit behavior.  */
      wont_exit = sbitmap_alloc (n_unroll + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);

      /* Note: DLTHE_FLAG_COMPLETTE_PEEL is the actual (misspelled)
	 upstream macro name.  */
      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
          free_original_copy_tables ();
	  free (wont_exit);
	  return false;
	}

      /* Remove the now-dead exit edges of the peeled copies.  */
      FOR_EACH_VEC_ELT (edge, to_remove, i, e)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      VEC_free (edge, heap, to_remove);
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Force the (single remaining) exit condition to always leave the
     loop, which makes the loop body straight-line code.  */
  cond = last_stmt (exit->src);
  if (exit->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (cond);
  else
    gimple_cond_make_false (cond);
  update_stmt (cond);
  update_ssa (TODO_update_ssa);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);

  return true;
}

427
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
   to determine the number of iterations of a loop by direct evaluation.
   Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;

  /* First try the cheap scev-based latch-execution count.  */
  niter = number_of_latch_executions (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    {
      /* NOTE(review): number_of_latch_executions succeeding is taken to
	 imply a single exit here; EXIT is not NULL-checked before the
	 dereference below -- confirm against that function's contract.  */
      exit = single_exit (loop);
      if (!just_once_each_iteration_p (loop, exit->src))
	return false;
    }
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!single_exit (loop))
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      /* Give up unless we ended with a known constant count.  */
      if (chrec_contains_undetermined (niter)
	  || TREE_CODE (niter) != INTEGER_CST)
	return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }

  /* Complete unrolling changes the cfg; report that to the caller.  */
  if (try_unroll_loop_completely (loop, exit, niter, ul))
    return true;

  if (create_iv)
    create_canonical_iv (loop, exit, niter);

  return false;
}

/* The main entry point of the pass.  Adds canonical induction variables
483
   to the suitable loops.  */
484

485
unsigned int
486
canonicalize_induction_variables (void)
487
{
488
  loop_iterator li;
489
  struct loop *loop;
490
  bool changed = false;
H.J. Lu committed
491

492
  FOR_EACH_LOOP (li, loop, 0)
493
    {
494 495 496
      changed |= canonicalize_loop_induction_variables (loop,
							true, UL_SINGLE_ITER,
							true);
497 498
    }

499 500 501 502
  /* Clean up the information about numbers of iterations, since brute force
     evaluation could reveal new information.  */
  scev_reset ();

503
  if (changed)
504 505
    return TODO_cleanup_cfg;
  return 0;
506 507
}

508 509 510
/* Unroll LOOPS completely if they iterate just few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  UNROLL_OUTER additionally
   allows growth-causing unrolling of outermost loops.  Iterates until
   a fixed point (bounded by PARAM_MAX_UNROLL_ITERATIONS), since
   unrolling an inner loop can expose new innermost loops.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  loop_iterator li;
  struct loop *loop;
  bool changed;
  enum unroll_level ul;
  int iteration = 0;

  do
    {
      changed = false;

      FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
	{
	  if (may_increase_size && optimize_loop_for_speed_p (loop)
	      /* Unroll outermost loops only if asked to do so or they do
		 not cause code growth.  */
	      && (unroll_outer
		  || loop_outer (loop_outer (loop))))
	    ul = UL_ALL;
	  else
	    ul = UL_NO_GROWTH;
	  /* Do not create new ivs here; try direct evaluation only when
	     ivcanon itself is disabled (it runs it otherwise).  */
	  changed |= canonicalize_loop_induction_variables
		       (loop, false, ul, !flag_tree_loop_ivcanon);
	}

      if (changed)
	{
	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
	}
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  return 0;
}