loop-doloop.c 23.6 KB
Newer Older
Zdenek Dvorak committed
1
/* Perform doloop optimizations
2
   Copyright (C) 2004-2020 Free Software Foundation, Inc.
Zdenek Dvorak committed
3 4 5 6 7 8
   Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
9
Software Foundation; either version 3, or (at your option) any later
Zdenek Dvorak committed
10 11 12 13 14 15 16 17
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
18 19
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
Zdenek Dvorak committed
20 21 22 23

#include "config.h"
#include "system.h"
#include "coretypes.h"
24
#include "backend.h"
25
#include "target.h"
Zdenek Dvorak committed
26
#include "rtl.h"
27
#include "tree.h"
28
#include "cfghooks.h"
29
#include "memmodel.h"
30
#include "emit-rtl.h"
31 32
#include "dojump.h"
#include "expr.h"
Zdenek Dvorak committed
33
#include "cfgloop.h"
34
#include "cfgrtl.h"
35
#include "dumpfile.h"
36
#include "loop-unroll.h"
37 38
#include "regs.h"
#include "df.h"
Zdenek Dvorak committed
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71

/* This module is used to modify loops with a determinable number of
   iterations to use special low-overhead looping instructions.

   It first validates whether the loop is well behaved and has a
   determinable number of iterations (either at compile or run-time).
   It then modifies the loop to use a low-overhead looping pattern as
   follows:

   1. A pseudo register is allocated as the loop iteration counter.

   2. The number of loop iterations is calculated and is stored
      in the loop counter.

   3. At the end of the loop, the jump insn is replaced by the
      doloop_end pattern.  The compare must remain because it might be
      used elsewhere.  If the loop-variable or condition register are
      used elsewhere, they will be eliminated by flow.

   4. An optional doloop_begin pattern is inserted at the top of the
      loop.

   TODO The optimization should only performed when either the biv used for exit
   condition is unused at all except for the exit test, or if we do not have to
   change its value, since otherwise we have to add a new induction variable,
   which usually will not pay up (unless the cost of the doloop pattern is
   somehow extremely lower than the cost of compare & jump, or unless the bct
   register cannot be used for anything else but doloop -- ??? detect these
   cases).  */

/* Return the loop termination condition for PATTERN or zero
   if it is not a decrement and branch jump insn.  */

72
rtx
73
doloop_condition_get (rtx_insn *doloop_pat)
Zdenek Dvorak committed
74 75 76 77
{
  rtx cmp;
  rtx inc;
  rtx reg;
78
  rtx inc_src;
Zdenek Dvorak committed
79
  rtx condition;
80
  rtx pattern;
Revital Eres committed
81 82
  rtx cc_reg = NULL_RTX;
  rtx reg_orig = NULL_RTX;
Zdenek Dvorak committed
83

84 85
  /* The canonical doloop pattern we expect has one of the following
     forms:
Zdenek Dvorak committed
86

87 88 89 90 91
     1)  (parallel [(set (pc) (if_then_else (condition)
	  			            (label_ref (label))
				            (pc)))
	             (set (reg) (plus (reg) (const_int -1)))
	             (additional clobbers and uses)])
Zdenek Dvorak committed
92

93 94 95 96
     The branch must be the first entry of the parallel (also required
     by jump.c), and the second entry of the parallel must be a set of
     the loop counter register.  Some targets (IA-64) wrap the set of
     the loop counter in an if_then_else too.
97

98 99 100
     2)  (set (reg) (plus (reg) (const_int -1))
         (set (pc) (if_then_else (reg != 0)
	                         (label_ref (label))
Revital Eres committed
101 102 103 104 105 106 107 108 109 110
			         (pc))).  

     Some targets (ARM) do the comparison before the branch, as in the
     following form:

     3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
                   (set (reg) (plus (reg) (const_int -1)))])
        (set (pc) (if_then_else (cc == NE)
                                (label_ref (label))
                                (pc))) */
111 112

  pattern = PATTERN (doloop_pat);
Zdenek Dvorak committed
113 114

  if (GET_CODE (pattern) != PARALLEL)
115 116
    {
      rtx cond;
117
      rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
Revital Eres committed
118 119
      rtx cmp_arg1, cmp_arg2;
      rtx cmp_orig;
120

Revital Eres committed
121 122 123 124 125
      /* In case the pattern is not PARALLEL we expect two forms
	 of doloop which are cases 2) and 3) above: in case 2) the
	 decrement immediately precedes the branch, while in case 3)
	 the compare and decrement instructions immediately precede
	 the branch.  */
Zdenek Dvorak committed
126

127
      if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
128 129 130
        return 0;

      cmp = pattern;
Revital Eres committed
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
      if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
        {
	  /* The third case: the compare and decrement instructions
	     immediately precede the branch.  */
	  cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
	  if (GET_CODE (cmp_orig) != SET)
	    return 0;
	  if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
	    return 0;
	  cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
          cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
	  if (cmp_arg2 != const0_rtx 
	      || GET_CODE (cmp_arg1) != PLUS)
	    return 0;
	  reg_orig = XEXP (cmp_arg1, 0);
	  if (XEXP (cmp_arg1, 1) != GEN_INT (-1) 
	      || !REG_P (reg_orig))
	    return 0;
	  cc_reg = SET_DEST (cmp_orig);
	  
	  inc = XVECEXP (PATTERN (prev_insn), 0, 1);
	}
      else
154
        inc = PATTERN (prev_insn);
155 156 157 158 159 160 161
      if (GET_CODE (cmp) == SET && GET_CODE (SET_SRC (cmp)) == IF_THEN_ELSE)
	{
	  /* We expect the condition to be of the form (reg != 0)  */
	  cond = XEXP (SET_SRC (cmp), 0);
	  if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
	    return 0;
	}
162 163 164 165 166 167
    }
  else
    {
      cmp = XVECEXP (pattern, 0, 0);
      inc = XVECEXP (pattern, 0, 1);
    }
Zdenek Dvorak committed
168 169

  /* Check for (set (reg) (something)).  */
170
  if (GET_CODE (inc) != SET)
Zdenek Dvorak committed
171 172
    return 0;
  reg = SET_DEST (inc);
173 174 175 176 177 178 179 180 181 182 183 184
  if (! REG_P (reg))
    return 0;

  /* Check if something = (plus (reg) (const_int -1)).
     On IA-64, this decrement is wrapped in an if_then_else.  */
  inc_src = SET_SRC (inc);
  if (GET_CODE (inc_src) == IF_THEN_ELSE)
    inc_src = XEXP (inc_src, 1);
  if (GET_CODE (inc_src) != PLUS
      || XEXP (inc_src, 0) != reg
      || XEXP (inc_src, 1) != constm1_rtx)
    return 0;
Zdenek Dvorak committed
185 186 187 188 189 190 191 192 193 194 195 196 197 198

  /* Check for (set (pc) (if_then_else (condition)
                                       (label_ref (label))
                                       (pc))).  */
  if (GET_CODE (cmp) != SET
      || SET_DEST (cmp) != pc_rtx
      || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
      || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
      || XEXP (SET_SRC (cmp), 2) != pc_rtx)
    return 0;

  /* Extract loop termination condition.  */
  condition = XEXP (SET_SRC (cmp), 0);

199 200 201 202 203
  /* We expect a GE or NE comparison with 0 or 1.  */
  if ((GET_CODE (condition) != GE
       && GET_CODE (condition) != NE)
      || (XEXP (condition, 1) != const0_rtx
          && XEXP (condition, 1) != const1_rtx))
Zdenek Dvorak committed
204 205
    return 0;

206
  if ((XEXP (condition, 0) == reg)
Revital Eres committed
207 208 209 210
      /* For the third case:  */  
      || ((cc_reg != NULL_RTX)
	  && (XEXP (condition, 0) == cc_reg)
	  && (reg_orig == reg))
211
      || (GET_CODE (XEXP (condition, 0)) == PLUS
Revital Eres committed
212
	  && XEXP (XEXP (condition, 0), 0) == reg))
213 214
   {
     if (GET_CODE (pattern) != PARALLEL)
Revital Eres committed
215
     /*  For the second form we expect:
216 217 218 219 220 221 222 223 224 225 226 227 228 229

         (set (reg) (plus (reg) (const_int -1))
         (set (pc) (if_then_else (reg != 0)
                                 (label_ref (label))
                                 (pc))).

         is equivalent to the following:

         (parallel [(set (pc) (if_then_else (reg != 1)
                                            (label_ref (label))
                                            (pc)))
                     (set (reg) (plus (reg) (const_int -1)))
                     (additional clobbers and uses)])

Revital Eres committed
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
        For the third form we expect:

        (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
                   (set (reg) (plus (reg) (const_int -1)))])
        (set (pc) (if_then_else (cc == NE)
                                (label_ref (label))
                                (pc))) 

        which is equivalent to the following:

        (parallel [(set (cc) (compare (reg,  1))
                   (set (reg) (plus (reg) (const_int -1)))
                   (set (pc) (if_then_else (NE == cc)
                                           (label_ref (label))
                                           (pc))))])

        So we return the second form instead for the two cases.

248 249 250
     */
        condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);

Zdenek Dvorak committed
251
    return condition;
252
   }
Zdenek Dvorak committed
253 254

  /* ??? If a machine uses a funny comparison, we could return a
255
     canonicalized form here.  */
Zdenek Dvorak committed
256 257 258 259 260 261 262 263 264

  return 0;
}

/* Return nonzero if the loop specified by LOOP is suitable for
   the use of special low-overhead looping instructions.  DESC
   describes the number of iterations of the loop.  */

static bool
265
doloop_valid_p (class loop *loop, class niter_desc *desc)
Zdenek Dvorak committed
266 267
{
  basic_block *body = get_loop_body (loop), bb;
268
  rtx_insn *insn;
Zdenek Dvorak committed
269
  unsigned i;
270
  bool result = true;
Zdenek Dvorak committed
271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300

  /* Check for loops that may not terminate under special conditions.  */
  if (!desc->simple_p
      || desc->assumptions
      || desc->infinite)
    {
      /* There are some cases that would require a special attention.
	 For example if the comparison is LEU and the comparison value
	 is UINT_MAX then the loop will not terminate.  Similarly, if the
	 comparison code is GEU and the comparison value is 0, the
	 loop will not terminate.

	 If the absolute increment is not 1, the loop can be infinite
	 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)

	 ??? We could compute these conditions at run-time and have a
	 additional jump around the loop to ensure an infinite loop.
	 However, it is very unlikely that this is the intended
	 behavior of the loop and checking for these rare boundary
	 conditions would pessimize all other code.

	 If the loop is executed only a few times an extra check to
	 restart the loop could use up most of the benefits of using a
	 count register loop.  Note however, that normally, this
	 restart branch would never execute, so it could be predicted
	 well by the CPU.  We should generate the pessimistic code by
	 default, and have an option, e.g. -funsafe-loops that would
	 enable count-register loops in this case.  */
      if (dump_file)
	fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
301 302
      result = false;
      goto cleanup;
Zdenek Dvorak committed
303 304 305 306 307 308 309 310 311 312
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      for (insn = BB_HEAD (bb);
	   insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
313 314
	  /* Different targets have different necessities for low-overhead
	     looping.  Call the back end for each instruction within the loop
315 316 317 318 319 320 321
	     to let it decide whether the insn prohibits a low-overhead loop.
	     It will then return the cause for it to emit to the dump file.  */
	  const char * invalid = targetm.invalid_within_doloop (insn);
	  if (invalid)
	    {
	      if (dump_file)
		fprintf (dump_file, "Doloop: %s\n", invalid);
322 323
	      result = false;
	      goto cleanup;
324
	    }
Zdenek Dvorak committed
325 326
	}
    }
327 328 329
  result = true;

cleanup:
Zdenek Dvorak committed
330 331
  free (body);

332
  return result;
Zdenek Dvorak committed
333 334
}

335 336 337 338
/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
   edge.  If the condition is always false, do not do anything.  If it is always
   true, redirect E to DEST and return false.  In all other cases, true is
   returned.  */
Zdenek Dvorak committed
339

340 341
static bool
add_test (rtx cond, edge *e, basic_block dest)
Zdenek Dvorak committed
342
{
343
  rtx_insn *seq, *jump;
344
  rtx_code_label *label;
345
  machine_mode mode;
Zdenek Dvorak committed
346 347
  rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
  enum rtx_code code = GET_CODE (cond);
348
  basic_block bb;
349 350
  /* The jump is supposed to handle an unlikely special case.  */
  profile_probability prob = profile_probability::guessed_never ();
Zdenek Dvorak committed
351 352 353 354 355 356 357 358 359

  mode = GET_MODE (XEXP (cond, 0));
  if (mode == VOIDmode)
    mode = GET_MODE (XEXP (cond, 1));

  start_sequence ();
  op0 = force_operand (op0, NULL_RTX);
  op1 = force_operand (op1, NULL_RTX);
  label = block_label (dest);
360
  do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL, label,
361
			   prob);
Zdenek Dvorak committed
362 363

  jump = get_last_insn ();
364
  if (!jump || !JUMP_P (jump))
365
    {
366 367 368
      /* The condition is always false and the jump was optimized out.  */
      end_sequence ();
      return true;
369
    }
Zdenek Dvorak committed
370 371

  seq = get_insns ();
372
  unshare_all_rtl_in_chain (seq);
Zdenek Dvorak committed
373
  end_sequence ();
374 375 376 377

  /* There always is at least the jump insn in the sequence.  */
  gcc_assert (seq != NULL_RTX);

378
  bb = split_edge_and_insert (*e, seq);
379 380 381 382 383 384 385 386 387
  *e = single_succ_edge (bb);

  if (any_uncondjump_p (jump))
    {
      /* The condition is always true.  */
      delete_insn (jump);
      redirect_edge_and_branch_force (*e, dest);
      return false;
    }
H.J. Lu committed
388

389 390 391 392
  JUMP_LABEL (jump) = label;

  LABEL_NUSES (label)++;

393 394 395 396
  edge e2 = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
  e2->probability = prob;
  (*e)->probability = prob.invert ();
  update_br_prob_note (e2->src);
397
  return true;
Zdenek Dvorak committed
398 399 400 401 402 403
}

/* Modify the loop to use the low-overhead looping insn where LOOP
   describes the loop, DESC describes the number of iterations of the
   loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
   end of the loop.  CONDITION is the condition separated from the
Joseph Myers committed
404
   DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
Zdenek Dvorak committed
405 406

static void
407
doloop_modify (class loop *loop, class niter_desc *desc,
408
	       rtx_insn *doloop_seq, rtx condition, rtx count)
Zdenek Dvorak committed
409 410
{
  rtx counter_reg;
411
  rtx tmp, noloop = NULL_RTX;
412 413
  rtx_insn *sequence;
  rtx_insn *jump_insn;
414
  rtx_code_label *jump_label;
415
  int nonneg = 0;
Zdenek Dvorak committed
416 417
  bool increment_count;
  basic_block loop_end = desc->out_edge->src;
418
  scalar_int_mode mode;
Kenneth Zadeck committed
419
  widest_int iterations;
Zdenek Dvorak committed
420 421 422 423 424 425 426

  jump_insn = BB_END (loop_end);

  if (dump_file)
    {
      fprintf (dump_file, "Doloop: Inserting doloop pattern (");
      if (desc->const_iter)
427
	fprintf (dump_file, "%" PRId64, desc->niter);
Zdenek Dvorak committed
428 429 430 431 432 433 434 435 436 437 438 439
      else
	fputs ("runtime", dump_file);
      fputs (" iterations).\n", dump_file);
    }

  /* Discard original jump to continue loop.  The original compare
     result may still be live, so it cannot be discarded explicitly.  */
  delete_insn (jump_insn);

  counter_reg = XEXP (condition, 0);
  if (GET_CODE (counter_reg) == PLUS)
    counter_reg = XEXP (counter_reg, 0);
440 441
  /* These patterns must operate on integer counters.  */
  mode = as_a <scalar_int_mode> (GET_MODE (counter_reg));
Zdenek Dvorak committed
442 443 444 445 446 447

  increment_count = false;
  switch (GET_CODE (condition))
    {
    case NE:
      /* Currently only NE tests against zero and one are supported.  */
448 449
      noloop = XEXP (condition, 1);
      if (noloop != const0_rtx)
Zdenek Dvorak committed
450
	{
451
	  gcc_assert (noloop == const1_rtx);
Zdenek Dvorak committed
452 453 454 455 456 457
	  increment_count = true;
	}
      break;

    case GE:
      /* Currently only GE tests against zero are supported.  */
458
      gcc_assert (XEXP (condition, 1) == const0_rtx);
Zdenek Dvorak committed
459 460 461 462 463 464 465 466

      noloop = constm1_rtx;

      /* The iteration count does not need incrementing for a GE test.  */
      increment_count = false;

      /* Determine if the iteration counter will be non-negative.
	 Note that the maximum value loaded is iterations_max - 1.  */
467
      if (get_max_loop_iterations (loop, &iterations)
Kenneth Zadeck committed
468 469 470
	  && wi::leu_p (iterations,
			wi::set_bit_in_zero <widest_int>
			(GET_MODE_PRECISION (mode) - 1)))
Zdenek Dvorak committed
471 472 473
	nonneg = 1;
      break;

474
      /* Abort if an invalid doloop pattern has been generated.  */
475
    default:
476
      gcc_unreachable ();
Zdenek Dvorak committed
477 478 479
    }

  if (increment_count)
Joseph Myers committed
480
    count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
481

Zdenek Dvorak committed
482 483
  /* Insert initialization of the count register into the loop header.  */
  start_sequence ();
484 485 486
  /* count has been already copied through copy_rtx.  */
  reset_used_flags (count);
  set_used_flags (condition);
Zdenek Dvorak committed
487 488 489
  tmp = force_operand (count, counter_reg);
  convert_move (counter_reg, tmp, 1);
  sequence = get_insns ();
490
  unshare_all_rtl_in_chain (sequence);
Zdenek Dvorak committed
491 492 493 494 495
  end_sequence ();
  emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));

  if (desc->noloop_assumptions)
    {
496
      rtx ass = copy_rtx (desc->noloop_assumptions);
Zdenek Dvorak committed
497
      basic_block preheader = loop_preheader_edge (loop)->src;
498 499
      basic_block set_zero = split_edge (loop_preheader_edge (loop));
      basic_block new_preheader = split_edge (loop_preheader_edge (loop));
Zdenek Dvorak committed
500 501 502 503
      edge te;

      /* Expand the condition testing the assumptions and if it does not pass,
	 reset the count register to 0.  */
504
      redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
Zdenek Dvorak committed
505 506
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);

507
      set_zero->count = profile_count::uninitialized ();
Zdenek Dvorak committed
508

509 510 511 512 513 514
      te = single_succ_edge (preheader);
      for (; ass; ass = XEXP (ass, 1))
	if (!add_test (XEXP (ass, 0), &te, set_zero))
	  break;

      if (ass)
Zdenek Dvorak committed
515
	{
516 517 518 519 520 521 522
	  /* We reached a condition that is always true.  This is very hard to
	     reproduce (such a loop does not roll, and thus it would most
	     likely get optimized out by some of the preceding optimizations).
	     In fact, I do not have any testcase for it.  However, it would
	     also be very hard to show that it is impossible, so we must
	     handle this case.  */
	  set_zero->count = preheader->count;
Zdenek Dvorak committed
523
	}
H.J. Lu committed
524

525 526 527 528 529 530 531 532 533 534 535 536 537 538
      if (EDGE_COUNT (set_zero->preds) == 0)
	{
	  /* All the conditions were simplified to false, remove the
	     unreachable set_zero block.  */
	  delete_basic_block (set_zero);
	}
      else
	{
	  /* Reset the counter to zero in the set_zero block.  */
	  start_sequence ();
	  convert_move (counter_reg, noloop, 0);
	  sequence = get_insns ();
	  end_sequence ();
	  emit_insn_after (sequence, BB_END (set_zero));
H.J. Lu committed
539

540
	  set_immediate_dominator (CDI_DOMINATORS, set_zero,
541 542
				   recompute_dominator (CDI_DOMINATORS,
							set_zero));
543 544 545
	}

      set_immediate_dominator (CDI_DOMINATORS, new_preheader,
546 547
			       recompute_dominator (CDI_DOMINATORS,
						    new_preheader));
Zdenek Dvorak committed
548 549 550 551
    }

  /* Some targets (eg, C4x) need to initialize special looping
     registers.  */
552 553 554
  if (targetm.have_doloop_begin ())
    if (rtx_insn *seq = targetm.gen_doloop_begin (counter_reg, doloop_seq))
      emit_insn_after (seq, BB_END (loop_preheader_edge (loop)->src));
Zdenek Dvorak committed
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570

  /* Insert the new low-overhead looping insn.  */
  emit_jump_insn_after (doloop_seq, BB_END (loop_end));
  jump_insn = BB_END (loop_end);
  jump_label = block_label (desc->in_edge->dest);
  JUMP_LABEL (jump_insn) = jump_label;
  LABEL_NUSES (jump_label)++;

  /* Ensure the right fallthru edge is marked, for case we have reversed
     the condition.  */
  desc->in_edge->flags &= ~EDGE_FALLTHRU;
  desc->out_edge->flags |= EDGE_FALLTHRU;

  /* Add a REG_NONNEG note if the actual or estimated maximum number
     of iterations is non-negative.  */
  if (nonneg)
571 572
    add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);

Mircea Namolaru committed
573
  /* Update the REG_BR_PROB note.  */
574 575
  if (desc->in_edge->probability.initialized_p ())
    add_reg_br_prob_note (jump_insn, desc->in_edge->probability);
Zdenek Dvorak committed
576 577
}

578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
/* Called through note_stores.  */

static void
record_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
{
  bitmap mod = (bitmap)data;
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);
      if (HARD_REGISTER_P (x))
	{
	  unsigned int end_regno = end_hard_regno (GET_MODE (x), regno);
	  do
	    bitmap_set_bit (mod, regno);
	  while (++regno < end_regno);
	}
      else
	bitmap_set_bit (mod, regno);
    }
}

Zdenek Dvorak committed
599 600 601 602 603 604
/* Process loop described by LOOP validating that the loop is suitable for
   conversion to use a low overhead looping instruction, replacing the jump
   insn where suitable.  Returns true if the loop was successfully
   modified.  */

static bool
605
doloop_optimize (class loop *loop)
Zdenek Dvorak committed
606
{
607
  scalar_int_mode mode;
608
  rtx doloop_reg;
609
  rtx count;
Kenneth Zadeck committed
610
  widest_int iterations, iterations_max;
611
  rtx_code_label *start_label;
Zdenek Dvorak committed
612
  rtx condition;
613 614
  unsigned level;
  HOST_WIDE_INT est_niter;
615
  int max_cost;
616
  class niter_desc *desc;
617 618
  unsigned word_mode_size;
  unsigned HOST_WIDE_INT word_mode_max;
619
  int entered_at_top;
Zdenek Dvorak committed
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638

  if (dump_file)
    fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);

  iv_analysis_loop_init (loop);

  /* Find the simple exit of a LOOP.  */
  desc = get_simple_loop_desc (loop);

  /* Check that loop is a candidate for a low-overhead looping insn.  */
  if (!doloop_valid_p (loop, desc))
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: The loop is not suitable.\n");
      return false;
    }
  mode = desc->mode;

639 640
  est_niter = get_estimated_loop_iterations_int (loop);
  if (est_niter == -1)
641
    est_niter = get_likely_max_loop_iterations_int (loop);
642 643

  if (est_niter >= 0 && est_niter < 3)
Zdenek Dvorak committed
644 645 646 647
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Too few iterations (%u) to be profitable.\n",
648
		 (unsigned int)est_niter);
Zdenek Dvorak committed
649 650 651
      return false;
    }

652
  max_cost
653
    = COSTS_N_INSNS (param_max_iterations_computation_cost);
654
  if (set_src_cost (desc->niter_expr, mode, optimize_loop_for_speed_p (loop))
655
      > max_cost)
656 657 658
    {
      if (dump_file)
	fprintf (dump_file,
659
		 "Doloop: number of iterations too costly to compute.\n");
660 661 662
      return false;
    }

663
  if (desc->const_iter)
664
    iterations = widest_int::from (rtx_mode_t (desc->niter_expr, mode),
Kenneth Zadeck committed
665
				   UNSIGNED);
666
  else
Kenneth Zadeck committed
667
    iterations = 0;
668
  if (!get_max_loop_iterations (loop, &iterations_max))
Kenneth Zadeck committed
669
    iterations_max = 0;
Zdenek Dvorak committed
670
  level = get_loop_level (loop) + 1;
671 672 673 674 675 676 677 678 679
  entered_at_top = (loop->latch == desc->in_edge->dest
		    && contains_no_active_insn_p (loop->latch));
  if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
				 entered_at_top))
    {
      if (dump_file)
	fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
      return false;
    }
Zdenek Dvorak committed
680 681 682 683

  /* Generate looping insn.  If the pattern FAILs then give up trying
     to modify the loop since there is some aspect the back-end does
     not like.  */
684
  count = copy_rtx (desc->niter_expr);
Zdenek Dvorak committed
685 686
  start_label = block_label (desc->in_edge->dest);
  doloop_reg = gen_reg_rtx (mode);
687
  rtx_insn *doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
688

689
  word_mode_size = GET_MODE_PRECISION (word_mode);
690
  word_mode_max = (HOST_WIDE_INT_1U << (word_mode_size - 1) << 1) - 1;
691 692 693 694 695
  if (! doloop_seq
      && mode != word_mode
      /* Before trying mode different from the one in that # of iterations is
	 computed, we must be sure that the number of iterations fits into
	 the new mode.  */
696
      && (word_mode_size >= GET_MODE_PRECISION (mode)
Kenneth Zadeck committed
697
 	  || wi::leu_p (iterations_max, word_mode_max)))
Zdenek Dvorak committed
698
    {
699
      if (word_mode_size > GET_MODE_PRECISION (mode))
700
	count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
701
      else
702
	count = lowpart_subreg (word_mode, count, mode);
Zdenek Dvorak committed
703
      PUT_MODE (doloop_reg, word_mode);
704
      doloop_seq = targetm.gen_doloop_end (doloop_reg, start_label);
Zdenek Dvorak committed
705 706 707 708 709 710 711 712 713 714
    }
  if (! doloop_seq)
    {
      if (dump_file)
	fprintf (dump_file,
		 "Doloop: Target unwilling to use doloop pattern!\n");
      return false;
    }

  /* If multiple instructions were created, the last must be the
715 716 717 718 719 720
     jump instruction.  */
  rtx_insn *doloop_insn = doloop_seq;
  while (NEXT_INSN (doloop_insn) != NULL_RTX)
    doloop_insn = NEXT_INSN (doloop_insn);
  if (!JUMP_P (doloop_insn)
      || !(condition = doloop_condition_get (doloop_insn)))
Zdenek Dvorak committed
721 722 723 724 725 726
    {
      if (dump_file)
	fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
      return false;
    }

727 728 729 730 731 732
  /* Ensure that the new sequence doesn't clobber a register that
     is live at the end of the block.  */
  {
    bitmap modified = BITMAP_ALLOC (NULL);

    for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i))
733
      note_stores (i, record_reg_sets, modified);
734 735 736 737 738 739 740 741 742 743 744 745 746

    basic_block loop_end = desc->out_edge->src;
    bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified);
    BITMAP_FREE (modified);

    if (fail)
      {
	if (dump_file)
	  fprintf (dump_file, "Doloop: doloop pattern clobbers live out\n");
	return false;
      }
  }

Joseph Myers committed
747
  doloop_modify (loop, desc, doloop_seq, condition, count);
Zdenek Dvorak committed
748 749 750
  return true;
}

751
/* This is the main entry point.  Process all loops using doloop_optimize.  */
Zdenek Dvorak committed
752 753

void
754
doloop_optimize_loops (void)
Zdenek Dvorak committed
755
{
756
  class loop *loop;
Zdenek Dvorak committed
757

758 759 760 761 762 763
  if (optimize == 1)
    {
      df_live_add_problem ();
      df_live_set_all_dirty ();
    }

764
  FOR_EACH_LOOP (loop, 0)
Zdenek Dvorak committed
765 766 767 768
    {
      doloop_optimize (loop);
    }

769 770 771
  if (optimize == 1)
    df_remove_problem (df_live);

Zdenek Dvorak committed
772 773
  iv_analysis_done ();

774
  checking_verify_loop_structure ();
Zdenek Dvorak committed
775
}