lower-subreg.c 48 KB
Newer Older
1
/* Decompose multiword subregs.
2
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 4 5 6 7 8 9
   Contributed by Richard Henderson <rth@redhat.com>
		  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 3, or (at your option) any later
11 12 13 14 15 16 17 18
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
19 20
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 22 23 24

#include "config.h"
#include "system.h"
#include "coretypes.h"
25
#include "backend.h"
26
#include "rtl.h"
27 28
#include "tree.h"
#include "cfghooks.h"
29
#include "df.h"
30
#include "memmodel.h"
31
#include "tm_p.h"
32
#include "expmed.h"
33
#include "insn-config.h"
34 35
#include "emit-rtl.h"
#include "recog.h"
36 37
#include "cfgrtl.h"
#include "cfgbuild.h"
38
#include "dce.h"
39 40
#include "expr.h"
#include "tree-pass.h"
41
#include "lower-subreg.h"
42
#include "rtl-iter.h"
43
#include "target.h"
44 45 46


/* Decompose multi-word pseudo-registers into individual
47 48 49 50 51 52 53 54
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
55 56 57
   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do this, but the trend in
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.   This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
76 77 78 79 80 81 82 83 84

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

85 86 87 88 89 90
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

91 92
/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  */
93
static vec<bitmap> reg_copy_graph;
94

95 96 97 98 99 100 101 102 103 104 105
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices

106 107 108 109 110 111 112
/* Return true if MODE is a mode we know how to lower.  When returning true,
   store its byte size in *BYTES and its word size in *WORDS.  */

static inline bool
interesting_mode_p (machine_mode mode, unsigned int *bytes,
		    unsigned int *words)
{
  /* Reject modes whose size is not a compile-time constant; we can only
     lower modes with a fixed byte size.  */
  if (!GET_MODE_SIZE (mode).is_constant (bytes))
    return false;
  /* Round the byte size up to a whole number of words.  */
  *words = CEIL (*bytes, UNITS_PER_WORD);
  return true;
}

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/* RTXes used while computing costs.  These scratch rtxes are built once
   in init_lower_subreg and then mutated in place (via PUT_MODE/PUT_CODE)
   for each cost query, avoiding repeated rtx allocation.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};

/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    machine_mode mode, int op1)
{
  /* Rewrite the scratch shift rtx in place to describe the requested
     operation rather than building a fresh rtx per query.  */
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
  return set_src_cost (rtxes->shift, mode, speed_p);
}

/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the whole shift in twice_word_mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of the word that needs a real shift: when the residual
	 count is zero it is just a word-to-word move, otherwise a
	 word_mode shift by the residual amount.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other word: zero for logical shifts;
	 for arithmetic right shifts it is a sign word, obtained by a
	 word_mode shift by BITS_PER_WORD - 1 (or just a move in the
	 maximal-shift case).  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      /* Split when the split form is no more expensive than the
	 double-word form (or unconditionally under FORCE_LOWERING).  */
      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}

/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   recorded in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Cost of loading zero into a word-mode register.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  /* Cost of a word-mode register-to-register move.  */
  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a move in that mode should
     be split into word-mode moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      unsigned int size, factor;
      if (interesting_mode_p (mode, &size, &factor) && factor > 1)
	{
	  unsigned int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  /* Split when FACTOR word moves are no more expensive than one
	     full-mode move.  */
	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case to check here is whether moving the upper part
	 with a zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Decide per shift count whether to split each kind of
	 double-word shift.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}

/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();

  /* Build the scratch rtxes used for all cost queries.  The register
     numbers are arbitrary; they just must not clash with the virtual
     registers.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Compute both the size-oriented and the speed-oriented choices.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
299 300 301 302 303 304 305 306 307 308 309 310

/* Return true if X is an operand a simple move can handle: an OBJECT_P
   rtx (looking through one SUBREG) that is not a symbolic constant and
   not a volatile or mode-dependent memory reference.  */

static bool
simple_move_operand (rtx x)
{
  rtx op = x;

  /* Look through a single SUBREG wrapper.  */
  if (GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (!OBJECT_P (op))
    return false;

  switch (GET_CODE (op))
    {
    case LABEL_REF:
    case SYMBOL_REF:
    case HIGH:
    case CONST:
      /* Reject symbolic constants.  */
      return false;

    case MEM:
      /* Reject volatile accesses and mode-dependent addresses.  */
      return !(MEM_VOLATILE_P (op)
	       || mode_dependent_address_p (XEXP (op, 0),
					    MEM_ADDR_SPACE (op)));

    default:
      return true;
    }
}

323 324 325 326 327 328
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  /* A simple move has exactly two operands.  */
  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* The destination must be one of the two recognized operands and an
     operand we know how to handle.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only consider the move if the cost model decided this mode is
     worth splitting.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}

/* If SET is a copy from one multi-word pseudo-register to another,
   record that in reg_copy_graph.  Return whether it is such a
   copy.  */

static bool
find_pseudo_copy (rtx set)
{
  rtx dst = SET_DEST (set);
  rtx src = SET_SRC (set);

  /* Only plain register-to-register copies are of interest.  */
  if (!REG_P (dst) || !REG_P (src))
    return false;

  unsigned int dst_regno = REGNO (dst);
  unsigned int src_regno = REGNO (src);

  /* Hard registers are never decomposed, so don't track them.  */
  if (HARD_REGISTER_NUM_P (dst_regno) || HARD_REGISTER_NUM_P (src_regno))
    return false;

  /* Lazily allocate the per-source bitmap, then record the edge
     SRC_REGNO -> DST_REGNO in the copy graph.  */
  bitmap edges = reg_copy_graph[src_regno];
  if (edges == NULL)
    {
      edges = BITMAP_ALLOC (NULL);
      reg_copy_graph[src_regno] = edges;
    }

  bitmap_set_bit (edges, dst_regno);

  return true;
}

/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  auto_bitmap queue, propagate;

  /* Compute a transitive closure over the copy graph: start from every
     decomposable register and follow copy edges until no new registers
     are added.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    /* Add all copy targets of register I, excluding any that
	       are known to be non-decomposable.  */
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Only the registers not already marked decomposable need another
	 round of propagation.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));
}

/* Classification of the insn currently being scanned.  A pointer to
   one of these values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};

457 458 459 460 461
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI points at the classification of the move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are not decomposed; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
	      || !interesting_mode_p (GET_MODE (inner), &inner_size,
				      &inner_words))
	    continue;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the subrtxes to avoid
	     iterating over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1
	      && inner_words > 1
	      /* Don't allow to decompose floating point subregs of
		 multi-word pseudos if the floating point mode does
		 not have word size, because otherwise we'd generate
		 a subreg with that floating mode from a different
		 sized integral pseudo which is not allowed by
		 validate_subreg.  */
	      && (!FLOAT_MODE_P (GET_MODE (x))
		  || outer_size == UNITS_PER_WORD))
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno, size, words;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && interesting_mode_p (GET_MODE (x), &size, &words)
	      && words > 1)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  /* Only decompose if the word pieces can live where the
		     full value can, i.e. the modes are tieable.  */
		  if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}

/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int size, words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  /* The register no longer exists as a single entity.  */
  regno_reg_rtx[regno] = NULL_RTX;

  if (!interesting_mode_p (GET_MODE (reg), &size, &words))
    gcc_unreachable ();

  /* Create one new word-mode pseudo for each word of the original
     register, recording each word's byte offset within the original.  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Smash the REG into a CONCATN of the new pseudos, in place.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}

/* Get a SUBREG of a CONCATN.  OP must be a CONCATN; return the rtx for
   the OUTERMODE piece of it starting at byte offset ORIG_BYTE, or
   NULL_RTX if the requested piece does not fall within a single
   CONCATN element.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
  unsigned int outer_size, outer_words, inner_size, inner_words;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;
  unsigned int byte;

  innermode = GET_MODE (op);
  if (!interesting_mode_p (outermode, &outer_size, &outer_words)
      || !interesting_mode_p (innermode, &inner_size, &inner_words))
    gcc_unreachable ();

  /* Must be constant if interesting_mode_p passes.  */
  byte = orig_byte.to_constant ();

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % outer_size == 0);

  gcc_assert (byte < inner_size);
  if (outer_size > inner_size)
    return NULL_RTX;

  /* Narrow INNER_SIZE to the size of one CONCATN element, and pick the
     element that contains BYTE.  */
  inner_size /= XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* Offset of the requested piece within the chosen element.  Give up
     if the piece straddles two elements.  */
  final_offset = byte % inner_size;
  if (final_offset + outer_size > inner_size)
    return NULL_RTX;

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    partmode = mode_for_size (inner_size * BITS_PER_UNIT,
			      GET_MODE_CLASS (innermode), 0).require ();

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}

/* Wrapper around simplify_gen_subreg which handles CONCATN.  Return the
   OUTERMODE piece of OP (which has mode INNERMODE) at byte offset BYTE,
   looking through CONCATNs produced by decompose_register.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, offset-0 mode change: recurse directly on the
	 underlying CONCATN.  */
      if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
		    GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && known_eq (SUBREG_BYTE (op), 0))
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      /* Otherwise try to extract the part that OP itself denotes.  */
      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
	  gcc_assert (!paradoxical_subreg_p (op));
	  /* The piece straddled CONCATN elements; fold the two byte
	     offsets together and extract from the CONCATN directly.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && paradoxical_subreg_p (op))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}

/* Return whether we should resolve X into the registers into which it
   was decomposed.  decompose_register smashes a decomposed REG into a
   CONCATN, so the CONCATN code identifies such registers.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}

/* Return whether X is a SUBREG of a register which we need to
   resolve.  */

static bool
resolve_subreg_p (rtx x)
{
  return GET_CODE (x) == SUBREG && resolve_reg_p (SUBREG_REG (x));
}

740
/* Look for SUBREGs in *LOC which need to be decomposed and queue
   replacements for them via validate_change.  INSN is the containing
   insn, or NULL_RTX when *LOC is a note expression.  Return true if a
   direct reference to a decomposed register was found (in which case
   the caller must remove the note).  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return true to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue the replacement; the caller commits or cancels the
	     whole group of changes.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return true to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}

/* Resolve any decomposed registers which appear in register notes on
   INSN.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  /* A REG_EQUAL/REG_EQUIV note either gets its subregs resolved, or is
     removed entirely if it references a decomposed register directly.  */
  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	/* Tell DF about the note only if resolve_subreg_use actually
	   queued a change to it.  */
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Drop REG_DEAD and REG_UNUSED notes that mention decomposed
     registers; they no longer describe a live entity.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      /* Unlink the note or advance past it.  */
      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}

822
/* Return whether X can be decomposed into subwords.  */
823 824

static bool
825
can_decompose_p (rtx x)
826 827 828 829 830 831
{
  if (REG_P (x))
    {
      unsigned int regno = REGNO (x);

      if (HARD_REGISTER_NUM_P (regno))
832
	{
833
	  unsigned int byte, num_bytes, num_words;
834

835 836
	  if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
	    return false;
837 838 839 840 841
	  for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
	    if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
	      return false;
	  return true;
	}
842
      else
843
	return !bitmap_bit_p (subreg_context, regno);
844 845
    }

846
  return true;
847 848 849 850 851 852
}

/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  May recurse (via emitted sub-moves and
   via itself) to handle oversize SUBREGs on either side.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int orig_size, words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  /* Only multiword modes are interesting here; a simple move of a
     single-word mode should never have been classified for us.  */
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (maybe_ne (SUBREG_BYTE (src), 0)
	  || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (maybe_ne (SUBREG_BYTE (dest), 0)
	  || maybe_ne (orig_size,
		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC into a fresh pseudo first, recursively resolving
	 that move, and then use the pseudo as the new source.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

      if (AUTO_INC_DEC)
	{
	  /* Preserve any REG_INC note for an auto-inc/dec memory
	     source on the new move, so DF stays consistent.  */
	  rtx_insn *move = emit_move_insn (reg, src);
	  if (MEM_P (src))
	    {
	      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	      if (note)
		add_reg_note (move, REG_INC, XEXP (note, 0));
	    }
	}
      else
	emit_move_insn (reg, src);

      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	dest_mode = int_mode_for_mode (dest_mode).require ();
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (orig_size % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the order in which the words are pushed so that they
	 end up laid out correctly for the word order.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobbering a multiword pseudo first tells DF that the whole
	 register dies here, before the word-by-word stores.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

  /* NOTE: despite its indentation, this block executes inside the
     enclosing "real_dest != NULL_RTX" arm: carry a REG_INC note over
     to the final move when the real destination is auto-inc memory.  */
  if (AUTO_INC_DEC && MEM_P (real_dest)
      && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
    {
      rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
      if (note)
	add_reg_note (minsn, REG_INC, XEXP (note, 0));
    }

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}

/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
1088
resolve_clobber (rtx pat, rtx_insn *insn)
1089
{
1090
  rtx reg;
1091
  machine_mode orig_mode;
1092
  unsigned int orig_size, words, i;
1093
  int ret;
1094 1095

  reg = XEXP (pat, 0);
1096
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1097 1098 1099
    return false;

  orig_mode = GET_MODE (reg);
1100 1101
  if (!interesting_mode_p (orig_mode, &orig_size, &words))
    gcc_unreachable ();
1102

1103 1104 1105 1106
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
1107
  df_insn_rescan (insn);
1108 1109
  gcc_assert (ret != 0);

1110 1111 1112 1113
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

1114 1115
      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
1116 1117 1118 1119
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

1120 1121
  resolve_reg_notes (insn);

1122 1123 1124 1125 1126 1127 1128
  return true;
}

/* A USE of a decomposed register is no longer meaningful.  Return
   whether we changed something.  */

static bool
1129
resolve_use (rtx pat, rtx_insn *insn)
1130 1131 1132 1133 1134 1135
{
  if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
    {
      delete_insn (insn);
      return true;
    }
1136 1137 1138

  resolve_reg_notes (insn);

1139 1140 1141
  return false;
}

1142 1143 1144
/* A VAR_LOCATION can be simplified.  */

static void
1145
resolve_debug (rtx_insn *insn)
1146
{
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }
1165 1166 1167 1168 1169 1170

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}

1171 1172 1173 1174
/* Check if INSN is a decomposable multiword-shift or zero-extend and
   set the decomposable_context bitmap accordingly.  SPEED_P is true
   if we are optimizing INSN for speed rather than size.  Return true
   if INSN is decomposable.  */
1175

1176
static bool
1177
find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1178 1179 1180 1181 1182 1183 1184
{
  rtx set;
  rtx op;
  rtx op_operand;

  set = single_set (insn);
  if (!set)
1185
    return false;
1186 1187 1188 1189

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
1190
      && GET_CODE (op) != ASHIFTRT
1191
      && GET_CODE (op) != ZERO_EXTEND)
1192
    return false;
1193 1194 1195 1196 1197

  op_operand = XEXP (op, 0);
  if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
      || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
      || HARD_REGISTER_NUM_P (REGNO (op_operand))
1198 1199
      || GET_MODE (op) != twice_word_mode)
    return false;
1200 1201 1202 1203

  if (GET_CODE (op) == ZERO_EXTEND)
    {
      if (GET_MODE (op_operand) != word_mode
1204 1205
	  || !choices[speed_p].splitting_zext)
	return false;
1206 1207 1208
    }
  else /* left or right shift */
    {
1209 1210
      bool *splitting = (GET_CODE (op) == ASHIFT
			 ? choices[speed_p].splitting_ashift
1211 1212
			 : GET_CODE (op) == ASHIFTRT
			 ? choices[speed_p].splitting_ashiftrt
1213
			 : choices[speed_p].splitting_lshiftrt);
Shujing Zhao committed
1214
      if (!CONST_INT_P (XEXP (op, 1))
1215 1216 1217 1218 1219 1220
	  || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
			2 * BITS_PER_WORD - 1)
	  || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
	return false;

      bitmap_set_bit (decomposable_context, REGNO (op_operand));
1221 1222 1223 1224
    }

  bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));

1225
  return true;
1226 1227 1228 1229 1230 1231 1232
}

/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
  scalar_int_mode inner_mode;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);
  if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
    return NULL;

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword
     operand is reversed.  */
  if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* OFFSET1 addresses the word of the destination that receives the
     (possibly shifted) source word; OFFSET2 addresses the other
     destination word, which gets zero, a sign copy, or UPPER_SRC.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
                                          GET_MODE (SET_DEST (set)),
                                          offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
                                         GET_MODE (op_operand),
                                         src_offset);
  /* For an arithmetic right shift by less than 2*BITS_PER_WORD-1, the
     upper word must be filled with copies of the source's sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  /* A shift by more than BITS_PER_WORD needs a residual single-word
     shift of SHIFT_COUNT - BITS_PER_WORD on the surviving word.  */
  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting right by all bits: both words are the sign word.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}

1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364
/* Print to dump_file a description of what we're doing with shift code CODE.
   SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD.  */

static void
dump_shift_choices (enum rtx_code code, bool *splitting)
{
  fprintf (dump_file,
	   "  Splitting mode %s for %s lowering with shift amounts = ",
	   GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));

  /* List every shift amount we have decided to split, comma-separated.  */
  const char *sep = "";
  for (int amount = 0; amount < BITS_PER_WORD; amount++)
    {
      if (!splitting[amount])
	continue;
      fprintf (dump_file, "%s%d", sep, amount + BITS_PER_WORD);
      sep = ",";
    }
  fprintf (dump_file, "\n");
}

/* Print to dump_file a description of what we're doing when optimizing
   for speed or size; SPEED_P says which.  DESCRIPTION is a description
   of the SPEED_P choice.  */

static void
dump_choices (bool speed_p, const char *description)
{
1365
  unsigned int size, factor, i;
1366 1367 1368 1369

  fprintf (dump_file, "Choices when optimizing for %s:\n", description);

  for (i = 0; i < MAX_MACHINE_MODE; i++)
1370 1371
    if (interesting_mode_p ((machine_mode) i, &size, &factor)
	&& factor > 1)
1372 1373 1374 1375
      fprintf (dump_file, "  %s mode %s for copy lowering.\n",
	       choices[speed_p].move_modes_to_split[i]
	       ? "Splitting"
	       : "Skipping",
1376
	       GET_MODE_NAME ((machine_mode) i));
1377 1378 1379 1380 1381 1382

  fprintf (dump_file, "  %s mode %s for zero_extend lowering.\n",
	   choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
	   GET_MODE_NAME (twice_word_mode));

  dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1383 1384
  dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
  dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1385 1386 1387
  fprintf (dump_file, "\n");
}

1388
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Dead code elimination at word granularity can expose more
     decomposition opportunities; only possible once df exists.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* First scan: classify every insn and populate the decomposable /
     non-decomposable context bitmaps and the pseudo-copy graph.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */

	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in any forbidding context must not be decomposed.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Second scan: rewrite every insn that mentions a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matched duplicate operands in sync with the
			 operands that were just rewritten.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
	        insn = NEXT_INSN (insn);
	    }
	}
    }

  /* Release the per-pseudo copy bitmaps and the context bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}

/* Implement first lower subreg pass.  This runs early (before most
   RTL optimizations) and does not decompose pseudo-to-pseudo copies;
   see decompose_multiword_subregs (false) below.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* false: do not treat pseudo-to-pseudo copies as decomposable.  */
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

/* Factory function used by the pass manager.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}

1722 1723
/* Implement second lower subreg pass.  Unlike the first pass, this
   one also decomposes pseudo-to-pseudo copies and requests a df
   finish; see decompose_multiword_subregs (true) below.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* Only run when -fsplit-wide-types is enabled.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      /* true: pseudo-to-pseudo copies are decomposable this late.  */
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

/* Factory function used by the pass manager.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}