ree.c 29.5 KB
Newer Older
1
/* Redundant Extension Elimination pass for the GNU compiler.
2
   Copyright (C) 2010-2013 Free Software Foundation, Inc.
3
   Contributed by Ilya Enkovich (ilya.enkovich@intel.com)
4

5 6
   Based on the Redundant Zero-extension elimination pass contributed by
   Sriraman Tallam (tmsriram@google.com) and Silvius Rus (rus@google.com).
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */


/* Problem Description :
   --------------------
27 28 29 30 31 32 33 34
   This pass is intended to remove redundant extension instructions.
   Such instructions appear for different reasons.  We expect some of
   them due to implicit zero-extension in 64-bit registers after writing
   to their lower 32-bit half (e.g. for the x86-64 architecture).
   Another possible reason is a type cast which follows a load (for
   instance a register restore) and which can be combined into a single
   instruction, and for which earlier local passes, e.g. the combiner,
   weren't able to optimize.
35 36 37 38 39

   How does this pass work  ?
   --------------------------

   This pass is run after register allocation.  Hence, all registers that
40 41 42 43 44
   this pass deals with are hard registers.  This pass first looks for an
   extension instruction that could possibly be redundant.  Such extension
   instructions show up in RTL with the pattern  :
   (set (reg:<SWI248> x) (any_extend:<SWI248> (reg:<SWI124> x))),
   where x can be any hard register.
45
   Now, this pass tries to eliminate this instruction by merging the
46
   extension with the definitions of register x.  For instance, if
47 48
   one of the definitions of register x was  :
   (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
49 50
   followed by extension  :
   (set (reg:DI x) (zero_extend:DI (reg:SI x)))
51 52 53
   then the combination converts this into :
   (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
   If all the merged definitions are recognizable assembly instructions,
54 55 56 57 58 59 60
   the extension is effectively eliminated.

   For example, for the x86-64 architecture, implicit zero-extensions
   are captured with appropriate patterns in the i386.md file.  Hence,
   these merged definition can be matched to a single assembly instruction.
   The original extension instruction is then deleted if all the
   definitions can be merged.
61 62

   However, there are cases where the definition instruction cannot be
63 64
   merged with an extension.  Examples are CALL instructions.  In such
   cases, the original extension is not redundant and this pass does
65 66 67 68 69
   not delete it.

   Handling conditional moves :
   ----------------------------

70 71
   Architectures like x86-64 support conditional moves whose semantics for
   extension differ from the other instructions.  For instance, the
72 73
   instruction *cmov ebx, eax*
   zero-extends eax onto rax only when the move from ebx to eax happens.
74
   Otherwise, eax may not be zero-extended.  Consider conditional moves as
75 76
   RTL instructions of the form
   (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
77
   This pass tries to merge an extension with a conditional move by
78
   actually merging the definitions of y and z with an extension and then
79 80
   converting the conditional move into :
   (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
81 82 83
   Since registers y and z are extended, register x will also be extended
   after the conditional move.  Note that this step has to be done
   transitively since the definition of a conditional copy can be
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
   another conditional copy.

   Motivating Example I :
   ---------------------
   For this program :
   **********************************************
   bad_code.c

   int mask[1000];

   int foo(unsigned x)
   {
     if (x < 10)
       x = x * 45;
     else
       x = x * 78;
     return mask[x];
   }
   **********************************************

104
   $ gcc -O2 bad_code.c
105 106 107
     ........
     400315:       b8 4e 00 00 00          mov    $0x4e,%eax
     40031a:       0f af f8                imul   %eax,%edi
108
     40031d:       89 ff                   mov    %edi,%edi - useless extension
109 110 111 112 113
     40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
     400326:       c3                      retq
     ......
     400330:       ba 2d 00 00 00          mov    $0x2d,%edx
     400335:       0f af fa                imul   %edx,%edi
114
     400338:       89 ff                   mov    %edi,%edi - useless extension
115 116 117
     40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
     400341:       c3                      retq

118
   $ gcc -O2 -free bad_code.c
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
     ......
     400315:       6b ff 4e                imul   $0x4e,%edi,%edi
     400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
     40031f:       c3                      retq
     400320:       6b ff 2d                imul   $0x2d,%edi,%edi
     400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
     40032a:       c3                      retq

   Motivating Example II :
   ---------------------

   Here is an example with a conditional move.

   For this program :
   **********************************************

   unsigned long long foo(unsigned x , unsigned y)
   {
     unsigned z;
     if (x > 100)
       z = x + y;
     else
       z = x - y;
     return (unsigned long long)(z);
   }

145
   $ gcc -O2 bad_code.c
146 147 148 149 150 151
     ............
     400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
     400363:       89 f8                   mov    %edi,%eax
     400365:       29 f0                   sub    %esi,%eax
     400367:       83 ff 65                cmp    $0x65,%edi
     40036a:       0f 43 c2                cmovae %edx,%eax
152
     40036d:       89 c0                   mov    %eax,%eax - useless extension
153 154
     40036f:       c3                      retq

155
   $ gcc -O2 -free bad_code.c
156 157 158 159 160 161 162 163 164
     .............
     400360:       89 fa                   mov    %edi,%edx
     400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
     400365:       29 f2                   sub    %esi,%edx
     400367:       83 ff 65                cmp    $0x65,%edi
     40036a:       89 d6                   mov    %edx,%esi
     40036c:       48 0f 42 c6             cmovb  %rsi,%rax
     400370:       c3                      retq

165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
  Motivating Example III :
  ---------------------

  Here is an example with a type cast.

  For this program :
  **********************************************

  void test(int size, unsigned char *in, unsigned char *out)
  {
    int i;
    unsigned char xr, xg, xy=0;

    for (i = 0; i < size; i++) {
      xr = *in++;
      xg = *in++;
      xy = (unsigned char) ((19595*xr + 38470*xg) >> 16);
      *out++ = xy;
    }
  }

  $ gcc -O2 bad_code.c
    ............
    10:   0f b6 0e                movzbl (%rsi),%ecx
    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
    17:   48 83 c6 02             add    $0x2,%rsi
191 192
    1b:   0f b6 c9                movzbl %cl,%ecx - useless extension
    1e:   0f b6 c0                movzbl %al,%eax - useless extension
193 194 195 196 197 198 199 200 201 202
    21:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
    27:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax

   $ gcc -O2 -free bad_code.c
     .............
    10:   0f b6 0e                movzbl (%rsi),%ecx
    13:   0f b6 46 01             movzbl 0x1(%rsi),%eax
    17:   48 83 c6 02             add    $0x2,%rsi
    1b:   69 c9 8b 4c 00 00       imul   $0x4c8b,%ecx,%ecx
    21:   69 c0 46 96 00 00       imul   $0x9646,%eax,%eax
203 204 205 206

   Usefulness :
   ----------

207 208 209
   The original redundant zero-extension elimination pass reported reduction
   of the dynamic instruction count of a compression benchmark by 2.8% and
   improvement of its run time by about 1%.
210

211 212 213 214 215
   The additional performance gain with the enhanced pass is mostly expected
   on in-order architectures where redundancy cannot be compensated by out of
   order execution.  Measurements showed up to 10% performance gain (reduced
   run time) on EEMBC 2.0 benchmarks on Atom processor with geomean performance
   gain 1%.  */
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "function.h"
#include "expr.h"
#include "insn-attr.h"
#include "recog.h"
234
#include "diagnostic-core.h"
235 236 237 238 239 240 241 242 243
#include "target.h"
#include "optabs.h"
#include "insn-codes.h"
#include "rtlhooks-def.h"
#include "params.h"
#include "tree-pass.h"
#include "df.h"
#include "cgraph.h"

244
/* This structure represents a candidate for elimination.  */
245

246
typedef struct ext_cand
247
{
248 249
  /* The expression.  */
  const_rtx expr;
250

251 252
  /* The kind of extension.  */
  enum rtx_code code;
253

254 255 256 257
  /* The destination mode.  */
  enum machine_mode mode;

  /* The instruction where it lives.  */
258
  rtx insn;
259
} ext_cand;
260 261


262 263
static int max_insn_uid;

264 265 266 267 268 269
/* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
   and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
   this code modifies the SET rtx to a new SET rtx that extends the
   right hand expression into a register on the left hand side.  Note
   that multiple assumptions are made about the nature of the set that
   needs to be true for this to work and is called from merge_def_and_ext.
270 271

   Original :
272
   (set (reg a) (expression))
273 274

   Transform :
275
   (set (reg a) (any_extend (expression)))
276 277

   Special Cases :
278
   If the expression is a constant or another extension, then directly
279
   assign it to the register.  */
280 281

static bool
282
combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
283
{
284 285 286
  rtx orig_src = SET_SRC (*orig_set);
  rtx new_reg = gen_rtx_REG (cand->mode, REGNO (SET_DEST (*orig_set)));
  rtx new_set;
287

288 289
  /* Merge constants by directly moving the constant into the register under
     some conditions.  Recall that RTL constants are sign-extended.  */
290
  if (GET_CODE (orig_src) == CONST_INT
291
      && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (cand->mode))
292
    {
293 294
      if (INTVAL (orig_src) >= 0 || cand->code == SIGN_EXTEND)
	new_set = gen_rtx_SET (VOIDmode, new_reg, orig_src);
295
      else
296 297 298 299
	{
	  /* Zero-extend the negative constant by masking out the bits outside
	     the source mode.  */
	  enum machine_mode src_mode = GET_MODE (SET_DEST (*orig_set));
300
	  rtx new_const_int
301 302
	    = gen_int_mode (INTVAL (orig_src) & GET_MODE_MASK (src_mode),
			    GET_MODE (new_reg));
303
	  new_set = gen_rtx_SET (VOIDmode, new_reg, new_const_int);
304 305 306 307
	}
    }
  else if (GET_MODE (orig_src) == VOIDmode)
    {
308
      /* This is mostly due to a call insn that should not be optimized.  */
309
      return false;
310
    }
311
  else if (GET_CODE (orig_src) == cand->code)
312
    {
313 314 315 316
      /* Here is a sequence of two extensions.  Try to merge them.  */
      rtx temp_extension
	= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
      rtx simplified_temp_extension = simplify_rtx (temp_extension);
317 318
      if (simplified_temp_extension)
        temp_extension = simplified_temp_extension;
319
      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
320 321 322
    }
  else if (GET_CODE (orig_src) == IF_THEN_ELSE)
    {
323
      /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
324 325 326 327 328
         in general, IF_THEN_ELSE should not be combined.  */
      return false;
    }
  else
    {
329 330 331 332
      /* This is the normal case.  */
      rtx temp_extension
	= gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
      rtx simplified_temp_extension = simplify_rtx (temp_extension);
333 334
      if (simplified_temp_extension)
        temp_extension = simplified_temp_extension;
335
      new_set = gen_rtx_SET (VOIDmode, new_reg, temp_extension);
336 337
    }

338
  /* This change is a part of a group of changes.  Hence,
339 340 341 342 343
     validate_change will not try to commit the change.  */
  if (validate_change (curr_insn, orig_set, new_set, true))
    {
      if (dump_file)
        {
344 345
          fprintf (dump_file,
		   "Tentatively merged extension with definition:\n");
346 347 348 349
          print_rtl_single (dump_file, curr_insn);
        }
      return true;
    }
350

351 352 353 354
  return false;
}

/* Treat if_then_else insns, where the operands of both branches
355
   are registers, as copies.  For instance,
356 357 358 359 360 361 362
   Original :
   (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
   Transformed :
   (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
   DEF_INSN is the if_then_else insn.  */

static bool
363
transform_ifelse (ext_cand *cand, rtx def_insn)
364 365 366 367 368 369 370 371 372
{
  rtx set_insn = PATTERN (def_insn);
  rtx srcreg, dstreg, srcreg2;
  rtx map_srcreg, map_dstreg, map_srcreg2;
  rtx ifexpr;
  rtx cond;
  rtx new_set;

  gcc_assert (GET_CODE (set_insn) == SET);
373

374 375 376 377
  cond = XEXP (SET_SRC (set_insn), 0);
  dstreg = SET_DEST (set_insn);
  srcreg = XEXP (SET_SRC (set_insn), 1);
  srcreg2 = XEXP (SET_SRC (set_insn), 2);
378 379 380 381 382
  /* If the conditional move already has the right or wider mode,
     there is nothing to do.  */
  if (GET_MODE_SIZE (GET_MODE (dstreg)) >= GET_MODE_SIZE (cand->mode))
    return true;

383 384 385 386
  map_srcreg = gen_rtx_REG (cand->mode, REGNO (srcreg));
  map_srcreg2 = gen_rtx_REG (cand->mode, REGNO (srcreg2));
  map_dstreg = gen_rtx_REG (cand->mode, REGNO (dstreg));
  ifexpr = gen_rtx_IF_THEN_ELSE (cand->mode, cond, map_srcreg, map_srcreg2);
387 388 389 390 391 392
  new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);

  if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
    {
      if (dump_file)
        {
393 394
          fprintf (dump_file,
		   "Mode of conditional move instruction extended:\n");
395 396 397 398
          print_rtl_single (dump_file, def_insn);
        }
      return true;
    }
399 400

  return false;
401 402
}

403 404 405 406
/* Get all the reaching definitions of an instruction.  The definitions are
   desired for REG used in INSN.  Return the definition list or NULL if a
   definition is missing.  If DEST is non-NULL, additionally push the INSN
   of the definitions onto DEST.  */
407

408
static struct df_link *
409
get_defs (rtx insn, rtx reg, vec<rtx> *dest)
410
{
411
  df_ref reg_info, *uses;
412
  struct df_link *ref_chain, *ref_link;
413 414 415

  reg_info = NULL;

416
  for (uses = DF_INSN_USES (insn); *uses; uses++)
417
    {
418
      reg_info = *uses;
419
      if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
420 421
        return NULL;
      if (REGNO (DF_REF_REG (reg_info)) == REGNO (reg))
422 423 424
        break;
    }

425
  gcc_assert (reg_info != NULL && uses != NULL);
426

427 428 429
  ref_chain = DF_REF_CHAIN (reg_info);

  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
430 431
    {
      /* Problem getting some definition for this instruction.  */
432 433 434 435
      if (ref_link->ref == NULL)
        return NULL;
      if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
        return NULL;
436 437
    }

438 439
  if (dest)
    for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
440
      dest->safe_push (DF_REF_INSN (ref_link->ref));
441

442
  return ref_chain;
443 444
}

445 446 447
/* Return true if INSN is
     (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
   and store x1 and x2 in REG_1 and REG_2.  */
448

449 450
static bool
is_cond_copy_insn (rtx insn, rtx *reg1, rtx *reg2)
451
{
452
  rtx expr = single_set (insn);
453

454 455
  if (expr != NULL_RTX
      && GET_CODE (expr) == SET
456 457 458
      && GET_CODE (SET_DEST (expr)) == REG
      && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
      && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
459
      && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
460
    {
461 462 463
      *reg1 = XEXP (SET_SRC (expr), 1);
      *reg2 = XEXP (SET_SRC (expr), 2);
      return true;
464 465
    }

466
  return false;
467 468
}

469 470 471 472 473 474 475 476 477 478
enum ext_modified_kind
{
  /* The insn hasn't been modified by ree pass yet.  */
  EXT_MODIFIED_NONE,
  /* Changed into zero extension.  */
  EXT_MODIFIED_ZEXT,
  /* Changed into sign extension.  */
  EXT_MODIFIED_SEXT
};

479
struct ATTRIBUTE_PACKED ext_modified
480 481 482 483 484 485 486 487 488 489 490 491 492 493
{
  /* Mode from which ree has zero or sign extended the destination.  */
  ENUM_BITFIELD(machine_mode) mode : 8;

  /* Kind of modification of the insn.  */
  ENUM_BITFIELD(ext_modified_kind) kind : 2;

  /* True if the insn is scheduled to be deleted.  */
  unsigned int deleted : 1;
};

/* Vectors used by combine_reaching_defs and its helpers.  */
typedef struct ext_state
{
494
  /* In order to avoid constant alloc/free, we keep these
495
     4 vectors live through the entire find_and_remove_re and just
496 497 498 499 500
     truncate them each time.  */
  vec<rtx> defs_list;
  vec<rtx> copies_list;
  vec<rtx> modified_list;
  vec<rtx> work_list;
501 502 503 504 505 506 507

  /* For instructions that have been successfully modified, this is
     the original mode from which the insn is extending and
     kind of extension.  */
  struct ext_modified *modified;
} ext_state;

508 509
/* Reaching Definitions of the extended register could be conditional copies
   or regular definitions.  This function separates the two types into two
510 511 512 513 514 515 516 517 518 519 520
   lists, STATE->DEFS_LIST and STATE->COPIES_LIST.  This is necessary because,
   if a reaching definition is a conditional copy, merging the extension with
   this definition is wrong.  Conditional copies are merged by transitively
   merging their definitions.  The defs_list is populated with all the reaching
   definitions of the extension instruction (EXTEND_INSN) which must be merged
   with an extension.  The copies_list contains all the conditional moves that
   will later be extended into a wider mode conditional move if all the merges
   are successful.  The function returns false upon failure, true upon
   success.  */

static bool
521
make_defs_and_copies_lists (rtx extend_insn, const_rtx set_pat,
522
			    ext_state *state)
523
{
524
  rtx src_reg = XEXP (SET_SRC (set_pat), 0);
525
  bool *is_insn_visited;
526 527
  bool ret = true;

528
  state->work_list.truncate (0);
529

530
  /* Initialize the work list.  */
531 532
  if (!get_defs (extend_insn, src_reg, &state->work_list))
    gcc_unreachable ();
533

534
  is_insn_visited = XCNEWVEC (bool, max_insn_uid);
535 536

  /* Perform transitive closure for conditional copies.  */
537
  while (!state->work_list.is_empty ())
538
    {
539
      rtx def_insn = state->work_list.pop ();
540 541
      rtx reg1, reg2;

542 543 544
      gcc_assert (INSN_UID (def_insn) < max_insn_uid);

      if (is_insn_visited[INSN_UID (def_insn)])
545
	continue;
546 547
      is_insn_visited[INSN_UID (def_insn)] = true;

548 549 550
      if (is_cond_copy_insn (def_insn, &reg1, &reg2))
	{
	  /* Push it onto the copy list first.  */
551
	  state->copies_list.safe_push (def_insn);
552 553

	  /* Now perform the transitive closure.  */
554 555
	  if (!get_defs (def_insn, reg1, &state->work_list)
	      || !get_defs (def_insn, reg2, &state->work_list))
556
	    {
557
	      ret = false;
558 559
	      break;
	    }
560 561
        }
      else
562
	state->defs_list.safe_push (def_insn);
563 564 565
    }

  XDELETEVEC (is_insn_visited);
566 567

  return ret;
568 569
}

570
/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
571 572 573
   on the SET pattern.  */

static bool
574
merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
575
{
576
  enum machine_mode ext_src_mode;
577 578 579 580 581
  enum rtx_code code;
  rtx *sub_rtx;
  rtx s_expr;
  int i;

582
  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand->expr), 0));
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
  code = GET_CODE (PATTERN (def_insn));
  sub_rtx = NULL;

  if (code == PARALLEL)
    {
      for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
        {
          s_expr = XVECEXP (PATTERN (def_insn), 0, i);
          if (GET_CODE (s_expr) != SET)
            continue;

          if (sub_rtx == NULL)
            sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
          else
            {
              /* PARALLEL with multiple SETs.  */
              return false;
            }
        }
    }
  else if (code == SET)
    sub_rtx = &PATTERN (def_insn);
  else
    {
      /* It is not a PARALLEL or a SET, what could it be ? */
      return false;
    }

  gcc_assert (sub_rtx != NULL);

613 614 615 616 617 618 619
  if (REG_P (SET_DEST (*sub_rtx))
      && (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
	  || ((state->modified[INSN_UID (def_insn)].kind
	       == (cand->code == ZERO_EXTEND
		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
	      && state->modified[INSN_UID (def_insn)].mode
		 == ext_src_mode)))
620
    {
621 622 623 624 625 626 627 628 629 630 631 632 633
      if (GET_MODE_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
	  >= GET_MODE_SIZE (cand->mode))
	return true;
      /* If def_insn is already scheduled to be deleted, don't attempt
	 to modify it.  */
      if (state->modified[INSN_UID (def_insn)].deleted)
	return false;
      if (combine_set_extension (cand, def_insn, sub_rtx))
	{
	  if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
	    state->modified[INSN_UID (def_insn)].mode = ext_src_mode;
	  return true;
	}
634
    }
635 636

  return false;
637 638 639
}

/* This function goes through all reaching defs of the source
640 641 642 643 644 645 646
   of the candidate for elimination (CAND) and tries to combine
   the extension with the definition instruction.  The changes
   are made as a group so that even if one definition cannot be
   merged, all reaching definitions end up not being merged.
   When a conditional copy is encountered, merging is attempted
   transitively on its definitions.  It returns true upon success
   and false upon failure.  */
647 648

static bool
649
combine_reaching_defs (ext_cand *cand, const_rtx set_pat, ext_state *state)
650 651 652 653 654
{
  rtx def_insn;
  bool merge_successful = true;
  int i;
  int defs_ix;
655
  bool outcome;
656

657 658
  state->defs_list.truncate (0);
  state->copies_list.truncate (0);
659

660
  outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
661

662 663
  if (!outcome)
    return false;
664

665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
  /* If cand->insn has been already modified, update cand->mode to a wider
     mode if possible, or punt.  */
  if (state->modified[INSN_UID (cand->insn)].kind != EXT_MODIFIED_NONE)
    {
      enum machine_mode mode;
      rtx set;

      if (state->modified[INSN_UID (cand->insn)].kind
	  != (cand->code == ZERO_EXTEND
	      ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
	  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
	  || (set = single_set (cand->insn)) == NULL_RTX)
	return false;
      mode = GET_MODE (SET_DEST (set));
      gcc_assert (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (cand->mode));
      cand->mode = mode;
    }

683 684 685 686
  merge_successful = true;

  /* Go through the defs vector and try to merge all the definitions
     in this vector.  */
687 688
  state->modified_list.truncate (0);
  FOR_EACH_VEC_ELT (state->defs_list, defs_ix, def_insn)
689
    {
690
      if (merge_def_and_ext (cand, def_insn, state))
691
	state->modified_list.safe_push (def_insn);
692 693 694 695 696 697 698 699 700 701 702
      else
        {
          merge_successful = false;
          break;
        }
    }

  /* Now go through the conditional copies vector and try to merge all
     the copies in this vector.  */
  if (merge_successful)
    {
703
      FOR_EACH_VEC_ELT (state->copies_list, i, def_insn)
704
        {
705
          if (transform_ifelse (cand, def_insn))
706
	    state->modified_list.safe_push (def_insn);
707 708 709 710 711 712 713 714 715 716
          else
            {
              merge_successful = false;
              break;
            }
        }
    }

  if (merge_successful)
    {
717 718 719 720 721
      /* Commit the changes here if possible
	 FIXME: It's an all-or-nothing scenario.  Even if only one definition
	 cannot be merged, we entirely give up.  In the future, we should allow
	 extensions to be partially eliminated along those paths where the
	 definitions could be merged.  */
722 723 724
      if (apply_change_group ())
        {
          if (dump_file)
725
            fprintf (dump_file, "All merges were successful.\n");
726

727
	  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
728 729 730 731 732
	    if (state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE)
	      state->modified[INSN_UID (def_insn)].kind
		= (cand->code == ZERO_EXTEND
		   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT);

733 734 735 736
          return true;
        }
      else
        {
737 738
          /* Changes need not be cancelled explicitly as apply_change_group
             does it.  Print list of definitions in the dump_file for debug
739
             purposes.  This extension cannot be deleted.  */
740 741
          if (dump_file)
            {
742 743
	      fprintf (dump_file,
		       "Merge cancelled, non-mergeable definitions:\n");
744
	      FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
745
	        print_rtl_single (dump_file, def_insn);
746 747 748 749 750 751 752 753 754 755 756 757
            }
        }
    }
  else
    {
      /* Cancel any changes that have been made so far.  */
      cancel_changes (0);
    }

  return false;
}

758
/* Add an extension pattern that could be eliminated.  */
759 760

static void
761
add_removable_extension (const_rtx expr, rtx insn,
762
			 vec<ext_cand> *insn_list,
763
			 unsigned *def_map)
764
{
765 766
  enum rtx_code code;
  enum machine_mode mode;
767
  unsigned int idx;
768 769
  rtx src, dest;

770
  /* We are looking for SET (REG N) (ANY_EXTEND (REG N)).  */
771 772 773 774
  if (GET_CODE (expr) != SET)
    return;

  src = SET_SRC (expr);
775
  code = GET_CODE (src);
776
  dest = SET_DEST (expr);
777
  mode = GET_MODE (dest);
778 779

  if (REG_P (dest)
780
      && (code == SIGN_EXTEND || code == ZERO_EXTEND)
781 782 783
      && REG_P (XEXP (src, 0))
      && REGNO (dest) == REGNO (XEXP (src, 0)))
    {
784 785 786 787
      struct df_link *defs, *def;
      ext_cand *cand;

      /* First, make sure we can get all the reaching definitions.  */
788
      defs = get_defs (insn, XEXP (src, 0), NULL);
789
      if (!defs)
790
	{
791 792 793
	  if (dump_file)
	    {
	      fprintf (dump_file, "Cannot eliminate extension:\n");
794
	      print_rtl_single (dump_file, insn);
795 796 797
	      fprintf (dump_file, " because of missing definition(s)\n");
	    }
	  return;
798
	}
799 800 801 802

      /* Second, make sure the reaching definitions don't feed another and
	 different extension.  FIXME: this obviously can be improved.  */
      for (def = defs; def; def = def->next)
803
	if ((idx = def_map[INSN_UID (DF_REF_INSN (def->ref))])
804
	    && (cand = &(*insn_list)[idx - 1])
805
	    && cand->code != code)
806 807 808 809
	  {
	    if (dump_file)
	      {
	        fprintf (dump_file, "Cannot eliminate extension:\n");
810
		print_rtl_single (dump_file, insn);
811 812 813 814 815 816 817
	        fprintf (dump_file, " because of other extension\n");
	      }
	    return;
	  }

      /* Then add the candidate to the list and insert the reaching definitions
         into the definition map.  */
818
      ext_cand e = {expr, code, mode, insn};
819 820
      insn_list->safe_push (e);
      idx = insn_list->length ();
821 822

      for (def = defs; def; def = def->next)
823
	def_map[INSN_UID (DF_REF_INSN (def->ref))] = idx;
824 825 826
    }
}

827
/* Traverse the instruction stream looking for extensions and return the
828
   list of candidates.  */
829

830
static vec<ext_cand>
831
find_removable_extensions (void)
832
{
833
  vec<ext_cand> insn_list = vNULL;
834
  basic_block bb;
835
  rtx insn, set;
836
  unsigned *def_map = XCNEWVEC (unsigned, max_insn_uid);
837

838
  FOR_EACH_BB_FN (bb, cfun)
839 840 841 842
    FOR_BB_INSNS (bb, insn)
      {
	if (!NONDEBUG_INSN_P (insn))
	  continue;
843

844 845 846 847
	set = single_set (insn);
	if (set == NULL_RTX)
	  continue;
	add_removable_extension (set, insn, &insn_list, def_map);
848 849
      }

850
  XDELETEVEC (def_map);
851

852
  return insn_list;
853 854 855
}

/* This is the main function that checks the insn stream for redundant
856
   extensions and tries to remove them if possible.  */
857

858
static void
859
find_and_remove_re (void)
860
{
861
  ext_cand *curr_cand;
862
  rtx curr_insn = NULL_RTX;
863
  int num_re_opportunities = 0, num_realized = 0, i;
864
  vec<ext_cand> reinsn_list;
Trevor Saunders committed
865
  auto_vec<rtx> reinsn_del_list;
866
  ext_state state;
867 868

  /* Construct DU chain to get all reaching definitions of each
869
     extension instruction.  */
870
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
871 872
  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
  df_analyze ();
873
  df_set_flags (DF_DEFER_INSN_RESCAN);
874 875

  max_insn_uid = get_max_uid ();
876
  reinsn_list = find_removable_extensions ();
877 878 879 880 881
  state.defs_list.create (0);
  state.copies_list.create (0);
  state.modified_list.create (0);
  state.work_list.create (0);
  if (reinsn_list.is_empty ())
882 883 884
    state.modified = NULL;
  else
    state.modified = XCNEWVEC (struct ext_modified, max_insn_uid);
885

886
  FOR_EACH_VEC_ELT (reinsn_list, i, curr_cand)
887
    {
888
      num_re_opportunities++;
889

890
      /* Try to combine the extension with the definition.  */
891 892
      if (dump_file)
        {
893 894
          fprintf (dump_file, "Trying to eliminate extension:\n");
          print_rtl_single (dump_file, curr_cand->insn);
895 896
        }

897
      if (combine_reaching_defs (curr_cand, curr_cand->expr, &state))
898 899
        {
          if (dump_file)
900
            fprintf (dump_file, "Eliminated the extension.\n");
901
          num_realized++;
902
          reinsn_del_list.safe_push (curr_cand->insn);
903
	  state.modified[INSN_UID (curr_cand->insn)].deleted = 1;
904 905 906
        }
    }

907
  /* Delete all useless extensions here in one sweep.  */
908
  FOR_EACH_VEC_ELT (reinsn_del_list, i, curr_insn)
909
    delete_insn (curr_insn);
910

911 912 913 914 915
  reinsn_list.release ();
  state.defs_list.release ();
  state.copies_list.release ();
  state.modified_list.release ();
  state.work_list.release ();
916
  XDELETEVEC (state.modified);
917

918
  if (dump_file && num_re_opportunities > 0)
919 920
    fprintf (dump_file, "Elimination opportunities = %d realized = %d\n",
	     num_re_opportunities, num_realized);
921 922
}

923
/* Find and remove redundant extensions.  */
924 925

static unsigned int
926
rest_of_handle_ree (void)
927
{
928 929 930
  timevar_push (TV_REE);
  find_and_remove_re ();
  timevar_pop (TV_REE);
931 932 933
  return 0;
}

934
/* Run REE pass when flag_ree is set at optimization level > 0.  */
935 936

static bool
937
gate_handle_ree (void)
938
{
939
  return (optimize > 0 && flag_ree);
940 941
}

942 943 944
namespace {

const pass_data pass_data_ree =
945
{
946 947 948 949 950 951 952 953 954 955 956
  RTL_PASS, /* type */
  "ree", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_REE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_finish | TODO_verify_rtl_sharing ), /* todo_flags_finish */
957
};
958 959 960 961

class pass_ree : public rtl_opt_pass
{
public:
962 963
  pass_ree (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_ree, ctxt)
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
  {}

  /* opt_pass methods: */
  bool gate () { return gate_handle_ree (); }
  unsigned int execute () { return rest_of_handle_ree (); }

}; // class pass_ree

} // anon namespace

rtl_opt_pass *
make_pass_ree (gcc::context *ctxt)
{
  return new pass_ree (ctxt);
}