/* Generic partial redundancy elimination with lazy code motion support.
   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* These routines are meant to be used by various optimization
   passes which can be modeled as lazy code motion problems.
   Including, but not limited to:

	* Traditional partial redundancy elimination.

	* Placement of caller/callee register save/restores.

	* Load/store motion.

	* Copy motion.

	* Conversion of flat register files to a stacked register
	model.

	* Dead load/store elimination.

  These routines accept as input:

	* Basic block information (number of blocks, lists of
	predecessors and successors).  Note that the granularity
	does not need to be the basic block; the nodes could be
	statements or functions.

	* Bitmaps of local properties (computed, transparent and
	anticipatable expressions).

  The output of these routines is a bitmap of redundant computations
  and a bitmap of optimal placement points.  */
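
/* Roughly, pre_edge_lcm below drives the computation in five steps:
   global availability (compute_available), global anticipatability
   (compute_antinout_edge), earliestness for each edge (compute_earliest),
   laterness (compute_laterin), and finally the insertion and deletion
   points (compute_insert_delete).  pre_edge_rev_lcm mirrors the same
   steps on the reverse flowgraph via farthest/nearer/nearerout.  */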


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "tm_p.h"
#include "function.h"

/* We want target macros for the mode switching code to be able to refer
   to instruction attribute values.  */
#include "insn-attr.h"

/* Edge based LCM routines.  */
static void compute_antinout_edge (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
static void compute_earliest (struct edge_list *, int, sbitmap *, sbitmap *,
			      sbitmap *, sbitmap *, sbitmap *);
static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
			     sbitmap *, sbitmap *);
static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
				   sbitmap *, sbitmap *, sbitmap *, sbitmap *);

/* Edge based LCM routines on a reverse flowgraph.  */
static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
			      sbitmap*, sbitmap *, sbitmap *);
static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
			       sbitmap *, sbitmap *);
static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
				       sbitmap *, sbitmap *, sbitmap *,
				       sbitmap *);

/* Edge based lcm routines.  */

/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.  */
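
/* The dataflow equations solved here (a maximal fixed point, hence the
   optimistic all-ones initialization of ANTIN) can be read off the loop
   below:

     ANTOUT(B) = 0                          if B is a predecessor of EXIT
     ANTOUT(B) = intersect ANTIN(S)         over all successors S otherwise
     ANTIN(B)  = ANTLOC(B) | (TRANSP(B) & ANTOUT(B))  */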

static void
compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
		       sbitmap *antout)
{
  basic_block bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  */
	sbitmap_zero (antout[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
	}

      if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
				   transp[bb->index], antout[bb->index]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	FOR_EACH_EDGE (e, ei, bb->preds)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the earliest vector for edge based lcm.  */
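
/* The value computed for each edge (P,S) below is

     EARLIEST(P,S) = ANTIN(S)                               if P is ENTRY
                   = 0                                      if S is EXIT
                   = (ANTIN(S) - AVOUT(P)) & (KILL(P) | ~ANTOUT(P))
                                                            otherwise

   i.e. an expression is placed as early as the edge where it first
   becomes anticipatable without already being available.  */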

static void
compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
		  sbitmap *antout, sbitmap *avout, sbitmap *kill,
		  sbitmap *earliest)
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (pred == ENTRY_BLOCK_PTR)
	sbitmap_copy (earliest[x], antin[succ->index]);
      else
	{
	  if (succ == EXIT_BLOCK_PTR)
	    sbitmap_zero (earliest[x]);
	  else
	    {
	      sbitmap_difference (difference, antin[succ->index],
				  avout[pred->index]);
	      sbitmap_not (temp_bitmap, antout[pred->index]);
	      sbitmap_a_and_b_or_c (earliest[x], difference,
				    kill[pred->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

     laterin(ENTRY) is defined as all 0's
     later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
     laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, anytime we change later(bb), we need to add
   succs(bb) to the worklist if they are not already on the worklist.

   Boundary conditions:

     We prime the worklist with all the normal basic blocks.  The ENTRY block can
     never be added to the worklist since it is never the successor of any
     block.  We explicitly prevent the EXIT block from being added to the
     worklist.

     We optimistically initialize LATER.  That is the only time this routine
     will compute LATER for an edge out of the entry block since the entry
     block is never on the worklist.  Thus, LATERIN is neither used nor
     computed for the ENTRY block.

     Since the EXIT block is never added to the worklist, we will neither
     use nor compute LATERIN for the exit block.  Edges which reach the
     EXIT block are handled in the normal fashion inside the loop.  However,
     the insertion/deletion computation needs LATERIN(EXIT), so we have
     to compute it.  */
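
/* In equation form, the fixed point computed below is

     LATERIN(B) = intersect LATER(P,B)   over all predecessors P of B
     LATER(P,S) = EARLIEST(P,S) | (LATERIN(P) - ANTLOC(P))

   with LATER seeded from EARLIEST on the edges out of ENTRY.  */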

static void
compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
		 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it. In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      sbitmap_ones (laterin[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->preds)
	sbitmap_a_and_b (laterin[bb->index], laterin[bb->index],
			 later[(size_t)e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
				      earliest[(size_t) e->aux],
				      laterin[e->src->index],
				      antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[last_basic_block]);
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    sbitmap_a_and_b (laterin[last_basic_block],
		     laterin[last_basic_block],
		     later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}

/* Compute the insertion and deletion points for edge based LCM.  */
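
/* In equation form:

     INSERT(P,S) = LATER(P,S) - LATERIN(S)
     DELETE(B)   = ANTLOC(B) - LATERIN(B)

   where LATERIN(EXIT) is the extra element computed at the end of
   compute_laterin.  */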

static void
compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
		       sbitmap *later, sbitmap *laterin, sbitmap *insert,
		       sbitmap *delete)
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], antloc[bb->index],
			laterin[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);

      if (b == EXIT_BLOCK_PTR)
	sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
      else
	sbitmap_difference (insert[x], later[x], laterin[b->index]);
    }
}

/* Given local properties TRANSP, AVLOC, ANTLOC, KILL return the insert and
   delete vectors for edge based LCM.  Returns an edgelist which is used to
   map the insert vector to what edge an expression should be inserted on.  */
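
/* A rough usage sketch (hypothetical caller; the names of the local
   property vectors are placeholders and must be computed by the client
   pass itself):

       sbitmap *insert_map, *delete_map;
       struct edge_list *edges
	 = pre_edge_lcm (dump, n_exprs, transp, avloc, antloc, kill,
			 &insert_map, &delete_map);

       For each edge index X and expression E, TEST_BIT (insert_map[X], E)
       means E should be computed on INDEX_EDGE (edges, X); for each basic
       block B, TEST_BIT (delete_map[B->index], E) means the computation of
       E in B is redundant and may be removed.  When done:

       sbitmap_vector_free (insert_map);
       sbitmap_vector_free (delete_map);
       free_edge_list (edges);  */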

struct edge_list *
pre_edge_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
	      sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
	      sbitmap **insert, sbitmap **delete)
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
      dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
      dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
    }
#endif

  /* Compute global availability.  */
  avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (avloc, kill, avout, avin);
  sbitmap_vector_free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
      dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  sbitmap_vector_free (antout);
  sbitmap_vector_free (antin);
  sbitmap_vector_free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  sbitmap_vector_free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  sbitmap_vector_free (laterin);
  sbitmap_vector_free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif

  return edge_list;
}

/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors by
   iterating a worklist until a fixed point is reached.  */
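
/* The equations solved are the classic availability ones:

     AVIN(B)  = 0                        if B is a successor of ENTRY
     AVIN(B)  = intersect AVOUT(P)       over all predecessors P otherwise
     AVOUT(B) = AVLOC(B) | (AVIN(B) - KILL(B))

   again starting from an optimistic all-ones AVOUT.  */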

void
compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
		   sbitmap *avin)
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;
  edge_iterator ei;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
	}

      if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index],
				    avin[bb->index], kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the farthest vector for edge based lcm.  */
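
/* For each edge (P,S), the value computed below is

     FARTHEST(P,S) = ST_AVOUT(P)                            if S is EXIT
                   = 0                                      if P is ENTRY
                   = (ST_AVOUT(P) - ST_ANTIN(S)) & (KILL(S) | ~ST_AVIN(S))
                                                            otherwise

   the reverse-flowgraph mirror of compute_earliest.  */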

static void
compute_farthest (struct edge_list *edge_list, int n_exprs,
		  sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
		  sbitmap *kill, sbitmap *farthest)
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (succ == EXIT_BLOCK_PTR)
	sbitmap_copy (farthest[x], st_avout[pred->index]);
      else
	{
	  if (pred == ENTRY_BLOCK_PTR)
	    sbitmap_zero (farthest[x]);
	  else
	    {
	      sbitmap_difference (difference, st_avout[pred->index],
				  st_antin[succ->index]);
	      sbitmap_not (temp_bitmap, st_avin[succ->index]);
	      sbitmap_a_and_b_or_c (farthest[x], difference,
				    kill[succ->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* Compute nearer and nearerout vectors for edge based lcm.

   This is the mirror of compute_laterin; additional comments on the
   implementation can be found before compute_laterin.  */
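
/* In equation form:

     NEAREROUT(B) = intersect NEARER(B,S)   over all successors S of B
     NEARER(P,S)  = FARTHEST(P,S) | (NEAREROUT(S) - ST_AVLOC(S))

   with NEARER seeded from FARTHEST on the edges into EXIT.  */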

static void
compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
		   sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *tos, bb;
  edge_iterator ei;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize NEARER for each edge and build a mapping from an edge to
     its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  sbitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  FOR_EACH_BB (bb)
    {
      *tos++ = bb;
      bb->aux = bb;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the first entry off the worklist.  */
      bb = *--tos;
      bb->aux = NULL;

      /* Compute the intersection of NEARER for each outgoing edge from B.  */
      sbitmap_ones (nearerout[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
	sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
			 nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
				      farthest[(size_t) e->aux],
				      nearerout[e->dest->index],
				      st_avloc[e->dest->index])
	    /* If NEARER for an incoming edge was changed, then we need
	       to add the source of the incoming edge to the worklist.  */
	    && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
	  {
	    *tos++ = e->src;
	    e->src->aux = e;
	  }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  sbitmap_ones (nearerout[last_basic_block]);
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    sbitmap_a_and_b (nearerout[last_basic_block],
		     nearerout[last_basic_block],
		     nearer[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (tos);
}

/* Compute the insertion and deletion points for edge based reverse LCM.  */
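
/* In equation form:

     INSERT(P,S) = NEARER(P,S) - NEAREROUT(P)
     DELETE(B)   = ST_AVLOC(B) - NEAREROUT(B)

   where NEAREROUT(ENTRY) is the extra element computed at the end of
   compute_nearerout.  */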

static void
compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
			   sbitmap *nearer, sbitmap *nearerout,
			   sbitmap *insert, sbitmap *delete)
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], st_avloc[bb->index],
			nearerout[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
      if (b == ENTRY_BLOCK_PTR)
	sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
      else
	sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
    }
}

/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
   insert and delete vectors for edge based reverse LCM.  Returns an
   edgelist which is used to map the insert vector to what edge
   an expression should be inserted on.  */

struct edge_list *
pre_edge_rev_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
		  sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
		  sbitmap **insert, sbitmap **delete)
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  sbitmap_vector_zero (st_antin, last_basic_block);
  sbitmap_vector_zero (st_antout, last_basic_block);
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global availability.  */
  st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
      dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
      dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
      dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
#endif

  sbitmap_vector_free (st_antin);
  sbitmap_vector_free (st_antout);

  sbitmap_vector_free (st_avin);
  sbitmap_vector_free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "nearerout", "", nearerout,
			   last_basic_block + 1);
      dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
    }
#endif

  sbitmap_vector_free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			     *insert, *delete);

  sbitmap_vector_free (nearerout);
  sbitmap_vector_free (nearer);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif
  return edge_list;
}

/* Mode switching:

   The algorithm for setting the modes consists of scanning the insn list
   and finding all the insns which require a specific mode.  Each insn gets
   a unique struct seginfo element.  These structures are inserted into a list
   for each basic block.  For each entity, there is an array of bb_info over
   the flow graph basic blocks (local var 'bb_info'), which contains a list
   of all insns within that basic block, in the order they are encountered.

   For each entity, any basic block WITHOUT any insns requiring a specific
   mode is given a single entry, without a mode.  (Each basic block
   in the flow graph must have at least one entry in the segment table.)

   The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority value with respect to the first
   insn in any one block.  Any adjustments required to the transparency
   vectors are made, then the next iteration starts for the next-lower
   priority mode, till for each entity all modes are exhausted.

   More details are located in the code for optimize_mode_switching().  */
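
/* For example, on SH4 the floating-point precision controlled by FPSCR is
   one such entity: each FP insn needs the unit in a particular single- or
   double-precision mode, and the code below decides where to place the
   FPSCR-setting instructions.  */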

/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.  */
struct seginfo
{
  int mode;
  rtx insn_ptr;
  int bbnum;
  struct seginfo *next;
  HARD_REG_SET regs_live;
};

struct bb_info
{
  struct seginfo *seginfo;
  int computing;
};

/* These bitmaps are used for the LCM algorithm.  */

#ifdef OPTIMIZE_MODE_SWITCHING
static sbitmap *antic;
static sbitmap *transp;
static sbitmap *comp;

static struct seginfo * new_seginfo (int, rtx, int, HARD_REG_SET);
static void add_seginfo (struct bb_info *, struct seginfo *);
static void reg_dies (rtx, HARD_REG_SET);
static void reg_becomes_live (rtx, rtx, void *);
static void make_preds_opaque (basic_block, int);
#endif

#ifdef OPTIMIZE_MODE_SWITCHING

/* This function will allocate a new SEGINFO structure, initialized
   with the MODE, INSN, and basic block BB parameters.  */

static struct seginfo *
new_seginfo (int mode, rtx insn, int bb, HARD_REG_SET regs_live)
{
  struct seginfo *ptr;
  ptr = xmalloc (sizeof (struct seginfo));
  ptr->mode = mode;
  ptr->insn_ptr = insn;
  ptr->bbnum = bb;
  ptr->next = NULL;
  COPY_HARD_REG_SET (ptr->regs_live, regs_live);
  return ptr;
}

/* Add a seginfo element to the end of a list.
   HEAD is a pointer to the list beginning.
   INFO is the structure to be linked in.  */

static void
add_seginfo (struct bb_info *head, struct seginfo *info)
{
  struct seginfo *ptr;

  if (head->seginfo == NULL)
    head->seginfo = info;
  else
    {
      ptr = head->seginfo;
      while (ptr->next != NULL)
	ptr = ptr->next;
      ptr->next = info;
    }
}

/* Make all predecessors of basic block B opaque, recursively, till we hit
   some that are already non-transparent, or an edge where aux is set; that
   denotes that a mode set is to be done on that edge.
   J is the bit number in the bitmaps that corresponds to the entity that
   we are currently handling mode-switching for.  */

static void
make_preds_opaque (basic_block b, int j)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, b->preds)
    {
      basic_block pb = e->src;

      if (e->aux || ! TEST_BIT (transp[pb->index], j))
	continue;

      RESET_BIT (transp[pb->index], j);
      make_preds_opaque (pb, j);
    }
}

/* Record in LIVE that register REG died.  */

static void
reg_dies (rtx reg, HARD_REG_SET live)
{
  int regno, nregs;

  if (!REG_P (reg))
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
	 nregs--)
      CLEAR_HARD_REG_BIT (live, regno + nregs);
}

/* Record in LIVE that register REG became live.
   This is called via note_stores.  */

static void
reg_becomes_live (rtx reg, rtx setter ATTRIBUTE_UNUSED, void *live)
{
  int regno, nregs;

  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (!REG_P (reg))
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
	 nregs--)
      SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
}

/* Make sure that if MODE_ENTRY is defined then MODE_EXIT is also
   defined, and vice versa.  */
#if defined (MODE_ENTRY) != defined (MODE_EXIT)
 #error "Both MODE_ENTRY and MODE_EXIT must be defined"
#endif

#if defined (MODE_ENTRY) && defined (MODE_EXIT)
/* Split the fallthrough edge to the exit block, so that we can note
   that NORMAL_MODE is required there.  Return the new block if it's
   inserted before the exit block.  Otherwise return null.  */

static basic_block
create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
{
  edge eg;
  edge_iterator ei;
  basic_block pre_exit;

  /* The only non-call predecessor at this stage is a block with a
     fallthrough edge; there can be at most one, but there could be
     none at all, e.g. when exit is called.  */
  pre_exit = 0;
  FOR_EACH_EDGE (eg, ei, EXIT_BLOCK_PTR->preds)
    if (eg->flags & EDGE_FALLTHRU)
      {
	basic_block src_bb = eg->src;
	regset live_at_end = src_bb->global_live_at_end;
	rtx last_insn, ret_reg;

	gcc_assert (!pre_exit);
	/* If this function returns a value at the end, we have to
	   insert the final mode switch before the return value copy
	   to its hard register.  */
	if (EDGE_COUNT (EXIT_BLOCK_PTR->preds) == 1
	    && GET_CODE ((last_insn = BB_END (src_bb))) == INSN
	    && GET_CODE (PATTERN (last_insn)) == USE
	    && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG)
	  {
	    int ret_start = REGNO (ret_reg);
	    int nregs = hard_regno_nregs[ret_start][GET_MODE (ret_reg)];
	    int ret_end = ret_start + nregs;
	    int short_block = 0;
	    int maybe_builtin_apply = 0;
	    int forced_late_switch = 0;
	    rtx before_return_copy;

	    do
	      {
		rtx return_copy = PREV_INSN (last_insn);
		rtx return_copy_pat, copy_reg;
		int copy_start, copy_num;
		int j;

		if (INSN_P (return_copy))
		  {
		    if (GET_CODE (PATTERN (return_copy)) == USE
			&& GET_CODE (XEXP (PATTERN (return_copy), 0)) == REG
			&& (FUNCTION_VALUE_REGNO_P
			    (REGNO (XEXP (PATTERN (return_copy), 0)))))
		      {
			maybe_builtin_apply = 1;
			last_insn = return_copy;
			continue;
		      }
		    /* If the return register is not (in its entirety)
		       likely spilled, the return copy might be
		       partially or completely optimized away.  */
		    return_copy_pat = single_set (return_copy);
		    if (!return_copy_pat)
		      {
			return_copy_pat = PATTERN (return_copy);
			if (GET_CODE (return_copy_pat) != CLOBBER)
			  break;
		      }
		    copy_reg = SET_DEST (return_copy_pat);
		    if (GET_CODE (copy_reg) == REG)
		      copy_start = REGNO (copy_reg);
		    else if (GET_CODE (copy_reg) == SUBREG
			     && GET_CODE (SUBREG_REG (copy_reg)) == REG)
		      copy_start = REGNO (SUBREG_REG (copy_reg));
		    else
		      break;
		    if (copy_start >= FIRST_PSEUDO_REGISTER)
		      break;
		    copy_num
		      = hard_regno_nregs[copy_start][GET_MODE (copy_reg)];

		    /* If the return register is not likely spilled, - as is
		       the case for floating point on SH4 - then it might
		       be set by an arithmetic operation that needs a
		       different mode than the exit block.  */
		    for (j = n_entities - 1; j >= 0; j--)
		      {
			int e = entity_map[j];
			int mode = MODE_NEEDED (e, return_copy);

			if (mode != num_modes[e] && mode != MODE_EXIT (e))
			  break;
		      }
		    if (j >= 0)
		      {
			/* For the SH4, floating point loads depend on fpscr,
			   thus we might need to put the final mode switch
			   after the return value copy.  That is still OK,
			   because a floating point return value does not
			   conflict with address reloads.  */
			if (copy_start >= ret_start
			    && copy_start + copy_num <= ret_end
			    && OBJECT_P (SET_SRC (return_copy_pat)))
			  forced_late_switch = 1;
			break;
		      }

		    if (copy_start >= ret_start
			&& copy_start + copy_num <= ret_end)
		      nregs -= copy_num;
		    else if (!maybe_builtin_apply
			     || !FUNCTION_VALUE_REGNO_P (copy_start))
		      break;
		    last_insn = return_copy;
		  }
		/* ??? Exception handling can lead to the return value
		   copy being already separated from the return value use,
		   as in  unwind-dw2.c .
		   Similarly, conditionally returning without a value,
		   and conditionally using builtin_return can lead to an
		   isolated use.  */
		if (return_copy == BB_HEAD (src_bb))
		  {
		    short_block = 1;
		    break;
		  }
		last_insn = return_copy;
	      }
	    while (nregs);
	    /* If we didn't see a full return value copy, verify that there
	       is a plausible reason for this.  If some, but not all of the
	       return register is likely spilled, we can expect that there
	       is a copy for the likely spilled part.  */
	    if (nregs
		&& ! forced_late_switch
		&& ! short_block
		&& CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (ret_start))
		&& nregs == hard_regno_nregs[ret_start][GET_MODE (ret_reg)]
		/* For multi-hard-register floating point values,
		   sometimes the likely-spilled part is ordinarily copied
		   first, then the other part is set with an arithmetic
		   operation.  This doesn't actually cause reload failures,
		   so let it pass.  */
		&& (GET_MODE_CLASS (GET_MODE (ret_reg)) == MODE_INT
		    || nregs == 1))
	      abort ();
	    if (INSN_P (last_insn))
	      {
		before_return_copy
		  = emit_note_before (NOTE_INSN_DELETED, last_insn);
		/* Instructions preceding LAST_INSN in the same block might
		   require a different mode than MODE_EXIT, so if we might
		   have such instructions, keep them in a separate block
		   from pre_exit.  */
		if (last_insn != BB_HEAD (src_bb))
		  src_bb = split_block (src_bb,
					PREV_INSN (before_return_copy))->dest;
	      }
	    else
	      before_return_copy = last_insn;
	    pre_exit = split_block (src_bb, before_return_copy)->src;
	  }
	else
	  {
	    pre_exit = split_edge (eg);
	    COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
	    COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
	  }
      }

  return pre_exit;
}
#endif

/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.  */

int
optimize_mode_switching (FILE *file)
{
  rtx insn;
  int e;
  basic_block bb;
  int need_commit = 0;
  sbitmap *kill;
  struct edge_list *edge_list;
  static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES ARRAY_SIZE (num_modes)
  int entity_map[N_ENTITIES];
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;
  int max_num_modes = 0;
  bool emited = false;
  basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;

  clear_bb_flags ();

  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	int entry_exit_extra = 0;

	/* Create the list of segments within each basic block.
	   If NORMAL_MODE is defined, allow for two extra
	   blocks split from the entry and exit block.  */
#if defined (MODE_ENTRY) && defined (MODE_EXIT)
	entry_exit_extra = 3;
#endif
	bb_info[n_entities]
	  = xcalloc (last_basic_block + entry_exit_extra, sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

  if (! n_entities)
    return 0;

#if defined (MODE_ENTRY) && defined (MODE_EXIT)
  /* Split the edge from the entry block, so that we can note that
     there NORMAL_MODE is supplied.  */
  post_entry = split_edge (EDGE_SUCC (ENTRY_BLOCK_PTR, 0));
  pre_exit = create_pre_exit (n_entities, entity_map, num_modes);
#endif

  /* Create the bitmap vectors.  */

  antic = sbitmap_vector_alloc (last_basic_block, n_entities);
  transp = sbitmap_vector_alloc (last_basic_block, n_entities);
  comp = sbitmap_vector_alloc (last_basic_block, n_entities);

  sbitmap_vector_ones (transp, last_basic_block);

  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine what mode the first use (if any) of entity E needs.
	 This will be the mode that is anticipatable for this block.
	 Also compute the initial transparency settings.  */
      FOR_EACH_BB (bb)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  REG_SET_TO_HARD_REG_SET (live_now,
				   bb->global_live_at_start);
	  for (insn = BB_HEAD (bb);
	       insn != NULL && insn != NEXT_INSN (BB_END (bb));
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		{
		  int mode = MODE_NEEDED (e, insn);
		  rtx link;

		  if (mode != no_mode && mode != last_mode)
		    {
		      last_mode = mode;
		      ptr = new_seginfo (mode, insn, bb->index, live_now);
		      add_seginfo (info + bb->index, ptr);
		      RESET_BIT (transp[bb->index], j);
		    }
#ifdef MODE_AFTER
		  last_mode = MODE_AFTER (last_mode, insn);
#endif
		  /* Update LIVE_NOW.  */
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_DEAD)
		      reg_dies (XEXP (link, 0), live_now);

		  note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_UNUSED)
		      reg_dies (XEXP (link, 0), live_now);
		}
	    }

	  info[bb->index].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now);
	      add_seginfo (info + bb->index, ptr);
	    }
	}
#if defined (MODE_ENTRY) && defined (MODE_EXIT)
      {
	int mode = MODE_ENTRY (e);

	if (mode != no_mode)
	  {
	    bb = post_entry;

	    /* By always making this nontransparent, we save
	       an extra check in make_preds_opaque.  We also
	       need this to avoid confusing pre_edge_lcm when
	       antic is cleared but transp and comp are set.  */
	    RESET_BIT (transp[bb->index], j);

	    /* Insert a fake computing definition of MODE into entry
	       blocks which compute no mode. This represents the mode on
	       entry.  */
	    info[bb->index].computing = mode;

	    if (pre_exit)
	      info[pre_exit->index].seginfo->mode = MODE_EXIT (e);
	  }
      }
#endif /* NORMAL_MODE */
    }

  kill = sbitmap_vector_alloc (last_basic_block, n_entities);
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];
      sbitmap *delete;
      sbitmap *insert;

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, last_basic_block);
      sbitmap_vector_zero (comp, last_basic_block);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  FOR_EACH_BB (bb)
	    {
	      if (info[bb->index].seginfo->mode == m)
		SET_BIT (antic[bb->index], j);

	      if (info[bb->index].computing == m)
		SET_BIT (comp[bb->index], j);
	    }
	}

      /* Calculate the optimal locations for placing mode switches to
	 modes with priority I.  */

      FOR_EACH_BB (bb)
	sbitmap_not (kill[bb->index], transp[bb->index]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);

      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that have been inserted by lcm.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation cease to be transparent for any lower-priority
	     mode of the same entity.  First set the aux field of each
	     insertion site edge non-transparent, then propagate the new
	     non-transparency from the redundant computation upwards till
	     we hit an insertion site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      eg->aux = (void *)1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = get_insns ();
	      end_sequence ();

	      /* Do not bother to insert an empty sequence.  */
	      if (mode_set == NULL_RTX)
		continue;

	      /* If this is an abnormal edge, we'll insert at the end
		 of the previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  emited = true;
		  if (JUMP_P (BB_END (src_bb)))
		    emit_insn_before (mode_set, BB_END (src_bb));
		  /* It doesn't make sense to switch to normal mode
		     after a CALL_INSN, so we're going to abort if we
		     find one.  The cases in which a CALL_INSN may
		     have an abnormal edge are sibcalls and EH edges.
		     In the case of sibcalls, the dest basic-block is
		     the EXIT_BLOCK, that runs in normal mode; it is
		     assumed that a sibcall insn requires normal mode
		     itself, so no mode switch would be required after
		     the call (it wouldn't make sense, anyway).  In
		     the case of EH edges, EH entry points also start
		     in normal mode, so a similar reasoning applies.  */
		  else if (NONJUMP_INSN_P (BB_END (src_bb)))
		    emit_insn_after (mode_set, BB_END (src_bb));
		  else
		    abort ();
		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  FOR_EACH_BB_REVERSE (bb)
	    if (TEST_BIT (delete[bb->index], j))
	      {
		make_preds_opaque (bb, j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb->index].seginfo->mode = no_mode;
	      }
	}

      sbitmap_vector_free (delete);
      sbitmap_vector_free (insert);
      clear_aux_for_edges ();
      free_edge_list (edge_list);
    }

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

      FOR_EACH_BB_REVERSE (bb)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = get_insns ();
		  end_sequence ();

		  /* Insert MODE_SET only if it is nonempty.  */
		  if (mode_set != NULL_RTX)
		    {
		      emited = true;
		      if (NOTE_P (ptr->insn_ptr)
			  && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			      == NOTE_INSN_BASIC_BLOCK))
			emit_insn_after (mode_set, ptr->insn_ptr);
		      else
			emit_insn_before (mode_set, ptr->insn_ptr);
		    }
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }

  /* Finished. Free up all the things we've allocated.  */

  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  if (need_commit)
    commit_edge_insertions ();

#if defined (MODE_ENTRY) && defined (MODE_EXIT)
  cleanup_cfg (CLEANUP_NO_INSN_DEL);
#else
  if (!need_commit && !emited)
    return 0;
#endif

  max_regno = max_reg_num ();
  allocate_reg_info (max_regno, FALSE, FALSE);
  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
				    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
				     | PROP_SCAN_DEAD_CODE));

  return 1;
}
#endif /* OPTIMIZE_MODE_SWITCHING */