/* Inlining decision heuristics.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/*  Inlining decision heuristics

    The implementation of the inliner is organized as follows:

    inlining heuristics limits

      can_inline_edge_p allows checking that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, growth and so
      on).

      Functions are inlined when it is obvious the result is profitable (such
      as functions called once or when inlining reduces code size).
      In addition to that we perform inlining of small functions and recursive
      inlining.

    inlining heuristics

       The inliner itself is split into two passes:

       pass_early_inlining

	 Simple local inlining pass inlining callees into the current function.
	 This pass makes no use of whole unit analysis and thus it can do only
	 very simple decisions based on local properties.

	 The strength of the pass is that it is run in topological order
	 (reverse postorder) on the callgraph. Functions are converted into SSA
	 form just before this pass and optimized subsequently. As a result, the
	 callees of the function seen by the early inliner were already optimized
	 and the results of early inlining add a lot of optimization opportunities
	 for the local optimization.

	 The pass handles the obvious inlining decisions within the compilation
	 unit - inlining auto inline functions, inlining for size and
	 flattening.

	 The main strength of the pass is the ability to eliminate abstraction
	 penalty in C++ code (via a combination of inlining and early
	 optimization) and thus improve the quality of analysis done by the real
	 IPA optimizers.

	 Because of the lack of whole unit knowledge, the pass cannot really make
	 good code size/performance tradeoffs.  It however does very simple
	 speculative inlining allowing code size to grow by
	 EARLY_INLINING_INSNS when the callee is a leaf function.  In this case the
	 optimizations performed later are very likely to eliminate the cost.

       pass_ipa_inline

	 This is the real inliner able to handle inlining with whole program
	 knowledge. It performs the following steps:

	 1) inlining of small functions.  This is implemented by a greedy
	 algorithm ordering all inlinable cgraph edges by their badness and
	 inlining them in this order as long as inline limits allow doing so.

	 This heuristic is not very good at inlining recursive calls. Recursive
	 calls can be inlined with results similar to loop unrolling. To do so,
	 a special purpose recursive inliner is executed on the function when a
	 recursive edge is met as a viable candidate.

	 2) Unreachable functions are removed from the callgraph.  Inlining leads
	 to devirtualization and other modifications of the callgraph so functions
	 may become unreachable during the process. Also functions declared as
	 extern inline or virtual functions are removed, since after inlining
	 we no longer need the offline bodies.

	 3) Functions called once and not exported from the unit are inlined.
	 This should almost always lead to a reduction of code size by eliminating
	 the need for an offline copy of the function.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "flags.h"
#include "diagnostic.h"
#include "gimple-pretty-print.h"
#include "params.h"
#include "fibheap.h"
#include "intl.h"
#include "tree-pass.h"
#include "coverage.h"
#include "rtl.h"
#include "bitmap.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-ssa.h"
#include "ipa-prop.h"
#include "except.h"
#include "target.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "cilk.h"

/* Statistics we collect about inlining algorithm.  */
static int overall_size;
static gcov_type max_count;
static sreal max_count_real, max_relbenefit_real, half_int_min_real;

/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into a tiny wrapper, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the caller.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined. */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  struct inline_summary *info, *what_info, *outer_info = inline_summary (to);

  /* Look for the function e->caller is inlined to.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of inline code
     we immediately inline to.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent compiler from exploding".  */
  while (true)
    {
      info = inline_summary (to);
      if (limit < info->self_size)
	limit = info->self_size;
      if (stack_size_limit < info->estimated_self_stack_size)
	stack_size_limit = info->estimated_self_stack_size;
      if (to->global.inlined_to)
        to = to->callers->caller;
      else
	break;
    }

  what_info = inline_summary (what);

  if (limit < what_info->self_size)
    limit = what_info->self_size;

  limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= info->size
      && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large i/o datastructures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
		       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);

  inlined_stack = (outer_info->stack_frame_offset
		   + outer_info->estimated_self_stack_size
		   + what_info->estimated_stack_size);
  /* Compare the new stack consumption with the stack consumption at the
     place the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If function already has large stack usage from sibling
	 inline call, we can inline, too.
	 This bit overoptimistically assumes that we are good at stack
	 packing.  */
      && inlined_stack > info->estimated_stack_size
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
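
/* For intuition, a hypothetical worked example of the limits above (the
   parameter values below are assumed, not taken from this file): if the
   largest function body on the inline chain has self_size 500 and
   --param large-function-growth is 100, then limit becomes
   500 + 500 * 100 / 100 = 1000, i.e. the combined body may at most double
   before CIF_LARGE_FUNCTION_GROWTH_LIMIT is reported (and only when newsize
   also exceeds --param large-function-insns).  Likewise, with an estimated
   caller stack frame of 1024 bytes and --param stack-frame-growth of 1000,
   stack_size_limit becomes 1024 + 1024 * 1000 / 100 = 11264 bytes; inlining
   that pushes the combined frame past this (and past
   --param large-stack-frame) fails with CIF_LARGE_STACK_FRAME_GROWTH_LIMIT.  */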

/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_file)
    {
      fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
	       xstrdup (e->caller->name ()), e->caller->order,
	       xstrdup (e->callee->name ()), e->callee->order,
	       cgraph_inline_failed_string (e->inline_failed));
    }
}

/* Decide if we can inline the edge and possibly update
   inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
		   bool disregard_limits = false)
{
250
  bool inlinable = true;
251
  enum availability avail;
252 253
  struct cgraph_node *callee
    = cgraph_function_or_thunk_node (e->callee, &avail);
254
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
255
  tree callee_tree
256 257
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
258
  struct function *callee_cfun
259
    = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;
260 261

  if (!caller_cfun && e->caller->clone_of)
262
    caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);
263 264

  if (!callee_cfun && callee && callee->clone_of)
265
    callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);
266

267
  gcc_assert (e->inline_failed);
268

269
  if (!callee || !callee->definition)
270 271 272 273
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
274 275
  else if (!inline_summary (callee)->inlinable 
	   || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
276 277 278 279
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
280
  else if (avail <= AVAIL_OVERWRITABLE)
281
    {
282
      e->inline_failed = CIF_OVERWRITABLE;
283
      inlinable = false;
284
    }
285
  else if (e->call_stmt_cannot_inline_p)
286
    {
287 288
      if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
        e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
289 290 291
      inlinable = false;
    }
  /* Don't inline if the functions have different EH personalities.  */
292 293 294 295
  else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
	   && DECL_FUNCTION_PERSONALITY (callee->decl)
	   && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
	       != DECL_FUNCTION_PERSONALITY (callee->decl)))
296 297 298 299
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
300 301
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
302 303
  else if (is_tm_pure (callee->decl)
	   && !is_tm_pure (e->caller->decl))
304 305 306 307
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
308 309 310 311
  /* Don't inline if the callee can throw non-call exceptions but the
     caller cannot.
     FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
     Move the flag into cgraph node or mirror it in the inline summary.  */
312 313
  else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
	   && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
314 315 316 317
    {
      e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
      inlinable = false;
    }
318
  /* Check compatibility of target optimization options.  */
319 320
  else if (!targetm.target_option.can_inline_p (e->caller->decl,
						callee->decl))
321 322 323 324 325
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  /* Check if caller growth allows the inlining.  */
326
  else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
327
	   && !disregard_limits
328 329 330
	   && !lookup_attribute ("flatten",
				 DECL_ATTRIBUTES
				   (e->caller->global.inlined_to
331 332
				    ? e->caller->global.inlined_to->decl
				    : e->caller->decl))
333 334 335 336 337 338
           && !caller_growth_limits (e))
    inlinable = false;
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
339
    {
340 341 342 343 344 345 346 347 348 349
      struct cl_optimization *caller_opt
	= TREE_OPTIMIZATION ((caller_tree)
			     ? caller_tree
			     : optimization_default_node);

      struct cl_optimization *callee_opt
	= TREE_OPTIMIZATION ((callee_tree)
			     ? callee_tree
			     : optimization_default_node);

350 351 352
      if (((caller_opt->x_optimize > callee_opt->x_optimize)
	   || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
	  /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
353
	  && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
354
	{
355
	  e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
	  inlinable = false;
	}
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}


/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
							      NULL);
  /* Early inliner might get called at WPA stage when IPA pass adds new
     function.  In this case we cannot really do any of early inlining
     because function bodies are missing.  */
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_file)
	fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true))
    return false;
  return true;
}


/* Return number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
402 403
{
  struct cgraph_edge *e;
404 405
  int num = 0;

406
  for (e = n->callees; e; e = e->next_callee)
407
    if (!is_inexpensive_builtin (e->callee->decl))
408 409
      num++;
  return num;
}

/* Return true if we are interested in inlining the small function.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
419
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
420

421
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
422
    ;
423
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
431
    {
432
      int growth = estimate_edge_growth (e);
433 434
      int n;

      if (growth <= 0)
	;
      else if (!cgraph_maybe_hot_edge_p (e)
	       && growth > 0)
	{
	  if (dump_file)
	    fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
		     "call is cold and code would grow by %i\n",
443
		     xstrdup (e->caller->name ()),
444
		     e->caller->order,
445
		     xstrdup (callee->name ()), callee->order,
446 447 448
		     growth);
	  want_inline = false;
	}
449
      else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
450
	{
451 452
	  if (dump_file)
	    fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
453
		     "growth %i exceeds --param early-inlining-insns\n",
454
		     xstrdup (e->caller->name ()),
455
		     e->caller->order,
456
		     xstrdup (callee->name ()), callee->order,
457 458
		     growth);
	  want_inline = false;
459
	}
460 461
      else if ((n = num_calls (callee)) != 0
	       && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
462 463 464
	{
	  if (dump_file)
	    fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
465 466
		     "growth %i exceeds --param early-inlining-insns "
		     "divided by number of calls\n",
467
		     xstrdup (e->caller->name ()),
468
		     e->caller->order,
469
		     xstrdup (callee->name ()), callee->order,
		     growth);
	  want_inline = false;
	}
    }
  return want_inline;
}
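
/* A hypothetical example of the test above (numbers are illustrative only):
   if inlining the callee is estimated to grow the caller by 6 instructions
   and the callee itself contains 2 calls that are not inexpensive builtins,
   the scaled growth 6 * (2 + 1) = 18 is compared against
   --param early-inlining-insns; with a parameter value of 11 the edge would
   be rejected, while a leaf callee (n == 0) with the same growth would only
   need to pass the plain growth check.  */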

/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

480
inline gcov_type
481 482 483
compute_uninlined_call_time (struct inline_summary *callee_info,
			     struct cgraph_edge *edge)
{
484
  gcov_type uninlined_call_time =
485 486
    RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
	  CGRAPH_FREQ_BASE);
487 488 489
  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
				          ? edge->caller->global.inlined_to
				          : edge->caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline gcov_type
compute_inlined_call_time (struct cgraph_edge *edge,
			   int edge_time)
{
  gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
					  ? edge->caller->global.inlined_to
					  : edge->caller)->time;
  gcov_type time = (caller_time
		    + RDIV (((gcov_type) edge_time
			     - inline_edge_summary (edge)->call_stmt_time)
		    * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
  /* Possibly one roundoff error, but watch for overflows.  */
  gcc_checking_assert (time >= INT_MIN / 2);
  if (time < 0)
    time = 0;
  return time;
}

/* Return true if the speedup for inlining E is bigger than
   PARAM_INLINE_MIN_SPEEDUP.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
					  	e);
  gcov_type inlined_time = compute_inlined_call_time (e,
					              estimate_edge_time (e));
  if (time - inlined_time
      > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
    return true;
  return false;
}
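
/* A worked example of the time estimates above, with illustrative numbers
   (CGRAPH_FREQ_BASE is 1000): for a caller with time 500, a callee with time
   200, an edge frequency of 2000 (i.e. two calls per caller invocation on
   average), an estimated edge_time of 150 and a call_stmt_time of 10,
   compute_uninlined_call_time gives 500 + 200 * 2000 / 1000 = 900 while
   compute_inlined_call_time gives 500 + (150 - 10) * 2000 / 1000 = 780.
   Assuming --param inline-min-speedup of 10, big_speedup_p checks
   900 - 780 = 120 > 900 * 10 / 100 = 90 and thus reports a big speedup.  */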

/* Return true if we are interested in inlining the small function.
   When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
537
  struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
538

539
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
540
    ;
541
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
	   && !flag_inline_small_functions)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
546
    }
547
  else
548
    {
549
      int growth = estimate_edge_growth (e);
550
      inline_hints hints = estimate_edge_hints (e);
551
      bool big_speedup = big_speedup_p (e);

      if (growth <= 0)
	;
      /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
	 hints suggest that inlining the given function is very profitable.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
558
	       && growth >= MAX_INLINE_INSNS_SINGLE
559
	       && !big_speedup
560
	       && !(hints & (INLINE_HINT_indirect_call
561
			     | INLINE_HINT_loop_iterations
562
			     | INLINE_HINT_array_index
563
			     | INLINE_HINT_loop_stride)))
	{
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
	  want_inline = false;
	}
      /* Before giving up based on the fact that caller size will grow, allow
         functions that are called few times and eliminating the offline
	 copy will lead to overall code size reduction.
	 Not all of these will be handled by subsequent inlining of functions
	 called once: in particular weak functions are not handled or functions
	 that inline to multiple calls but a lot of bodies is optimized out.
	 Finally we want to inline earlier to allow inlining of callbacks.

	 This is slightly wrong on the aggressive side: it is entirely possible
	 that the function is called many times with a context where inlining
	 reduces code size and few times with a context where inlining increases
	 code size.  The resulting growth estimate will be negative even if it
	 would make more sense to keep the offline copy and not inline into the
	 call sites that make the code size grow.

	 When badness orders the calls in a way that code reducing calls come
	 first, this situation is not a problem at all: after inlining all
	 "good" calls, we will realize that keeping the function around is
	 better.  */
      else if (growth <= MAX_INLINE_INSNS_SINGLE
	       /* Unlike for functions called once, we play unsafe with
		  COMDATs.  We can allow that since we know functions
		  in consideration are small (and thus the risk is small) and
		  moreover growth estimates already account for the fact that
		  COMDAT functions may or may not disappear when eliminated from
		  the current unit.  With good probability making the aggressive
		  choice in all units is going to make the overall program
		  smaller.

		  Consequently we ask cgraph_can_remove_if_no_direct_calls_p
		  instead of
		  cgraph_will_be_removed_from_program_if_no_direct_calls  */
600
	        && !DECL_EXTERNAL (callee->decl)
601 602 603
		&& cgraph_can_remove_if_no_direct_calls_p (callee)
		&& estimate_growth (callee) <= 0)
	;
604
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
	       && !flag_inline_functions)
	{
          e->inline_failed = CIF_NOT_DECLARED_INLINED;
	  want_inline = false;
	}
      /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline.
	 Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggest that
	 inlining the given function is very profitable.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
614
	       && !big_speedup
615
	       && growth >= ((hints & (INLINE_HINT_indirect_call
616
				       | INLINE_HINT_loop_iterations
617
			               | INLINE_HINT_array_index
618
				       | INLINE_HINT_loop_stride))
			     ? MAX (MAX_INLINE_INSNS_AUTO,
				    MAX_INLINE_INSNS_SINGLE)
			     : MAX_INLINE_INSNS_AUTO))
	{
          e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
	  want_inline = false;
	}
626 627
      /* If call is cold, do not inline when function body would grow. */
      else if (!cgraph_maybe_hot_edge_p (e))
628
	{
629 630
          e->inline_failed = CIF_UNLIKELY_CALL;
	  want_inline = false;
631 632
	}
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
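
/* A hypothetical illustration of the two limits used above, assuming the
   common defaults of 400 for MAX_INLINE_INSNS_SINGLE and 40 for
   MAX_INLINE_INSNS_AUTO: a callee not declared inline whose estimated growth
   is 60 is normally rejected with CIF_MAX_INLINE_INSNS_AUTO_LIMIT, but if the
   edge carries one of the loop/array/indirect-call hints the limit is raised
   to MAX (40, 400) = 400 and the call remains a candidate.  Conversely, a
   declared-inline callee with growth 450 is rejected with
   CIF_MAX_INLINE_INSNS_SINGLE_LIMIT unless a hint or a big speedup applies.  */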

/* EDGE is a self recursive edge.
   We handle two cases - when function A is inlined into itself
   or when function A is being inlined into another inlined copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just introduce new recursive calls to appear.  */
648

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
				   struct cgraph_node *outer_node,
				   bool peeling,
				   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  int caller_freq = CGRAPH_FREQ_BASE;
  int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);

660
  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);

  if (!cgraph_maybe_hot_edge_p (edge))
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (max_count && !outer_node->count)
    {
      reason = "not executed in profile";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }

  if (outer_node->global.inlined_to)
    caller_freq = outer_node->callers->frequency;

  if (!want_inline)
    ;
  /* Inlining of a self recursive function into a copy of itself within another
     function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the probability
     of an actual call to the self recursive function very small.  Be sure that
     the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
					 / max_depth);
      int i;
      for (i = 1; i < depth; i++)
	max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
      if (max_count
	  && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
	      >= max_prob))
	{
	  reason = "profile of recursive call is too large";
	  want_inline = false;
	}
      if (!max_count
	  && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
	      >= max_prob))
	{
	  reason = "frequency of recursive call is too large";
	  want_inline = false;
	}
    }
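
  /* A worked example of the bound above (CGRAPH_FREQ_BASE is 1000): with
     max_depth 8, max_prob starts at 1000 - (1000 + 7) / 8 = 875, i.e. the
     recursive call must account for less than 87.5% of the caller frequency.
     At depth 2 the bound tightens to 875 * 875 / 1000 = 765, and so on, so
     deep peeling is only allowed when recursion quickly becomes unlikely.  */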
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
     recursion depth is large.  We reduce function call overhead and increase
     chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile feedback
     is tricky.  For now we disable recursive inlining when the probability of
     self recursion is low.

     Recursive inlining of a self recursive call within a loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one of
     the libstdc++ rb tree methods.  */
  else
    {
      if (max_count
	  && (edge->count * 100 / outer_node->count
	      <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
	{
	  reason = "profile of recursive call is too small";
	  want_inline = false;
	}
      else if (!max_count
	       && (edge->frequency * 100 / caller_freq
	           <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
	{
	  reason = "frequency of recursive call is too small";
	  want_inline = false;
	}
    }
  if (!want_inline && dump_file)
    fprintf (dump_file, "   not inlining recursively: %s\n", reason);
  return want_inline;
}

/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
   for (e = node->callers; e; e = e->next_caller)
     {
       if (!can_inline_edge_p (e, true))
         return true;
765
       if (!(*(bool *)has_hot_call) && cgraph_maybe_hot_edge_p (e))
	 *(bool *)has_hot_call = true;
     }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}

/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
788
   struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
789 790 791
   bool has_hot_call = false;

   /* Does it have callers?  */
792
   if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
793
     return false;
794
   /* Already inlined?  */
795
   if (function->global.inlined_to)
796
     return false;
   if (cgraph_function_or_thunk_node (node, NULL) != node)
     return false;
   /* Inlining into all callers would increase size?  */
   if (estimate_growth (node) > 0)
801
     return false;
802
   /* All inlines must be possible.  */
803 804
   if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
     return false;
805
   if (!cold && !has_hot_call)
     return false;
   return true;
}

#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)

/* Return relative time improvement for inlining EDGE in range
   1...RELATIVE_TIME_BENEFIT_RANGE.  */

static inline int
relative_time_benefit (struct inline_summary *callee_info,
		       struct cgraph_edge *edge,
818
		       int edge_time)
819
{
  gcov_type relbenefit;
  gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
  gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);

  /* Inlining into extern inline function is not a win.  */
  if (DECL_EXTERNAL (edge->caller->global.inlined_to
826 827
		     ? edge->caller->global.inlined_to->decl
		     : edge->caller->decl))
    return 1;

  /* Watch overflows.  */
  gcc_checking_assert (uninlined_call_time >= 0);
  gcc_checking_assert (inlined_call_time >= 0);
  gcc_checking_assert (uninlined_call_time >= inlined_call_time);

  /* Compute relative time benefit, i.e. how much the call becomes faster.
     ??? Perhaps computing how much the caller+callee together become faster
     would lead to more realistic results.  */
  if (!uninlined_call_time)
    uninlined_call_time = 1;
  relbenefit =
    RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
	  uninlined_call_time);
  relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
  gcc_checking_assert (relbenefit >= 0);
  relbenefit = MAX (relbenefit, 1);
  return relbenefit;
}


/* A cost model driving the inlining heuristics in a way so that the edges with
   the smallest badness are inlined first.  After each inlining is performed
   the costs of all caller edges of nodes affected are recomputed so the
   metrics may accurately depend on values such as the number of inlinable callers
   of the function or function body size.  */

static int
857
edge_badness (struct cgraph_edge *edge, bool dump)
858
{
859
  gcov_type badness;
860
  int growth, edge_time;
  struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
							      NULL);
  struct inline_summary *callee_info = inline_summary (callee);
864
  inline_hints hints;
865

866
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
867 868
    return INT_MIN;

  growth = estimate_edge_growth (edge);
870
  edge_time = estimate_edge_time (edge);
871
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  gcc_checking_assert (edge_time <= callee_info->time);
  gcc_checking_assert (growth <= callee_info->size);
875

876 877
  if (dump)
    {
878
      fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
879
	       xstrdup (edge->caller->name ()),
880
	       edge->caller->order,
881
	       xstrdup (callee->name ()),
882
	       edge->callee->order);
883
      fprintf (dump_file, "      size growth %i, time %i ",
884
	       growth,
885
	       edge_time);
886
      dump_inline_hints (dump_file, hints);
887 888
      if (big_speedup_p (edge))
	fprintf (dump_file, " big_speedup");
889
      fprintf (dump_file, "\n");
890
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
894
    {
895
      badness = INT_MIN / 2 + growth;
896
      if (dump)
897
	fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
898 899
		 growth);
    }
900

  /* When profiling is available, compute badness as:

	        relative_edge_count * relative_time_benefit
     goodness = -------------------------------------------
		growth_f_caller

     badness = -goodness

    The fraction is upside down, because on edge counts and time benefits
    the bounds are known.  Edge growth is essentially unlimited.  */

911
  else if (max_count)
912
    {
913
      sreal tmp, relbenefit_real, growth_real;
914
      int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
      /* Capping edge->count to max_count. edge->count can be larger than
	 max_count if an inline adds new edges which increase max_count
	 after max_count is computed.  */
918
      gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
919

920 921
      sreal_init (&relbenefit_real, relbenefit, 0);
      sreal_init (&growth_real, growth, 0);
922 923

      /* relative_edge_count.  */
924
      sreal_init (&tmp, edge_count, 0);
      sreal_div (&tmp, &tmp, &max_count_real);

      /* relative_time_benefit.  */
      sreal_mul (&tmp, &tmp, &relbenefit_real);
      sreal_div (&tmp, &tmp, &max_relbenefit_real);

      /* growth_f_caller.  */
      sreal_mul (&tmp, &tmp, &half_int_min_real);
      sreal_div (&tmp, &tmp, &growth_real);

      badness = -1 * sreal_to_int (&tmp);
 
      if (dump)
	{
	  fprintf (dump_file,
940
		   "      %i (relative %f): profile info. Relative count %f%s"
941 942
		   " * Relative benefit %f\n",
		   (int) badness, (double) badness / INT_MIN,
943 944
		   (double) edge_count / max_count,
		   edge->count > max_count ? " (capped to max_count)" : "",
945
		   relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
946 947
	}
    }
948

  /* When the function local profile is available, compute badness as:

                 relative_time_benefit
     goodness =  ---------------------------------
	         growth_of_caller * overall_growth

     badness = - goodness

     compensated by the inline hints.
  */
959
  else if (flag_guess_branch_prob)
960
    {
961 962
      badness = (relative_time_benefit (callee_info, edge, edge_time)
		 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
963
      badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
      gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
      if ((hints & (INLINE_HINT_indirect_call
		    | INLINE_HINT_loop_iterations
967
	            | INLINE_HINT_array_index
		    | INLINE_HINT_loop_stride))
	  || callee_info->growth <= 0)
	badness *= 8;
971
      if (hints & (INLINE_HINT_same_scc))
	badness /= 16;
      else if (hints & (INLINE_HINT_in_scc))
	badness /= 8;
      else if (hints & (INLINE_HINT_cross_module))
	badness /= 2;
      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
      if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
	badness *= 16;
      if (dump)
	{
	  fprintf (dump_file,
983
		   "      %i: guessed profile. frequency %f,"
984 985
		   " benefit %f%%, time w/o inlining %i, time w inlining %i"
		   " overall growth %i (current) %i (original)\n",
986
		   (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
987 988
		   relative_time_benefit (callee_info, edge, edge_time) * 100.0
		   / RELATIVE_TIME_BENEFIT_RANGE, 
989
		   (int)compute_uninlined_call_time (callee_info, edge),
		   (int)compute_inlined_call_time (edge, edge_time),
		   estimate_growth (callee),
		   callee_info->growth);
993
	}
    }
  /* When function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for overall number
     of functions fully inlined in the program.  */
  else
    {
1001
      int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1002
      badness = growth * 256;
1003

1004
      /* Decrease badness if call is nested.  */
H.J. Lu committed
1005
      if (badness > 0)
1006 1007
	badness >>= nest;
      else
1008
	{
1009
	  badness <<= nest;
	}
      if (dump)
	fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
		 nest);
1014
    }

  /* Ensure that we did not overflow in all the fixed point math above.  */
  gcc_assert (badness >= INT_MIN);
  gcc_assert (badness <= INT_MAX - 1);
1019
  /* Make recursive inlining happen always after other inlining is done.  */
1020
  if (cgraph_edge_recursive_p (edge))
1021
    return badness + 1;
1022
  else
1023
    return badness;
}
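
/* To make the fixed point arithmetic above concrete, a hypothetical example
   for the guessed-profile case on a host with 32-bit int: INT_MIN / 16
   divided by RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64) is -4, so the badness
   is roughly relative_time_benefit * -4 / (growth_of_caller * overall_growth).
   For a call whose relative benefit is about half of the range (~16777215),
   a caller growth of 20 and a callee overall growth of 10, this yields about
   16777215 * -4 / 200 = -335544; a loop or indirect-call hint then multiplies
   it by 8, making the key more negative so the edge is popped from the heap
   (and therefore inlined) earlier.  */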

/* Recompute badness of EDGE and update its key in HEAP if needed.  */
static inline void
update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
{
  int badness = edge_badness (edge, false);
  if (edge->aux)
    {
      fibnode_t n = (fibnode_t) edge->aux;
      gcc_checking_assert (n->data == edge);

      /* fibheap_replace_key only decreases the keys.
	 When we increase the key we do not update the heap
	 and instead re-insert the element once it becomes
	 a minimum of the heap.  */
      if (badness < n->key)
	{
1042 1043 1044 1045
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1046
		       xstrdup (edge->caller->name ()),
1047
		       edge->caller->order,
1048
		       xstrdup (edge->callee->name ()),
1049
		       edge->callee->order,
1050 1051 1052
		       (int)n->key,
		       badness);
	    }
1053
	  fibheap_replace_key (heap, n, badness);
1054 1055 1056 1057
	  gcc_checking_assert (n->key == badness);
	}
    }
  else
1058 1059 1060 1061 1062
    {
       if (dump_file && (dump_flags & TDF_DETAILS))
	 {
	   fprintf (dump_file,
		    "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1063
		    xstrdup (edge->caller->name ()),
1064
		    edge->caller->order,
1065
		    xstrdup (edge->callee->name ()),
1066
		    edge->callee->order,
1067 1068 1069 1070
		    badness);
	 }
      edge->aux = fibheap_insert (heap, badness, edge);
    }
1071 1072
}


/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need to be reset
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  int i;
  struct ipa_ref *ref;

  if (where->global.inlined_to)
    where = where->global.inlined_to;

  /* WHERE body size has changed, the cached growth is invalid.  */
  reset_node_growth_cache (where);

  for (edge = where->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      reset_edge_growth_cache (edge);
  for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
					      i, ref); i++)
    if (ref->use == IPA_REF_ALIAS)
      reset_edge_caches (ipa_ref_referring_node (ref));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
	if (e->inline_failed)
	  reset_edge_growth_cache (e);
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
	      {
		if (e->caller == node)
		  return;
		e = e->caller->callers;
	      }
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}

/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes,
		    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
1139 1140
  int i;
  struct ipa_ref *ref;
1141

1142
  if ((!node->alias && !inline_summary (node)->inlinable)
1143 1144
      || node->global.inlined_to)
    return;
1145
  if (!bitmap_set_bit (updated_nodes, node->uid))
1146 1147
    return;

1148
  for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
1149
					      i, ref); i++)
1150 1151
    if (ref->use == IPA_REF_ALIAS)
      {
1152
	struct cgraph_node *alias = ipa_ref_referring_node (ref);
1153 1154 1155
        update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
      }

1156 1157
  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
1158
      {
1159 1160
        if (!check_inlinablity_for
	    || check_inlinablity_for == edge)
1161
	  {
1162 1163 1164 1165 1166 1167 1168 1169 1170
	    if (can_inline_edge_p (edge, false)
		&& want_inline_small_function_p (edge, false))
	      update_edge_key (heap, edge);
	    else if (edge->aux)
	      {
		report_inline_failed_reason (edge);
		fibheap_delete_node (heap, (fibnode_t) edge->aux);
		edge->aux = NULL;
	      }
1171
	  }
1172 1173
	else if (edge->aux)
	  update_edge_key (heap, edge);
1174
      }
1175 1176
}

1177
/* Recompute HEAP nodes for each uninlined call in NODE.
   This is used when we know that edge badnesses are going only to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (fibheap_t heap, struct cgraph_node *node,
		    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
1187

1188 1189 1190 1191 1192 1193
  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
1194
      {
1195 1196
	enum availability avail;
	struct cgraph_node *callee;
	/* We do not reset callee growth cache here.  Since we added a new call,
	   growth could have just increased and consequently the badness metric
	   doesn't need updating.  */
	if (e->inline_failed
1201 1202
	    && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
	    && inline_summary (callee)->inlinable
1203
	    && avail >= AVAIL_AVAILABLE
1204
	    && !bitmap_bit_p (updated_nodes, callee->uid))
1205
	  {
	    if (can_inline_edge_p (e, false)
		&& want_inline_small_function_p (e, false))
	      update_edge_key (heap, e);
	    else if (e->aux)
	      {
		report_inline_failed_reason (e);
		fibheap_delete_node (heap, (fibnode_t) e->aux);
		e->aux = NULL;
	      }
	  }
	if (e->next_callee)
	  e = e->next_callee;
	else
	  {
	    do
1221
	      {
1222 1223 1224
		if (e->caller == node)
		  return;
		e = e->caller->callers;
1225
	      }
1226 1227
	    while (!e->next_callee);
	    e = e->next_callee;
	  }
      }
}

/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
			fibheap_t heap)
{
  struct cgraph_edge *e;
1240 1241
  enum availability avail;

1242
  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
	|| (cgraph_function_or_thunk_node (e->callee, &avail) == node
	    && avail > AVAIL_OVERWRITABLE))
1246
      {
1247
	/* When profile feedback is available, prioritize by expected number
1248
	   of calls.  */
1249
        fibheap_insert (heap,
1250
			!max_count ? -e->frequency
1251 1252
		        : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
		        e);
      }
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
1256
      lookup_recursive_calls (node, e->callee, heap);
}

/* Decide on recursive inlining: in the case the function has recursive calls,
   inline until body size reaches the given argument.  If any new indirect edges
   are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
   is NULL.  */

static bool
1265
recursive_inlining (struct cgraph_edge *edge,
1266
		    vec<cgraph_edge_p> *new_edges)
1267 1268
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1269
  fibheap_t heap;
1270
  struct cgraph_node *node;
1271
  struct cgraph_edge *e;
1272
  struct cgraph_node *master_clone = NULL, *next;
1273 1274 1275
  int depth = 0;
  int n = 0;

1276 1277 1278 1279
  node = edge->caller;
  if (node->global.inlined_to)
    node = node->global.inlined_to;

1280
  if (DECL_DECLARED_INLINE_P (node->decl))
1281
    limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1282 1283

  /* Make sure that function is small enough to be considered for inlining.  */
1284
  if (estimate_size_after_inlining (node, edge)  >= limit)
1285 1286 1287 1288 1289 1290 1291 1292
    return false;
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }
1293 1294

  if (dump_file)
H.J. Lu committed
1295
    fprintf (dump_file,
1296
	     "  Performing recursive inlining on %s\n",
1297
	     node->name ());
1298 1299

  /* Do the inlining and update list of recursive call during process.  */
1300
  while (!fibheap_empty (heap))
1301
    {
1302 1303
      struct cgraph_edge *curr
	= (struct cgraph_edge *) fibheap_extract_min (heap);
1304
      struct cgraph_node *cnode, *dest = curr->callee;
1305

1306 1307 1308
      if (!can_inline_edge_p (curr, true))
	continue;

      /* MASTER_CLONE is produced in the case we already started modifying
	 the function.  Be sure to redirect the edge to the original body before
	 estimating growths, otherwise we will be seeing growths after inlining
	 the already modified body.  */
      if (master_clone)
	{
          cgraph_redirect_edge_callee (curr, master_clone);
          reset_edge_growth_cache (curr);
	}

      if (estimate_size_after_inlining (node, curr) > limit)
	{
	  cgraph_redirect_edge_callee (curr, dest);
	  reset_edge_growth_cache (curr);
	  break;
	}

      depth = 1;
      for (cnode = curr->caller;
	   cnode->global.inlined_to; cnode = cnode->callers->caller)
1329 1330
	if (node->decl
	    == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
1331
          depth++;
1332

1333
      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
	{
	  cgraph_redirect_edge_callee (curr, dest);
	  reset_edge_growth_cache (curr);
	  continue;
	}
1339

1340
      if (dump_file)
1341
	{
	  fprintf (dump_file,
		   "   Inlining call of depth %i", depth);
	  if (node->count)
	    {
	      fprintf (dump_file, " called approx. %.2f times per call",
		       (double)curr->count / node->count);
	    }
	  fprintf (dump_file, "\n");
	}
      if (!master_clone)
	{
	  /* We need original clone to copy around.  */
1354
	  master_clone = cgraph_clone_node (node, node->decl,
1355
					    node->count, CGRAPH_FREQ_BASE,
1356
					    false, vNULL, true, NULL);
1357 1358
	  for (e = master_clone->callees; e; e = e->next_callee)
	    if (!e->inline_failed)
1359
	      clone_inlined_nodes (e, true, false, NULL);
1360 1361
          cgraph_redirect_edge_callee (curr, master_clone);
          reset_edge_growth_cache (curr);
1362 1363
	}

1364
      inline_call (curr, false, new_edges, &overall_size, true);
1365
      lookup_recursive_calls (node, curr->callee, heap);
1366 1367
      n++;
    }
1368

1369 1370
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1371
  fibheap_delete (heap);

  if (!master_clone)
    return false;

1376
  if (dump_file)
    fprintf (dump_file,
1378 1379
	     "\n   Inlined %i times, "
	     "body grown from size %i to %i, time %i to %i\n", n,
1380 1381
	     inline_summary (master_clone)->size, inline_summary (node)->size,
	     inline_summary (master_clone)->time, inline_summary (node)->time);

  /* Remove the master clone we used for inlining.  We rely on the fact that
     clones inlined into the master clone get queued just before the master
     clone so we don't need recursion.  */
  for (node = cgraph_first_function (); node != master_clone;
1387 1388
       node = next)
    {
1389
      next = cgraph_next_function (node);
1390 1391 1392
      if (node->global.inlined_to == master_clone)
	cgraph_remove_node (node);
    }
1393
  cgraph_remove_node (master_clone);
  return true;
}


/* Given whole compilation unit estimate of INSNS, compute how large we can
   allow the unit to grow.  */

static int
compute_max_insns (int insns)
{
  int max_insns = insns;
  if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
    max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);

1408 1409
  return ((HOST_WIDEST_INT) max_insns
	  * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
}
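
/* For example (parameter values are illustrative): with an overall unit size
   of 50000 instructions and --param inline-unit-growth of 30, the unit may
   grow to 50000 * (100 + 30) / 100 = 65000 instructions; for tiny units the
   base is first raised to --param large-unit-insns so that small programs
   are not starved of inlining.  */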


/* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1414

1415
static void
1416
add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1417
{
1418
  while (new_edges.length () > 0)
1419
    {
1420
      struct cgraph_edge *edge = new_edges.pop ();
1421 1422

      gcc_assert (!edge->aux);
1423
      if (edge->inline_failed
1424 1425 1426
	  && can_inline_edge_p (edge, true)
	  && want_inline_small_function_p (edge, true))
        edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1427 1428 1429
    }
}

1430 1431 1432 1433 1434
/* Remove EDGE from the fibheap.  */

static void
heap_edge_removal_hook (struct cgraph_edge *e, void *data)
{
1435 1436
  if (e->callee)
    reset_node_growth_cache (e->callee);
1437 1438 1439 1440 1441 1442
  if (e->aux)
    {
      fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
      e->aux = NULL;
    }
}
1443

/* Return true if speculation of edge E seems useful.
   If ANTICIPATE_INLINING is true, be conservative and hope that E
   may get inlined.  */

bool
speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
{
  enum availability avail;
  struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
  struct cgraph_edge *direct, *indirect;
  struct ipa_ref *ref;

  gcc_assert (e->speculative && !e->indirect_unknown_callee);

  if (!cgraph_maybe_hot_edge_p (e))
    return false;

  /* See if IP optimizations found something potentially useful about the
     function.  For now we look only for CONST/PURE flags.  Almost everything
     else we propagate is useless.  */
  if (avail >= AVAIL_AVAILABLE)
    {
1466
      int ecf_flags = flags_from_decl_or_type (target->decl);
      if (ecf_flags & ECF_CONST)
        {
          cgraph_speculative_call_info (e, direct, indirect, ref);
	  if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
	    return true;
        }
      else if (ecf_flags & ECF_PURE)
        {
          cgraph_speculative_call_info (e, direct, indirect, ref);
	  if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
	    return true;
        }
    }
  /* If we did not manage to inline the function nor redirect
     to an ipa-cp clone (those are seen by having the local flag set),
     it is probably pointless to inline it unless the hardware is missing
     an indirect call predictor.  */
  if (!anticipate_inlining && e->inline_failed && !target->local.local)
    return false;
  /* For overwritable targets there is not much to do.  */
  if (e->inline_failed && !can_inline_edge_p (e, false, true))
    return false;
  /* OK, speculation seems interesting.  */
  return true;
}

/* We know that EDGE is not going to be inlined.
   See if we can remove speculation.  */

static void
resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
{
  if (edge->speculative && !speculation_useful_p (edge, false))
    {
      struct cgraph_node *node = edge->caller;
      struct cgraph_node *where = node->global.inlined_to
				  ? node->global.inlined_to : node;
      bitmap updated_nodes = BITMAP_ALLOC (NULL);

      cgraph_resolve_speculation (edge, NULL);
      reset_edge_caches (where);
      inline_update_overall_summary (where);
      update_caller_keys (edge_heap, where,
			  updated_nodes, NULL);
      update_callee_keys (edge_heap, where,
			  updated_nodes);
      BITMAP_FREE (updated_nodes);
    }
}

/* We use a greedy algorithm for inlining of small functions:
   All inline candidates are put into a prioritized heap ordered in
   increasing badness.

   The inlining of small functions is bounded by the unit growth parameters.  */

static void
1524
inline_small_functions (void)
1525 1526
{
  struct cgraph_node *node;
1527
  struct cgraph_edge *edge;
1528
  fibheap_t edge_heap = fibheap_new ();
1529
  bitmap updated_nodes = BITMAP_ALLOC (NULL);
1530
  int min_size, max_size;
  auto_vec<cgraph_edge_p> new_indirect_edges;
1532
  int initial_size = 0;
  struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1534
  struct cgraph_edge_hook_list *edge_removal_hook_holder;
1535

1536
  if (flag_indirect_inlining)
1537
    new_indirect_edges.create (8);
1538

1539 1540 1541
  edge_removal_hook_holder
    = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);

1542 1543
  /* Compute overall unit size and other global parameters used by badness
     metrics.  */
1544

1545
  max_count = 0;
  ipa_reduced_postorder (order, true, true, NULL);
  free (order);
1548

1549 1550
  FOR_EACH_DEFINED_FUNCTION (node)
    if (!node->global.inlined_to)
1551
      {
1552 1553 1554 1555
	if (cgraph_function_with_gimple_body_p (node)
	    || node->thunk.thunk_p)
	  {
	    struct inline_summary *info = inline_summary (node);
1556
	    struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1557

1558
	    if (!DECL_EXTERNAL (node->decl))
1559
	      initial_size += info->size;
1560
	    info->growth = estimate_growth (node);
1561 1562 1563 1564 1565
	    if (dfs && dfs->next_cycle)
	      {
		struct cgraph_node *n2;
		int id = dfs->scc_no + 1;
		for (n2 = node; n2;
1566
		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
		  {
		    struct inline_summary *info2 = inline_summary (n2);
		    if (info2->scc_no)
		      break;
		    info2->scc_no = id;
		  }
	      }
	  }

	for (edge = node->callers; edge; edge = edge->next_caller)
	  if (max_count < edge->count)
	    max_count = edge->count;
      }
  sreal_init (&max_count_real, max_count, 0);
  sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
  sreal_init (&half_int_min_real, INT_MAX / 2, 0);
  ipa_free_postorder_info ();
  initialize_growth_caches ();

  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on inlining of small functions.  Starting with size %i.\n",
	     initial_size);

  overall_size = initial_size;
  max_size = compute_max_insns (overall_size);
  min_size = overall_size;

  /* Populate the heap with all edges we might inline.  */

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      bool update = false;
      struct cgraph_edge *next;

      if (dump_file)
	fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
		 node->name (), node->order);

      for (edge = node->callees; edge; edge = next)
	{
	  next = edge->next_callee;
	  if (edge->inline_failed
	      && !edge->aux
	      && can_inline_edge_p (edge, true)
	      && want_inline_small_function_p (edge, true)
	      && edge->inline_failed)
	    {
	      gcc_assert (!edge->aux);
	      update_edge_key (edge_heap, edge);
	    }
	  if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
	    {
	      cgraph_resolve_speculation (edge, NULL);
	      update = true;
	    }
	}
      if (update)
	{
	  struct cgraph_node *where = node->global.inlined_to
				      ? node->global.inlined_to : node;
	  inline_update_overall_summary (where);
	  reset_node_growth_cache (where);
	  reset_edge_caches (where);
	  update_caller_keys (edge_heap, where,
			      updated_nodes, NULL);
	  bitmap_clear (updated_nodes);
	}
    }

  gcc_assert (in_lto_p
	      || !max_count
	      || (profile_info && flag_branch_probabilities));

  while (!fibheap_empty (edge_heap))
    {
      int old_size = overall_size;
      struct cgraph_node *where, *callee;
      int badness = fibheap_min_key (edge_heap);
      int current_badness;
      int cached_badness;
      int growth;

      edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
      gcc_assert (edge->aux);
      edge->aux = NULL;
      if (!edge->inline_failed)
	continue;

      /* Be sure that caches are maintained consistent.
	 We cannot make this ENABLE_CHECKING only because it causes different
	 updates of the fibheap queue.  */
      cached_badness = edge_badness (edge, false);
      reset_edge_growth_cache (edge);
      reset_node_growth_cache (edge->callee);

      /* When updating the edge costs, we only decrease badness in the keys.
	 Increases of badness are handled lazily; when we see a key with an
	 out-of-date value on it, we re-insert it now.  */
      current_badness = edge_badness (edge, false);
      gcc_assert (cached_badness == current_badness);
      gcc_assert (current_badness >= badness);
      if (current_badness != badness)
	{
	  edge->aux = fibheap_insert (edge_heap, current_badness, edge);
	  continue;
	}

      if (!can_inline_edge_p (edge, true))
	{
	  resolve_noninline_speculation (edge_heap, edge);
	  continue;
	}

      callee = cgraph_function_or_thunk_node (edge->callee, NULL);
      growth = estimate_edge_growth (edge);
      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nConsidering %s/%i with %i size\n",
		   callee->name (), callee->order,
		   inline_summary (callee)->size);
	  fprintf (dump_file,
		   " to be inlined into %s/%i in %s:%i\n"
		   " Estimated growth after inlined into all is %+i insns.\n"
		   " Estimated badness is %i, frequency %.2f.\n",
		   edge->caller->name (), edge->caller->order,
		   flag_wpa ? "unknown"
		   : gimple_filename ((const_gimple) edge->call_stmt),
		   flag_wpa ? -1
		   : gimple_lineno ((const_gimple) edge->call_stmt),
		   estimate_growth (callee),
		   badness,
		   edge->frequency / (double)CGRAPH_FREQ_BASE);
	  if (edge->count)
	    fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
		     edge->count);
	  if (dump_flags & TDF_DETAILS)
	    edge_badness (edge, true);
	}

      if (overall_size + growth > max_size
	  && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	{
	  edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
	  report_inline_failed_reason (edge);
	  resolve_noninline_speculation (edge_heap, edge);
	  continue;
	}

      if (!want_inline_small_function_p (edge, true))
	{
	  resolve_noninline_speculation (edge_heap, edge);
	  continue;
	}

      /* The heuristics for inlining small functions work poorly for
	 recursive calls, where the effect is similar to loop unrolling.
	 When inlining such an edge seems profitable, leave the decision to
	 the special-purpose recursive inliner.  */
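      /* Illustrative example (hypothetical code, not taken from this file):
	 inlining the self-recursive call in

	   int sum (int n) { return n ? n + sum (n - 1) : 0; }

	 one level at a time behaves much like unrolling the equivalent loop;
	 the actual decisions are delegated to recursive_inlining below.  */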
      if (cgraph_edge_recursive_p (edge))
	{
	  where = edge->caller;
	  if (where->global.inlined_to)
	    where = where->global.inlined_to;
	  if (!recursive_inlining (edge,
				   flag_indirect_inlining
				   ? &new_indirect_edges : NULL))
	    {
	      edge->inline_failed = CIF_RECURSIVE_INLINING;
	      resolve_noninline_speculation (edge_heap, edge);
	      continue;
	    }
	  reset_edge_caches (where);
	  /* Recursive inliner inlines all recursive calls of the function
	     at once. Consequently we need to update all callee keys.  */
	  if (flag_indirect_inlining)
	    add_new_edges_to_heap (edge_heap, new_indirect_edges);
	  update_callee_keys (edge_heap, where, updated_nodes);
	  bitmap_clear (updated_nodes);
	}
      else
	{
	  struct cgraph_node *outer_node = NULL;
	  int depth = 0;

	  /* Consider the case where a self-recursive function A is inlined
	     into B.  This is a desired optimization in some cases, since it
	     leads to an effect similar to loop peeling and we might completely
	     optimize out the recursive call.  However we must be extra
	     selective.  */

	  where = edge->caller;
	  while (where->global.inlined_to)
	    {
	      if (where->decl == callee->decl)
		outer_node = where, depth++;
	      where = where->callers->caller;
	    }
	  if (outer_node
	      && !want_inline_self_recursive_call_p (edge, outer_node,
						     true, depth))
	    {
	      edge->inline_failed
		= (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
		   ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
	      resolve_noninline_speculation (edge_heap, edge);
	      continue;
	    }
	  else if (depth && dump_file)
	    fprintf (dump_file, " Peeling recursion with depth %i\n", depth);

	  gcc_checking_assert (!callee->global.inlined_to);
	  inline_call (edge, true, &new_indirect_edges, &overall_size, true);
	  if (flag_indirect_inlining)
	    add_new_edges_to_heap (edge_heap, new_indirect_edges);

	  reset_edge_caches (edge->callee);
	  reset_node_growth_cache (callee);

	  update_callee_keys (edge_heap, where, updated_nodes);
	}
      where = edge->caller;
      if (where->global.inlined_to)
	where = where->global.inlined_to;

      /* Our profitability metric can depend on local properties
	 such as the number of inlinable calls and the size of the function
	 body.  After inlining these properties might change for the function
	 we inlined into (since its body size changed) and for the functions
	 called by the function we inlined (since the number of their
	 inlinable callers might change).  */
      update_caller_keys (edge_heap, where, updated_nodes, NULL);
      bitmap_clear (updated_nodes);

      if (dump_file)
	{
	  fprintf (dump_file,
		   " Inlined into %s which now has time %i and size %i, "
		   "net change of %+i.\n",
		   edge->caller->name (),
		   inline_summary (edge->caller)->time,
		   inline_summary (edge->caller)->size,
		   overall_size - old_size);
	}
      if (min_size > overall_size)
	{
	  min_size = overall_size;
	  max_size = compute_max_insns (min_size);

	  if (dump_file)
	    fprintf (dump_file, "New minimal size reached: %i\n", min_size);
	}
    }

  free_growth_caches ();
  fibheap_delete (edge_heap);
  if (dump_file)
    fprintf (dump_file,
	     "Unit growth for small function inlining: %i->%i (%i%%)\n",
	     initial_size, overall_size,
	     initial_size ? overall_size * 100 / (initial_size) - 100: 0);
  BITMAP_FREE (updated_nodes);
  cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
}

/* Flatten NODE.  Performed both during early inlining and
   at IPA inlining time.  */
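
/* For example, given

     __attribute__ ((flatten)) void f (void) { g (); h (); }

   flattening tries to inline g and h, and recursively their callees, into f,
   stopping at cycles and at edges that cannot be inlined.  The snippet is
   only an illustration of the attribute handled here.  */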

static void
flatten_function (struct cgraph_node *node, bool early)
{
  struct cgraph_edge *e;

  /* We shouldn't be called recursively when we are being processed.  */
  gcc_assert (node->aux == NULL);

  node->aux = (void *) node;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *orig_callee;
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);

      /* We've hit a cycle?  It is time to give up.  */
      if (callee->aux)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Not inlining %s into %s to avoid cycle.\n",
		     xstrdup (callee->name ()),
		     xstrdup (e->caller->name ()));
	  e->inline_failed = CIF_RECURSIVE_INLINING;
	  continue;
	}

      /* When the edge is already inlined, we just need to recurse into
	 it in order to fully flatten the leaves.  */
      if (!e->inline_failed)
	{
	  flatten_function (callee, early);
	  continue;
	}

      /* The flatten attribute needs to be processed during late inlining.
	 For extra code quality we however also do flattening during early
	 optimization.  */
      if (!early
	  ? !can_inline_edge_p (e, true)
	  : !can_early_inline_edge_p (e))
	continue;

      if (cgraph_edge_recursive_p (e))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not inlining: recursive call.\n");
	  continue;
	}

      if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
	  != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not inlining: SSA form does not match.\n");
	  continue;
	}

      /* Inline the edge and flatten the inline clone.  Avoid
         recursing through the original node if the node was cloned.  */
      if (dump_file)
	fprintf (dump_file, " Inlining %s into %s.\n",
		 xstrdup (callee->name ()),
		 xstrdup (e->caller->name ()));
      orig_callee = callee;
      inline_call (e, true, NULL, NULL, false);
      if (e->callee != orig_callee)
	orig_callee->aux = (void *) node;
      flatten_function (e->callee, early);
      if (e->callee != orig_callee)
	orig_callee->aux = NULL;
    }

  node->aux = NULL;
  if (!node->global.inlined_to)
    inline_update_overall_summary (node);
}

/* Count number of callers of NODE and store it into DATA (that
   points to int).  Worker for cgraph_for_node_and_aliases.  */

static bool
sum_callers (struct cgraph_node *node, void *data)
{
  struct cgraph_edge *e;
  int *num_calls = (int *)data;

  for (e = node->callers; e; e = e->next_caller)
    (*num_calls)++;
  return false;
}

/* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
   DATA points to number of calls originally found so we avoid infinite
   recursion.  */
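
/* In practice this is driven through cgraph_for_node_and_aliases, e.g.

     int num_calls = 0;
     cgraph_for_node_and_aliases (node, sum_callers, &num_calls, true);
     cgraph_for_node_and_aliases (node, inline_to_all_callers, &num_calls, true);

   as done in ipa_inline below; the snippet only restates that call
   sequence.  */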

static bool
inline_to_all_callers (struct cgraph_node *node, void *data)
{
  int *num_calls = (int *)data;
  while (node->callers && !node->global.inlined_to)
    {
      struct cgraph_node *caller = node->callers->caller;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "\nInlining %s size %i.\n",
		   node->name (),
		   inline_summary (node)->size);
	  fprintf (dump_file,
		   " Called once from %s %i insns.\n",
		   node->callers->caller->name (),
		   inline_summary (node->callers->caller)->size);
	}

      inline_call (node->callers, true, NULL, NULL, true);
      if (dump_file)
	fprintf (dump_file,
		 " Inlined into %s which now has %i size\n",
		 caller->name (),
		 inline_summary (caller)->size);
      if (!(*num_calls)--)
	{
	  if (dump_file)
	    fprintf (dump_file, "New calls found; giving up.\n");
	  return true;
	}
    }
  return false;
}

/* Decide on the inlining.  We do so in the topological order to avoid
   the expense of updating data structures.  */

static unsigned int
ipa_inline (void)
{
  struct cgraph_node *node;
  int nnodes;
  struct cgraph_node **order;
  int i;
  int cold;
  bool remove_functions = false;

  if (!optimize)
    return 0;

  order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);

  if (in_lto_p && optimize)
    ipa_update_after_lto_read ();

  if (dump_file)
    dump_inline_summaries (dump_file);

  nnodes = ipa_reverse_postorder (order);

  FOR_EACH_FUNCTION (node)
    node->aux = 0;

  if (dump_file)
    fprintf (dump_file, "\nFlattening functions:\n");

  /* In the first pass handle functions to be flattened.  Do this with
     a priority so none of our later choices will make this impossible.  */
  for (i = nnodes - 1; i >= 0; i--)
    {
      node = order[i];

      /* Handle nodes to be flattened.
	 Ideally when processing callees we would stop inlining at the
	 entry of cycles, possibly cloning that entry point and
	 trying to flatten it, turning it into a self-recursive
	 function.  */
      if (lookup_attribute ("flatten",
			    DECL_ATTRIBUTES (node->decl)) != NULL)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Flattening %s\n", node->name ());
	  flatten_function (node, false);
	}
    }

  inline_small_functions ();

  /* Do the first after-inlining removal.  We want to remove all "stale"
     extern inline functions and virtual functions so we really know what
     is called once.  */
  symtab_remove_unreachable_nodes (false, dump_file);
  free (order);

  /* Inline functions with a property that after inlining into all callers the
     code size will shrink because the out-of-line copy is eliminated.
     We do this regardless of the callee size as long as function growth limits
     are met.  */
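
  /* For example, a static helper whose out-of-line copy costs more than the
     code added at its call sites shrinks the unit once every call is inlined
     and the copy is removed.  This is only an illustration; the actual test
     is want_inline_function_to_all_callers_p below.  */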
  if (dump_file)
    fprintf (dump_file,
	     "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");

  /* Inlining one function called once has a good chance of preventing
     inlining of another function into the same callee.  Ideally we should
     work in priority order, but probably inlining hot functions first
     is a good cut without the extra pain of maintaining the queue.

     ??? this is not really fitting the bill perfectly: inlining a function
     into a callee often leads to better optimization of the callee due to
     increased context for optimization.
     For example if main() calls a function that outputs help
     and then a function that does the main optimization, we should inline
     the second with priority even if both calls are cold by themselves.

     We probably want to implement a new predicate replacing our use of
     maybe_hot_edge interpreted as maybe_hot_edge || callee is known
     to be hot.  */
  for (cold = 0; cold <= 1; cold++)
    {
      FOR_EACH_DEFINED_FUNCTION (node)
	{
	  struct cgraph_edge *edge, *next;
	  bool update = false;

	  for (edge = node->callees; edge; edge = next)
	    {
	      next = edge->next_callee;
	      if (edge->speculative && !speculation_useful_p (edge, false))
		{
		  cgraph_resolve_speculation (edge, NULL);
		  update = true;
		  remove_functions = true;
		}
	    }
	  if (update)
	    {
	      struct cgraph_node *where = node->global.inlined_to
					  ? node->global.inlined_to : node;
	      reset_node_growth_cache (where);
	      reset_edge_caches (where);
	      inline_update_overall_summary (where);
	    }
	  if (flag_inline_functions_called_once
	      && want_inline_function_to_all_callers_p (node, cold))
	    {
	      int num_calls = 0;
	      cgraph_for_node_and_aliases (node, sum_callers,
					   &num_calls, true);
	      cgraph_for_node_and_aliases (node, inline_to_all_callers,
					   &num_calls, true);
	      remove_functions = true;
	    }
	}
    }

  /* Free ipa-prop structures if they are no longer needed.  */
  if (optimize)
    ipa_free_all_structures_after_iinln ();

  if (dump_file)
    fprintf (dump_file,
	     "\nInlined %i calls, eliminated %i functions\n\n",
	     ncalls_inlined, nfunctions_inlined);

  if (dump_file)
    dump_inline_summaries (dump_file);
  /* In WPA we use inline summaries for the partitioning process.  */
  if (!flag_wpa)
    inline_free_summary ();
  return remove_functions ? TODO_remove_functions : 0;
}

/* Inline always-inline function calls in NODE.  */

static bool
inline_always_inline_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
      if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
	continue;

      if (cgraph_edge_recursive_p (e))
	{
	  if (dump_file)
	    fprintf (dump_file, "  Not inlining recursive call to %s.\n",
		     e->callee->name ());
	  e->inline_failed = CIF_RECURSIVE_INLINING;
	  continue;
	}

      if (!can_early_inline_edge_p (e))
	{
	  /* Set inlined to true if the callee is marked "always_inline" but
	     is not inlinable.  This will allow flagging an error later in
	     expand_call_inline in tree-inline.c.  */
	  if (lookup_attribute ("always_inline",
				 DECL_ATTRIBUTES (callee->decl)) != NULL)
	    inlined = true;
	  continue;
	}

      if (dump_file)
	fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
		 xstrdup (e->callee->name ()),
		 xstrdup (e->caller->name ()));
      inline_call (e, true, NULL, NULL, false);
      inlined = true;
    }
  if (inlined)
    inline_update_overall_summary (node);

  return inlined;
}

/* Decide on the inlining.  We do so in the topological order to avoid
   the expense of updating data structures.  */

static bool
early_inline_small_functions (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  bool inlined = false;

  for (e = node->callees; e; e = e->next_callee)
    {
      struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
      if (!inline_summary (callee)->inlinable
	  || !e->inline_failed)
	continue;

      /* Do not consider functions not declared inline.  */
      if (!DECL_DECLARED_INLINE_P (callee->decl)
	  && !flag_inline_small_functions
	  && !flag_inline_functions)
	continue;

      if (dump_file)
	fprintf (dump_file, "Considering inline candidate %s.\n",
		 callee->name ());

      if (!can_early_inline_edge_p (e))
	continue;

      if (cgraph_edge_recursive_p (e))
	{
	  if (dump_file)
	    fprintf (dump_file, "  Not inlining: recursive call.\n");
	  continue;
	}

      if (!want_early_inline_function_p (e))
	continue;

      if (dump_file)
	fprintf (dump_file, " Inlining %s into %s.\n",
		 xstrdup (callee->name ()),
		 xstrdup (e->caller->name ()));
      inline_call (e, true, NULL, NULL, true);
      inlined = true;
    }

  return inlined;
}

/* Do inlining of small functions.  Doing so early helps profiling and other
   passes to be somewhat more effective and avoids some code duplication in
   the later real inlining pass for testcases with very many function calls.  */

static unsigned int
early_inliner (void)
{
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  struct cgraph_edge *edge;
  unsigned int todo = 0;
  int iterations = 0;
  bool inlined = false;

  if (seen_error ())
    return 0;

  /* Do nothing if data structures for the ipa-inliner are already computed.
     This happens when some pass decides to construct a new function and
     cgraph_add_new_function calls lowering passes and early optimization on
     it.  This may confuse us when the early inliner decides to inline a call
     to a function clone, because function clones don't have a parameter list
     in ipa-prop matching their signature.  */
  if (ipa_node_params_vector.exists ())
    return 0;

#ifdef ENABLE_CHECKING
  verify_cgraph_node (node);
#endif
  ipa_remove_all_references (&node->ref_list);

  /* Even when not optimizing or not inlining, inline always-inline
     functions.  */
  inlined = inline_always_inline_functions (node);

  if (!optimize
      || flag_no_inline
      || !flag_early_inlining
      /* Never inline regular functions into always-inline functions
	 during incremental inlining.  This sucks as functions calling
	 always-inline functions will get less optimized, but at the
	 same time inlining functions calling always-inline
	 functions into an always-inline function might introduce
	 cycles of edges to be always inlined in the callgraph.

	 We might want to be smarter and just avoid this type of inlining.  */
      || DECL_DISREGARD_INLINE_LIMITS (node->decl))
    ;
  else if (lookup_attribute ("flatten",
			     DECL_ATTRIBUTES (node->decl)) != NULL)
    {
      /* When the function is marked to be flattened, recursively inline
	 all calls in it.  */
      if (dump_file)
	fprintf (dump_file,
		 "Flattening %s\n", node->name ());
      flatten_function (node, true);
      inlined = true;
    }
  else
    {
      /* We iterate incremental inlining to get trivial cases of indirect
	 inlining.  */
      while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
	     && early_inline_small_functions (node))
	{
	  timevar_push (TV_INTEGRATION);
	  todo |= optimize_inline_calls (current_function_decl);

	  /* Technically we ought to recompute inline parameters so the new
	     iteration of the early inliner works as expected.  We however
	     have values approximately right and thus we only need to update
	     edge info that might be cleared out for newly discovered edges.  */
	  for (edge = node->callees; edge; edge = edge->next_callee)
	    {
	      struct inline_edge_summary *es = inline_edge_summary (edge);
	      es->call_stmt_size
		= estimate_num_insns (edge->call_stmt, &eni_size_weights);
	      es->call_stmt_time
		= estimate_num_insns (edge->call_stmt, &eni_time_weights);
	      if (edge->callee->decl
		  && !gimple_check_call_matching_types (
		      edge->call_stmt, edge->callee->decl, false))
		edge->call_stmt_cannot_inline_p = true;
	    }
	  timevar_pop (TV_INTEGRATION);
	  iterations++;
	  inlined = false;
	}
      if (dump_file)
	fprintf (dump_file, "Iterations: %i\n", iterations);
    }

  if (inlined)
    {
      timevar_push (TV_INTEGRATION);
      todo |= optimize_inline_calls (current_function_decl);
      timevar_pop (TV_INTEGRATION);
    }

  cfun->always_inline_functions_inlined = true;

  return todo;
}

namespace {

const pass_data pass_data_early_inline =
{
  GIMPLE_PASS, /* type */
  "einline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  false, /* has_gate */
  true, /* has_execute */
  TV_EARLY_INLINING, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_early_inline : public gimple_opt_pass
{
public:
  pass_early_inline (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_inline, ctxt)
  {}

  /* opt_pass methods: */
  unsigned int execute () { return early_inliner (); }

}; // class pass_early_inline

} // anon namespace

gimple_opt_pass *
make_pass_early_inline (gcc::context *ctxt)
{
  return new pass_early_inline (ctxt);
}


/* When to run IPA inlining.  Inlining of always-inline functions
   happens during early inlining.

   Enable inlining unconditionally, because callgraph redirection
   happens here.  */

static bool
gate_ipa_inline (void)
{
  return true;
}

namespace {

const pass_data pass_data_ipa_inline =
{
  IPA_PASS, /* type */
  "inline", /* name */
  OPTGROUP_INLINE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_IPA_INLINING, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  TODO_remove_functions, /* todo_flags_start */
  ( TODO_dump_symtab ), /* todo_flags_finish */
};

class pass_ipa_inline : public ipa_opt_pass_d
{
public:
  pass_ipa_inline (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
		      inline_generate_summary, /* generate_summary */
		      inline_write_summary, /* write_summary */
		      inline_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      inline_transform, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate () { return gate_ipa_inline (); }
  unsigned int execute () { return ipa_inline (); }

}; // class pass_ipa_inline

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_inline (gcc::context *ctxt)
{
  return new pass_ipa_inline (ctxt);
}