tree-loop-linear.c 11.2 KB
Newer Older
1
/* Linear Loop transforms
2
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
   Contributed by Daniel Berlin <dberlin@dberlin.org>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Kelley Cook committed
19 20
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56


#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"

#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "varray.h"
#include "lambda.h"

/* Linear loop transforms include any composition of interchange,
   scaling, skewing, and reversal.  They are used to change the
   iteration order of loop nests in order to optimize data locality of
   traversals, or remove dependences that prevent
   parallelization/vectorization/etc.  

   TODO: Determine reuse vectors/matrix and use it to determine optimal
   transform matrix for locality purposes.
   TODO: Completion of partial transforms.  */

57 58 59 60
/* Gather statistics for loop interchange.  LOOP is the loop being
   considered. The first loop in the considered loop nest is
   FIRST_LOOP, and consequently, the index of the considered loop is
   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
Daniel Berlin committed
61 62 63
   
   Initializes:
   - DEPENDENCE_STEPS the sum of all the data dependence distances
64
   carried by loop LOOP,
Daniel Berlin committed
65 66

   - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
67
   for which the loop LOOP is not carrying any dependence,
Daniel Berlin committed
68

69
   - ACCESS_STRIDES the sum of all the strides in LOOP.
Daniel Berlin committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89

   Example: for the following loop,

   | loop_1 runs 1335 times
   |   loop_2 runs 1335 times
   |     A[{{0, +, 1}_1, +, 1335}_2]
   |     B[{{0, +, 1}_1, +, 1335}_2]
   |   endloop_2
   |   A[{0, +, 1336}_1]
   | endloop_1

   gather_interchange_stats (in loop_1) will return 
   DEPENDENCE_STEPS = 3002
   NB_DEPS_NOT_CARRIED_BY_LOOP = 5
   ACCESS_STRIDES = 10694

   gather_interchange_stats (in loop_2) will return 
   DEPENDENCE_STEPS = 3000
   NB_DEPS_NOT_CARRIED_BY_LOOP = 7
   ACCESS_STRIDES = 8010
90
*/
91 92 93

static void
gather_interchange_stats (varray_type dependence_relations, 
Daniel Berlin committed
94
			  varray_type datarefs,
95 96
			  struct loop *loop,
			  struct loop *first_loop,
Daniel Berlin committed
97 98 99
			  unsigned int *dependence_steps, 
			  unsigned int *nb_deps_not_carried_by_loop, 
			  unsigned int *access_strides)
100 101 102
{
  unsigned int i;

Daniel Berlin committed
103
  *dependence_steps = 0;
104
  *nb_deps_not_carried_by_loop = 0;
Daniel Berlin committed
105 106
  *access_strides = 0;

107 108 109 110 111 112 113
  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
    {
      int dist;
      struct data_dependence_relation *ddr = 
	(struct data_dependence_relation *) 
	VARRAY_GENERIC_PTR (dependence_relations, i);

114 115 116
      /* If we don't know anything about this dependence, or the distance
	 vector is NULL, or there is no dependence, then there is no reuse of
	 data.  */
Daniel Berlin committed
117

118 119 120 121 122
      if (DDR_DIST_VECT (ddr) == NULL
	  || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
	  || DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;
      
123

124
      
125
      dist = DDR_DIST_VECT (ddr)[loop->depth - first_loop->depth];
126
      if (dist == 0)
Daniel Berlin committed
127
	(*nb_deps_not_carried_by_loop) += 1;
128
      else if (dist < 0)
Daniel Berlin committed
129
	(*dependence_steps) += -dist;
130
      else
Daniel Berlin committed
131 132 133 134 135 136 137 138 139 140
	(*dependence_steps) += dist;
    }

  /* Compute the access strides.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
    {
      unsigned int it;
      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
      tree stmt = DR_STMT (dr);
      struct loop *stmt_loop = loop_containing_stmt (stmt);
141 142 143 144
      struct loop *inner_loop = first_loop->inner;
      
      if (inner_loop != stmt_loop 
	  && !flow_loop_nested_p (inner_loop, stmt_loop))
Daniel Berlin committed
145 146 147 148 149
	continue;
      for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
	{
	  tree chrec = DR_ACCESS_FN (dr, it);
	  tree tstride = evolution_part_in_loop_num 
150
	    (chrec, loop->num);
Daniel Berlin committed
151 152 153 154 155 156 157
	  
	  if (tstride == NULL_TREE
	      || TREE_CODE (tstride) != INTEGER_CST)
	    continue;
	  
	  (*access_strides) += int_cst_value (tstride);
	}
158 159 160
    }
}

161 162
/* Attempt to apply interchange transformations to TRANS to maximize the
   spatial and temporal locality of the loop.  
163
   Returns the new transform matrix.  The smaller the reuse vector
Daniel Berlin committed
164 165 166 167
   distances in the inner loops, the fewer the cache misses.
   FIRST_LOOP is the loop->num of the first loop in the analyzed loop
   nest.  */

168 169 170 171

static lambda_trans_matrix
try_interchange_loops (lambda_trans_matrix trans, 
		       unsigned int depth,		       
Daniel Berlin committed
172 173
		       varray_type dependence_relations,
		       varray_type datarefs, 
174
		       struct loop *first_loop)
175
{
176 177
  struct loop *loop_i;
  struct loop *loop_j;
Daniel Berlin committed
178 179
  unsigned int dependence_steps_i, dependence_steps_j;
  unsigned int access_strides_i, access_strides_j;
180 181 182 183 184 185 186 187 188 189 190
  unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
  struct data_dependence_relation *ddr;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  */
  ddr = (struct data_dependence_relation *) 
    VARRAY_GENERIC_PTR (dependence_relations, 0);
  if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return trans;
  
  /* LOOP_I is always the outer loop.  */
191 192 193 194 195 196
  for (loop_j = first_loop->inner; 
       loop_j; 
       loop_j = loop_j->inner)
    for (loop_i = first_loop; 
	 loop_i->depth < loop_j->depth; 
	 loop_i = loop_i->inner)
197
      {
Daniel Berlin committed
198 199 200 201 202 203 204 205 206 207
	gather_interchange_stats (dependence_relations, datarefs,
				  loop_i, first_loop,
				  &dependence_steps_i, 
				  &nb_deps_not_carried_by_i,
				  &access_strides_i);
	gather_interchange_stats (dependence_relations, datarefs,
				  loop_j, first_loop,
				  &dependence_steps_j, 
				  &nb_deps_not_carried_by_j, 
				  &access_strides_j);
208 209
	
	/* Heuristics for loop interchange profitability:
Daniel Berlin committed
210 211 212 213 214 215 216 217 218

	   1. (spatial locality) Inner loops should have smallest
              dependence steps.

	   2. (spatial locality) Inner loops should contain more
	   dependence relations not carried by the loop.

	   3. (temporal locality) Inner loops should have smallest 
	      array access strides.
219
	*/
Daniel Berlin committed
220 221 222
	if (dependence_steps_i < dependence_steps_j 
	    || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
	    || access_strides_i < access_strides_j)
223
	  {
224 225 226
	    lambda_matrix_row_exchange (LTM_MATRIX (trans),
					loop_i->depth - first_loop->depth,
					loop_j->depth - first_loop->depth);
227
	    /* Validate the resulting matrix.  When the transformation
Daniel Berlin committed
228
	       is not valid, reverse to the previous transformation.  */
229
	    if (!lambda_transform_legal_p (trans, depth, dependence_relations))
230 231 232
	      lambda_matrix_row_exchange (LTM_MATRIX (trans), 
					  loop_i->depth - first_loop->depth, 
					  loop_j->depth - first_loop->depth);
233 234
	  }
      }
Daniel Berlin committed
235

236 237 238 239 240 241 242 243 244
  return trans;
}

/* Perform a set of linear transforms on LOOPS.  */

void
linear_transform_loops (struct loops *loops)
{
  unsigned int i;
245 246
  VEC(tree,heap) *oldivs = NULL;
  VEC(tree,heap) *invariants = NULL;
247
  
248 249 250 251 252 253 254 255 256 257
  for (i = 1; i < loops->num; i++)
    {
      unsigned int depth = 0;
      varray_type datarefs;
      varray_type dependence_relations;
      struct loop *loop_nest = loops->parray[i];
      struct loop *temp;
      lambda_loopnest before, after;
      lambda_trans_matrix trans;
      bool problem = false;
Daniel Berlin committed
258
      bool need_perfect_nest = false;
259 260 261 262 263 264 265 266 267 268 269 270 271 272
      /* If it's not a loop nest, we don't want it.
         We also don't handle sibling loops properly, 
         which are loops of the following form:
         for (i = 0; i < 50; i++)
           {
             for (j = 0; j < 50; j++)
               {
	        ...
               }
           for (j = 0; j < 50; j++)
               {
                ...
               }
           } */
273
      if (!loop_nest || !loop_nest->inner)
274
	continue;
275 276
      VEC_truncate (tree, oldivs, 0);
      VEC_truncate (tree, invariants, 0);
Daniel Berlin committed
277 278
      depth = 1;
      for (temp = loop_nest->inner; temp; temp = temp->inner)
279 280
	{
	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
281
	  if (temp->next || !temp->single_exit)
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
	    {
	      problem = true;
	      break;
	    }
	  depth ++;
	}
      if (problem)
	continue;

      /* Analyze data references and dependence relations using scev.  */      
 
      VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
      VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
			       "dependence_relations");
      
  
298
      compute_data_dependences_for_loop (loop_nest, true,
299 300 301 302 303 304 305 306 307 308 309 310 311 312
					 &datarefs, &dependence_relations);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  unsigned int j;
	  for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
	    {
	      struct data_dependence_relation *ddr = 
		(struct data_dependence_relation *) 
		VARRAY_GENERIC_PTR (dependence_relations, j);

	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
		{
		  fprintf (dump_file, "DISTANCE_V (");
		  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr), 
313
				       DDR_SIZE_VECT (ddr));
314 315 316
		  fprintf (dump_file, ")\n");
		  fprintf (dump_file, "DIRECTION_V (");
		  print_lambda_vector (dump_file, DDR_DIR_VECT (ddr), 
317
				       DDR_SIZE_VECT (ddr));
318 319 320 321 322 323 324 325
		  fprintf (dump_file, ")\n");
		}
	    }
	  fprintf (dump_file, "\n\n");
	}
      /* Build the transformation matrix.  */
      trans = lambda_trans_matrix_new (depth, depth);
      lambda_matrix_id (LTM_MATRIX (trans), depth);
326

Daniel Berlin committed
327
      trans = try_interchange_loops (trans, depth, dependence_relations,
328
				     datarefs, loop_nest);
Daniel Berlin committed
329 330 331 332 333 334 335

      if (lambda_trans_matrix_id_p (trans))
	{
	  if (dump_file)
	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
	  continue;
	}
336 337 338 339 340 341 342 343

      /* Check whether the transformation is legal.  */
      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
	{
	  if (dump_file)
	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
	  continue;
	}
Daniel Berlin committed
344 345 346 347 348 349
      if (!perfect_nest_p (loop_nest))
	need_perfect_nest = true;
      before = gcc_loopnest_to_lambda_loopnest (loops,
						loop_nest, &oldivs, 
						&invariants,
						need_perfect_nest);
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
      if (!before)
	continue;
            
      if (dump_file)
	{
	  fprintf (dump_file, "Before:\n");
	  print_lambda_loopnest (dump_file, before, 'i');
	}
  
      after = lambda_loopnest_transform (before, trans);
      if (dump_file)
	{
	  fprintf (dump_file, "After:\n");
	  print_lambda_loopnest (dump_file, after, 'u');
	}
      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
				       after, trans);
367 368
      if (dump_file)
	fprintf (dump_file, "Successfully transformed loop.\n");
369 370 371
      free_dependence_relations (dependence_relations);
      free_data_refs (datarefs);
    }
372 373
  VEC_free (tree, heap, oldivs);
  VEC_free (tree, heap, invariants);
374
  scev_reset ();
Diego Novillo committed
375
  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
376
}