/* Vectorizer
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop and basic block vectorizer.

  This file contains drivers for the three vectorizers:
  (1) loop vectorizer (inter-iteration parallelism),
  (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
      vectorizer)
  (3) BB vectorizer (out-of-loops), aka SLP

  The rest of the vectorizer's code is organized as follows:
  - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
    used by drivers (1) and (2).
  - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
    drivers (1) and (2).
  - tree-vect-slp.c - BB vectorization specific analysis and transformation,
    used by drivers (2) and (3).
  - tree-vect-stmts.c - statements analysis and transformation (used by all).
  - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
    manipulations (used by all).
  - tree-vect-patterns.c - vectorizable code patterns detector (used by all)

  Here's a poor attempt at illustrating that:

     tree-vectorizer.c:
     loop_vect()  loop_aware_slp()  slp_vect()
          |        /           \          /
          |       /             \        /
          tree-vect-loop.c  tree-vect-slp.c
                | \      \  /      /   |
                |  \      \/      /    |
                |   \     /\     /     |
                |    \   /  \   /      |
         tree-vect-stmts.c  tree-vect-data-refs.c
                       \      /
                    tree-vect-patterns.c
*/

58 59 60 61 62 63
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
64
#include "tree-pretty-print.h"
65 66 67 68 69 70
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
71
#include "timevar.h"
Dorit Nuzman committed
72

73 74
/* vect_dump will be set to stderr or dump_file if exist.  */
FILE *vect_dump;
Dorit Nuzman committed
75

H.J. Lu committed
76
/* vect_verbosity_level set to an invalid value
77
   to mark that it's uninitialized.  */
78
static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
79

80
/* Loop or bb location.  */
81
LOC vect_location;
82

83 84
/* Vector mapping GIMPLE stmt to stmt_vec_info. */
VEC(vec_void_p,heap) *stmt_vec_info_vec;
Dorit Nuzman committed
85

86

87

88
/* Function vect_set_dump_settings.
89

90 91 92 93 94 95
   Fix the verbosity level of the vectorizer if the
   requested level was not set explicitly using the flag
   -ftree-vectorizer-verbose=N.
   Decide where to print the debugging information (dump_file/stderr).
   If the user defined the verbosity level, but there is no dump file,
   print to stderr, otherwise print to the dump file.  */
96 97

static void
98
vect_set_dump_settings (bool slp)
99
{
100
  vect_dump = dump_file;
101

102
  /* Check if the verbosity level was defined by the user:  */
103
  if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL)
104
    {
105 106 107
      vect_verbosity_level = user_vect_verbosity_level;
      /* Ignore user defined verbosity if dump flags require higher level of
         verbosity.  */
H.J. Lu committed
108
      if (dump_file)
109
        {
H.J. Lu committed
110
          if (((dump_flags & TDF_DETAILS)
111 112 113 114 115 116 117
                && vect_verbosity_level >= REPORT_DETAILS)
  	       || ((dump_flags & TDF_STATS)
	            && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS))
            return;
        }
      else
        {
H.J. Lu committed
118 119
          /* If there is no dump file, print to stderr in case of loop
             vectorization.  */
120 121 122 123 124
          if (!slp)
            vect_dump = stderr;

          return;
        }
125 126
    }

127 128 129 130
  /* User didn't specify verbosity level:  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    vect_verbosity_level = REPORT_DETAILS;
  else if (dump_file && (dump_flags & TDF_STATS))
131
    vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS;
132
  else
133 134 135
    vect_verbosity_level = REPORT_NONE;

  gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
136 137 138
}


139
/* Function debug_loop_details.
140

141
   For vectorization debug dumps.  */
142

143
bool
144
vect_print_dump_info (enum vect_verbosity_levels vl)
145
{
146
  if (vl > vect_verbosity_level)
147
    return false;
148

149 150
  if (!current_function_decl || !vect_dump)
    return false;
151

152
  if (vect_location == UNKNOWN_LOC)
153 154 155 156
    fprintf (vect_dump, "\n%s:%d: note: ",
	     DECL_SOURCE_FILE (current_function_decl),
	     DECL_SOURCE_LINE (current_function_decl));
  else
H.J. Lu committed
157
    fprintf (vect_dump, "\n%s:%d: note: ",
158
	     LOC_FILE (vect_location), LOC_LINE (vect_location));
159

160 161 162 163
  return true;
}


164
/* Function vectorize_loops.
H.J. Lu committed
165

166
   Entry point to loop vectorization phase.  */
167

168
unsigned
169
vectorize_loops (void)
170
{
171
  unsigned int i;
172
  unsigned int num_vectorized_loops = 0;
173 174 175
  unsigned int vect_loops_num;
  loop_iterator li;
  struct loop *loop;
176

177 178 179 180 181 182
  vect_loops_num = number_of_loops ();

  /* Bail out if there are no loops.  */
  if (vect_loops_num <= 1)
    return 0;

183
  /* Fix the verbosity level if not defined explicitly by the user.  */
184
  vect_set_dump_settings (false);
185

186 187
  init_stmt_vec_info_vec ();

188 189
  /*  ----------- Analyze loops. -----------  */

H.J. Lu committed
190
  /* If some loop was duplicated, it gets bigger number
191
     than all previously defined loops.  This fact allows us to run
192
     only over initial loops skipping newly generated ones.  */
193
  FOR_EACH_LOOP (li, loop, 0)
194 195 196
    if (optimize_loop_nest_for_speed_p (loop))
      {
	loop_vec_info loop_vinfo;
197

198
	vect_location = find_loop_location (loop);
199 200
	loop_vinfo = vect_analyze_loop (loop);
	loop->aux = loop_vinfo;
201

202 203
	if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
	  continue;
204

205 206 207
	vect_transform_loop (loop_vinfo);
	num_vectorized_loops++;
      }
208 209

  vect_location = UNKNOWN_LOC;
210

211
  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
212
  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)
213 214
      || (num_vectorized_loops > 0
	  && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)))
215
    fprintf (vect_dump, "vectorized %u loops in function.\n",
216 217 218 219
	     num_vectorized_loops);

  /*  ----------- Finalize. -----------  */

220
  mark_sym_for_renaming (gimple_vop (cfun));
221

222
  for (i = 1; i < vect_loops_num; i++)
223
    {
224 225
      loop_vec_info loop_vinfo;

226
      loop = get_loop (i);
227
      if (!loop)
228
	continue;
229
      loop_vinfo = (loop_vec_info) loop->aux;
230
      destroy_loop_vec_info (loop_vinfo, true);
231 232
      loop->aux = NULL;
    }
233

234 235
  free_stmt_vec_info_vec ();

236
  return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
237
}
H.J. Lu committed
238

239

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/* Entry point to the basic block SLP phase.  Runs SLP analysis on each
   basic block visited by FOR_EACH_BB and transforms the blocks that the
   analysis accepts.  Always returns 0.  */

static unsigned int
execute_vect_slp (void)
{
  basic_block block;

  /* Settle the verbosity level and dump stream; TRUE selects SLP mode.  */
  vect_set_dump_settings (true);

  init_stmt_vec_info_vec ();

  FOR_EACH_BB (block)
    {
      vect_location = find_bb_location (block);

      /* Nothing to transform when the analysis rejects this block.  */
      if (!vect_slp_analyze_bb (block))
        continue;

      vect_slp_transform_bb (block);

      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
        fprintf (vect_dump, "basic block vectorized using SLP\n");
    }

  free_stmt_vec_info_vec ();
  return 0;
}

static bool
gate_vect_slp (void)
{
H.J. Lu committed
272
  /* Apply SLP either if the vectorizer is on and the user didn't specify
273
     whether to run SLP or not, or if the SLP flag was set by the user.  */
H.J. Lu committed
274
  return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0)
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
          || flag_tree_slp_vectorize == 1);
}

struct gimple_opt_pass pass_slp_vectorize =
{
 {
  GIMPLE_PASS,
  "slp",                                /* name */
  gate_vect_slp,                        /* gate */
  execute_vect_slp,                     /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_TREE_SLP_VECTORIZATION,            /* tv_id */
  PROP_ssa | PROP_cfg,                  /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_ggc_collect
    | TODO_verify_ssa
    | TODO_dump_func
H.J. Lu committed
296
    | TODO_update_ssa
297 298 299 300 301
    | TODO_verify_stmts                 /* todo_flags_finish */
 }
};


302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
/* Increase alignment of global arrays to improve vectorization potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */

static unsigned int
increase_alignment (void)
{
  struct varpool_node *vnode;

  /* Increase the alignment of all global arrays for vectorization.  */
  for (vnode = varpool_nodes_queue;
       vnode;
       vnode = vnode->next_needed)
    {
      tree vectype, decl = vnode->decl;
320
      tree t;
321 322
      unsigned int alignment;

323 324
      t = TREE_TYPE(decl);
      if (TREE_CODE (t) != ARRAY_TYPE)
325
        continue;
326
      vectype = get_vectype_for_scalar_type (strip_array_types (t));
327
      if (!vectype)
328
        continue;
329 330
      alignment = TYPE_ALIGN (vectype);
      if (DECL_ALIGN (decl) >= alignment)
331
        continue;
332 333

      if (vect_can_force_dr_alignment_p (decl, alignment))
334 335 336 337 338 339 340
        {
          DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
          DECL_USER_ALIGN (decl) = 1;
          if (dump_file)
            {
              fprintf (dump_file, "Increasing alignment of decl: ");
              print_generic_expr (dump_file, decl, TDF_SLIM);
341
	      fprintf (dump_file, "\n");
342 343
            }
        }
344 345 346 347
    }
  return 0;
}

348

349
static bool
350 351 352 353 354
gate_increase_alignment (void)
{
  return flag_section_anchors && flag_tree_vectorize;
}

355 356

struct simple_ipa_opt_pass pass_ipa_increase_alignment =
357
{
358 359
 {
  SIMPLE_IPA_PASS,
360 361 362 363 364 365
  "increase_alignment",                 /* name */
  gate_increase_alignment,              /* gate */
  increase_alignment,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
366
  TV_IPA_OPT,                           /* tv_id */
367 368 369 370 371
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
372
 }
373
};