2004-10-14 Olga Golovanevsky <olga@il.ibm.com>

* tree-vectorizer.c (vect_generate_tmps_on_preheader): (vect_update_ivs_after_vectorizer): (vect_transform_for_unknown_loop_bound): (tree_duplicate_loop_to_edge): (allocate_new_names): (rename_use_op): (rename_def_op): (rename_variables_in_bb): (free_new_names): (rename_variables_in_loop): (copy_phi_nodes): (update_phis_for_duplicate_loop): (update_phi_nodes_for_guard): (make_loop_iterate_ntimes): (tree_duplicate_loop_to_edge_cfg): (add_loop_guard): (vect_analyze_loop_with_symbolic_num_of_iters): (verify_loop_for_duplication): (vect_gen_niters_for_prolog_loop): (vect_update_niters_after_peeling): (vect_update_inits_of_dr): (vect_update_inits_of_drs): (vect_build_loop_niters): (vect_do_peeling_for_alignment): New functions. (vect_transform_loop): Add unknown and known but indivisible loop bound support; add peeling for unalignment support. (vect_analyze_loop_form): Support symbolic number of iterations. (vect_transform_loop_bound): New input parameter. (vect_get_loop_niters): Change input parameter type. (new_loop_vec_info): LOOP_VINFO_NITERS is tree now. (vectorizable_store): Allow unaligned access. (vectorize_loops): Add rewrite_into_loop_closed_ssa. (vect_analyze_data_refs_alignment): Allowed one unaligned store. * tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined to use tree. (LOOP_VINFO_INT_NITERS): New macro. (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define. (do_peeling_for_alignment): (unaligned_drs): New members of _loop_vec_info. (LOOP_DO_PEELING_FOR_ALIGNMENT): New macro. From-SVN: r89040

2004-10-14 Olga Golovanevsky <olga@il.ibm.com>
* tree-vectorizer.c (vect_generate_tmps_on_preheader): (vect_update_ivs_after_vectorizer): (vect_transform_for_unknown_loop_bound): (tree_duplicate_loop_to_edge): (allocate_new_names): (rename_use_op): (rename_def_op): (rename_variables_in_bb): (free_new_names): (rename_variables_in_loop): (copy_phi_nodes): (update_phis_for_duplicate_loop): (update_phi_nodes_for_guard): (make_loop_iterate_ntimes): (tree_duplicate_loop_to_edge_cfg): (add_loop_guard): (vect_analyze_loop_with_symbolic_num_of_iters): (verify_loop_for_duplication): (vect_gen_niters_for_prolog_loop): (vect_update_niters_after_peeling): (vect_update_inits_of_dr): (vect_update_inits_of_drs): (vect_build_loop_niters): (vect_do_peeling_for_alignment): New functions. (vect_transform_loop): Add unknown and known but indivisible loop bound support; add peeling for unalignment support. (vect_analyze_loop_form): Support symbolic number of iterations. (vect_transform_loop_bound): New input parameter. (vect_get_loop_niters): Change input parameter type. (new_loop_vec_info): LOOP_VINFO_NITERS is tree now. (vectorizable_store): Allow unaligned access. (vectorize_loops): Add rewrite_into_loop_closed_ssa. (vect_analyze_data_refs_alignment): Allowed one unaligned store. * tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined to use tree. (LOOP_VINFO_INT_NITERS): New macro. (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define. (do_peeling_for_alignment): (unaligned_drs): New members of _loop_vec_info. (LOOP_DO_PEELING_FOR_ALIGNMENT): New macro. From-SVN: r89040
a023975e · Olga Golovanevsky · Dorit Nuzman · 7238c5ec · a023975e · a023975e
Commit a023975e authored Oct 14, 2004 by Olga Golovanevsky Committed by Dorit Nuzman Oct 14, 2004
23 changed files
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2004-10-14  Olga Golovanevsky  <olga@il.ibm.com>
+
+	* tree-vectorizer.c (vect_generate_tmps_on_preheader):
+	(vect_update_ivs_after_vectorizer):
+	(vect_transform_for_unknown_loop_bound):
+	(tree_duplicate_loop_to_edge):
+	(allocate_new_names):
+	(rename_use_op):
+	(rename_def_op):
+	(rename_variables_in_bb):
+	(free_new_names):
+	(rename_variables_in_loop):
+	(copy_phi_nodes):
+	(update_phis_for_duplicate_loop):
+	(update_phi_nodes_for_guard):
+	(make_loop_iterate_ntimes):
+	(tree_duplicate_loop_to_edge_cfg):
+	(add_loop_guard):
+	(vect_analyze_loop_with_symbolic_num_of_iters):
+	(verify_loop_for_duplication): 
+	(vect_gen_niters_for_prolog_loop):
+	(vect_update_niters_after_peeling):
+	(vect_update_inits_of_dr):
+	(vect_update_inits_of_drs):
+	(vect_build_loop_niters):
+	(vect_do_peeling_for_alignment): New functions.
+	(vect_transform_loop): Add unknown and known but indivisible loop
+	bound support; add peeling for unalignment support. 
+	(vect_analyze_loop_form): Support symbolic number of iterations.
+	(vect_transform_loop_bound): New input parameter.
+	(vect_get_loop_niters): Change input parameter type.
+	(new_loop_vec_info): LOOP_VINFO_NITERS is tree now.
+	(vectorizable_store): Allow unaligned access.
+	(vectorize_loops): Add rewrite_into_loop_closed_ssa.
+	(vect_analyze_data_refs_alignment): Allowed one unaligned
+	store.
+	* tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined 
+	to use tree.
+	(LOOP_VINFO_INT_NITERS): New macro.
+	(MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define.
+	(do_peeling_for_alignment):
+	(unaligned_drs): New members of _loop_vec_info.
+	(LOOP_DO_PEELING_FOR_ALIGNMENT): New macro.
+
 2004-10-14  Ranjit Mathew  <rmathew@hotmail.com>

 	* tree.h (TREE_STRING_POINTER): Wrap in "const char *".

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2004-10-14  Olga Golovanevsky  <olga@il.ibm.com>
+
+	* testsuite/gcc.dg/vect/vect-28.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-30.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-31.c : Vectorize 4 loops instead of 2.
+	* testsuite/gcc.dg/vect/vect-33.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-44.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-46.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-50.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-52.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-54.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-58.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-60.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-64.c : Vectorize 3 loops instead of 1.
+	* testsuite/gcc.dg/vect/vect-66.c : Vectorize 3 loops instead of 2.
+	* testsuite/gcc.dg/vect/vect-68.c : Vectorize 4 loops instead of 2.
+	* testsuite/gcc.dg/vect/vect-69.c : Vectorize 4 loops instead of 2.
+	* testsuite/gcc.dg/vect/vect-8.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-80.c : Remove xfail.
+	* testsuite/gcc.dg/vect/vect-none.c : Vectorize 1 loops instead of 0.
+
 2004-10-14  Dorit Naishlos  <dorit@il.ibm.com>

 	* gcc.dg/vect/vect-82.c: New testcase.	

--- a/gcc/testsuite/gcc.dg/vect/vect-26.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-26.c
@@ -37,5 +37,5 @@ int main (void)
  return main1 ();
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */

--- a/gcc/testsuite/gcc.dg/vect/vect-28.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-28.c
@@ -40,5 +40,5 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */

--- a/gcc/testsuite/gcc.dg/vect/vect-30.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-30.c
@@ -64,4 +64,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-31.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-31.c
@@ -88,4 +88,4 @@ int main (void)
  return main1 ();
 } 

-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-33.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-33.c
@@ -38,4 +38,4 @@ int main (void)
 } 


-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-44.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-44.c
@@ -59,4 +59,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-46.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-46.c
@@ -55,4 +55,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-50.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-50.c
@@ -54,4 +54,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-52.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-52.c
@@ -56,4 +56,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-54.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-54.c
@@ -55,4 +55,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-58.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-58.c
@@ -56,4 +56,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-60.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-60.c
@@ -57,4 +57,4 @@ int main (void)
  return 0;
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-64.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-64.c
@@ -83,4 +83,4 @@ int main (void)
  return main1 ();
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-66.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-66.c
@@ -79,4 +79,4 @@ int main (void)
  return main1 ();
 }

-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-68.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-68.c
@@ -87,4 +87,4 @@ int main (void)
  return main1 ();
 } 

-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-69.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-69.c
@@ -114,4 +114,4 @@ int main (void)
  return main1 ();
 } 

-/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-8.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-8.c
@@ -39,4 +39,4 @@ int main (void)
  return main1 (N);
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-80.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-80.c
@@ -47,4 +47,4 @@ int main (void)
  return 0;	
 }

-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
--- a/gcc/testsuite/gcc.dg/vect/vect-none.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-none.c
@@ -189,5 +189,5 @@ foo (int n)
 }

 /* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"} } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect"} } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -139,6 +139,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "cfglayout.h"
 #include "expr.h"
 #include "optabs.h"
+#include "toplev.h"
 #include "tree-chrec.h"
 #include "tree-data-ref.h"
 #include "tree-scalar-evolution.h"
@@ -158,7 +159,7 @@ static bool vect_analyze_operations (loop_vec_info);

 /* Main code transformation functions.  */
 static void vect_transform_loop (loop_vec_info, struct loops *);
-static void vect_transform_loop_bound (loop_vec_info);
+static void vect_transform_loop_bound (loop_vec_info, tree niters);
 static bool vect_transform_stmt (tree, block_stmt_iterator *);
 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
@@ -173,7 +174,7 @@ static bool exist_non_indexing_operands_for_use_p (tree, tree);
 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
 static void vect_mark_relevant (varray_type, tree);
 static bool vect_stmt_relevant_p (tree, loop_vec_info);
-static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
+static tree vect_get_loop_niters (struct loop *, tree *);
 static bool vect_compute_data_ref_alignment 
  (struct data_reference *, loop_vec_info);
 static bool vect_analyze_data_ref_access (struct data_reference *);
@@ -181,6 +182,8 @@ static bool vect_get_first_index (tree, tree *);
 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
 static struct data_reference * vect_analyze_pointer_ref_access 
  (tree, tree, bool);
+static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters, 
+ 							  struct loop *loop);
 static tree vect_get_base_and_bit_offset
  (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
 static struct data_reference * vect_analyze_pointer_ref_access
@@ -203,17 +206,847 @@ static tree get_vectype_for_scalar_type (tree);
 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 static tree vect_get_vec_def_for_operand (tree, tree);
 static tree vect_init_vector (tree, tree);
+static tree vect_build_symbol_bound (tree, int, struct loop *);
 static void vect_finish_stmt_generation 
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);

+static void vect_generate_tmps_on_preheader (loop_vec_info, 
+					     tree *, tree *, 
+					     tree *);
+static tree vect_build_loop_niters (loop_vec_info);
+static void vect_update_ivs_after_vectorizer (struct loop *, tree); 
+
+/* Loop transformations prior to vectorization.  */
+
+/* Loop transformations entry point function. 
+   It can be used outside of the vectorizer 
+   in case the loop to be manipulated answers conditions specified
+   in function documentation.  */
+struct loop *tree_duplicate_loop_to_edge (struct loop *, 
+					  struct loops *, edge, 
+					  tree, tree, bool);
+
+static void allocate_new_names (bitmap);
+static void rename_use_op (use_operand_p);
+static void rename_def_op (def_operand_p, tree);
+static void rename_variables_in_bb (basic_block);
+static void free_new_names (bitmap);
+static void rename_variables_in_loop (struct loop *);
+static void copy_phi_nodes (struct loop *, struct loop *, bool);
+static void update_phis_for_duplicate_loop (struct loop *,
+					    struct loop *, 
+					    bool after);
+static void update_phi_nodes_for_guard (edge, struct loop *);  
+static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
+static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *, 
+						     struct loops *, 
+						     edge);
+static edge add_loop_guard (basic_block, tree, basic_block);
+static bool verify_loop_for_duplication (struct loop *, bool, edge);
+
+/* Utilities dealing with loop peeling (not peeling itself).  */
+static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
+static void vect_update_niters_after_peeling (loop_vec_info, tree);
+static void vect_update_inits_of_dr (struct data_reference *, struct loop *, 
+				     tree niters);
+static void vect_update_inits_of_drs (loop_vec_info, tree);
+static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
+
 /* Utilities for creation and deletion of vec_info structs.  */
 loop_vec_info new_loop_vec_info (struct loop *loop);
 void destroy_loop_vec_info (loop_vec_info);
 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);

-static bool vect_debug_stats (struct loop *loop);
-static bool vect_debug_details (struct loop *loop);
+static bool vect_debug_stats (struct loop *loop);
+static bool vect_debug_details (struct loop *loop);
+
+
+/* Utilities to support loop peeling for vectorization purposes.  */
+
+
+/* For each definition in DEFINITIONS this function allocates
+   a new ssa name.  */
+
+static void
+allocate_new_names (bitmap definitions)
+{
+  unsigned ver;
+  bitmap_iterator bi;
+
+  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
+    {
+      tree def = ssa_name (ver);
+      tree *new_name_ptr = xmalloc (sizeof (tree));
+
+      bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
+
+      *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
+      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
+
+      SSA_NAME_AUX (def) = new_name_ptr;
+    }
+}
+
+
+/* Renames the use *OP_P.  */
+
+static void
+rename_use_op (use_operand_p op_p)
+{
+  tree *new_name_ptr;
+
+  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
+    return;
+
+  new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
+
+  /* Something defined outside of the loop.  */
+  if (!new_name_ptr)
+    return;
+
+  /* An ordinary ssa name defined in the loop.  */
+
+  SET_USE (op_p, *new_name_ptr);
+}
+
+
+/* Renames the def *OP_P in statement STMT.  */
+
+static void
+rename_def_op (def_operand_p op_p, tree stmt)
+{
+  tree *new_name_ptr;
+
+  if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
+    return;
+
+  new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
+
+  /* Something defined outside of the loop.  */
+  if (!new_name_ptr)
+    return;
+
+  /* An ordinary ssa name defined in the loop.  */
+
+  SET_DEF (op_p, *new_name_ptr);
+  SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
+}
+
+
+/* Renames the variables in basic block BB.  */
+
+static void
+rename_variables_in_bb (basic_block bb)
+{
+  tree phi;
+  block_stmt_iterator bsi;
+  tree stmt;
+  stmt_ann_t ann;
+  use_optype uses;
+  vuse_optype vuses;
+  def_optype defs;
+  v_may_def_optype v_may_defs;
+  v_must_def_optype v_must_defs;
+  unsigned i;
+  edge e;
+  edge_iterator ei;
+
+  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+    rename_def_op (PHI_RESULT_PTR (phi), phi);
+
+  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+    {
+      stmt = bsi_stmt (bsi);
+      get_stmt_operands (stmt);
+      ann = stmt_ann (stmt);
+
+      uses = USE_OPS (ann);
+      for (i = 0; i < NUM_USES (uses); i++)
+	rename_use_op (USE_OP_PTR (uses, i));
+
+      defs = DEF_OPS (ann);
+      for (i = 0; i < NUM_DEFS (defs); i++)
+	rename_def_op (DEF_OP_PTR (defs, i), stmt);
+
+      vuses = VUSE_OPS (ann);
+      for (i = 0; i < NUM_VUSES (vuses); i++)
+	rename_use_op (VUSE_OP_PTR (vuses, i));
+
+      v_may_defs = V_MAY_DEF_OPS (ann);
+      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
+	{
+	  rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
+	  rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
+	}
+
+      v_must_defs = V_MUST_DEF_OPS (ann);
+      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
+	rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt);
+    }
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
+      rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
+}
+
+
+/* Releases the structures holding the new ssa names.  */
+
+static void
+free_new_names (bitmap definitions)
+{
+  unsigned ver;
+  bitmap_iterator bi;
+
+  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
+    {
+      tree def = ssa_name (ver);
+
+      if (SSA_NAME_AUX (def))
+	{
+	  free (SSA_NAME_AUX (def));
+	  SSA_NAME_AUX (def) = NULL;
+	}
+    }
+}
+
+
+/* Renames variables in the newly generated LOOP.  */
+
+static void
+rename_variables_in_loop (struct loop *loop)
+{
+  unsigned i;
+  basic_block *bbs;
+
+  bbs = get_loop_body (loop);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    rename_variables_in_bb (bbs[i]);
+
+  free (bbs);
+}
+
+
+/* This function copies phis from LOOP header to
+   NEW_LOOP header. AFTER is as
+   in update_phis_for_duplicate_loop function.  */
+
+static void
+copy_phi_nodes (struct loop *loop, struct loop *new_loop,
+		      bool after)
+{
+  tree phi, new_phi, def;
+  edge new_e;
+  edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
+
+  /* Second add arguments to newly created phi nodes.  */
+  for (phi = phi_nodes (loop->header),
+	 new_phi = phi_nodes (new_loop->header);
+       phi;
+       phi = TREE_CHAIN (phi),
+	 new_phi = TREE_CHAIN (new_phi))
+    {
+      new_e = loop_preheader_edge (new_loop);
+      def = PHI_ARG_DEF_FROM_EDGE (phi, e);
+      add_phi_arg (&new_phi, def, new_e);
+    }
+}
+
+
+/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
+   executes after LOOP, and false if it executes before it.  */
+
+static void
+update_phis_for_duplicate_loop (struct loop *loop,
+                                struct loop *new_loop, bool after)
+{
+  edge old_latch;
+  tree *new_name_ptr, new_ssa_name;
+  tree phi_new, phi_old, def;
+  edge orig_entry_e = loop_preheader_edge (loop);
+
+  /* Copy phis from loop->header to new_loop->header.  */
+  copy_phi_nodes (loop, new_loop, after);
+
+  old_latch = loop_latch_edge (loop);
+
+  /* Update PHI args for the new loop latch edge, and
+     the old loop preheader edge, we know that the PHI nodes
+     are ordered appropriately in copy_phi_nodes.  */
+  for (phi_new = phi_nodes (new_loop->header),
+       phi_old = phi_nodes (loop->header);
+       phi_new && phi_old;
+       phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
+    {
+      def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
+
+      if (TREE_CODE (def) != SSA_NAME)
+	continue;
+
+      new_name_ptr = SSA_NAME_AUX (def);
+
+      /* Something defined outside of the loop.  */
+      if (!new_name_ptr)
+	continue;
+
+      /* An ordinary ssa name defined in the loop.  */
+      new_ssa_name = *new_name_ptr;
+
+      add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
+
+      /* Update PHI args for the original loop pre-header edge.  */
+      if (! after)
+        SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
+	         new_ssa_name);
+    }
+}
+
+
+/* Update PHI nodes for a guard of the LOOP.
+
+   LOOP is supposed to have a preheader bb at which a guard condition is 
+   located.  The true edge of this condition skips the LOOP and ends
+   at the destination of the (unique) LOOP exit.  The loop exit bb is supposed 
+   to be an empty bb (created by this transformation) with one successor.
+
+   This function creates phi nodes at the LOOP exit bb.  These phis need to be
+   created as a result of adding true edge coming from guard.
+
+   FORNOW: Only phis which have corresponding phi nodes at the header of the
+   LOOP are created.  Here we use the assumption that after the LOOP there
+   are no uses of defs generated in LOOP.
+
+   After the phis creation, the function updates the values of phi nodes at
+   the LOOP exit successor bb:
+
+   Original loop:
+
+   bb0: loop preheader
+        goto bb1
+   bb1: loop header
+        if (exit_cond) goto bb3 else goto bb2
+   bb2: loop latch
+        goto bb1
+   bb3:
+
+
+   After guard creation (the loop before this function):
+
+   bb0: loop preheader
+        if (guard_condition) goto bb4 else goto bb1
+   bb1: loop header
+        if (exit_cond) goto bb4 else goto bb2
+   bb2: loop latch
+        goto bb1
+   bb4: loop exit       
+        (new empty bb)
+        goto bb3
+   bb3:
+
+   This function updates the phi nodes in bb4 and in bb3, to account for the 
+   new edge from bb0 to bb4.  */
+
+static void
+update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
+{
+  tree phi, phi1;
+
+  for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
+      {
+	tree new_phi;
+	tree phi_arg;
+
+	/* Generate new phi node.  */
+	new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
+			           loop->exit_edges[0]->dest);
+
+	/* Add argument coming from guard true edge.  */
+	phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
+	add_phi_arg (&new_phi, phi_arg, guard_true_edge);
+
+	/* Add argument coming from loop exit edge.  */
+	phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));			           
+	add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
+      
+	/* Update all phi nodes at the loop exit successor.  */
+	for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest); 
+	     phi1; 
+	     phi1 = TREE_CHAIN (phi1))
+	  {
+	    tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, 
+				  EDGE_SUCC (loop->exit_edges[0]->dest, 0));
+	    if (old_arg == phi_arg)
+	      {	
+		edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
+
+		SET_PHI_ARG_DEF (phi1, 
+				 phi_arg_from_edge (phi1, e),
+				 PHI_RESULT (new_phi)); 
+	      }
+	  }
+      }       
+}
+
+
+/* Make the LOOP iterate NITERS times. This is done by adding a new IV
+   that starts at zero, increases by one and its limit is NITERS.  */
+
+static void
+make_loop_iterate_ntimes (struct loop *loop, tree niters,
+			  tree begin_label, tree exit_label)
+{
+  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
+  tree orig_cond;
+  edge exit_edge = loop->exit_edges[0];
+  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
+
+  /* Flow loop scan does not update loop->single_exit field.  */
+  loop->single_exit = loop->exit_edges[0];
+  orig_cond = get_loop_exit_condition (loop);
+  gcc_assert (orig_cond);
+  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
+             &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
+  
+  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
+     back to the exit condition statement.  */
+  bsi_next (&loop_exit_bsi);
+  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
+
+
+  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
+    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
+  else /* 'then' edge loops back.   */
+    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
+
+  begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
+  exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
+  cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
+		     begin_label, exit_label);
+  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
+
+  /* Remove old loop exit test:  */
+  bsi_remove (&loop_exit_bsi);
+
+  if (vect_debug_stats (loop) || vect_debug_details (loop))
+    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
+}
+
+
+/* Given LOOP this function generates a new copy of it and puts it 
+   on E which is either the entry or exit of LOOP.  */
+
+static struct loop *
+tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops, 
+				 edge e)
+{
+  struct loop *new_loop;
+  basic_block *new_bbs, *bbs;
+  bool at_exit;
+  bool was_imm_dom;
+  basic_block exit_dest; 
+  tree phi, phi_arg;
+
+  at_exit = (e == loop->exit_edges[0]); 
+  if (!at_exit && e != loop_preheader_edge (loop))
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	  fprintf (dump_file,
+		   "Edge is not an entry nor an exit edge.\n");
+      return NULL;
+    }
+
+  bbs = get_loop_body (loop);
+
+  /* Check whether duplication is possible.  */
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))	
+	  fprintf (dump_file,
+		   "Cannot copy basic blocks.\n");
+      free (bbs);
+      return NULL;
+    }
+
+  /* Generate new loop structure.  */
+  new_loop = duplicate_loop (loops, loop, loop->outer);
+  if (!new_loop)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))	
+	  fprintf (dump_file,
+		   "The duplicate_loop returns NULL.\n");
+      free (bbs);
+      return NULL;
+    }
+
+  exit_dest = loop->exit_edges[0]->dest;
+  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, 
+					  exit_dest) == loop->header ? 
+		 true : false);
+
+  new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
+
+  copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
+
+  /* Duplicating phi args at exit bbs as coming 
+     also from exit of duplicated loop.  */
+  for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
+    {
+      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
+      if (phi_arg)
+	{
+	  edge new_loop_exit_edge;
+
+	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
+	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
+	  else
+	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
+  
+	  add_phi_arg (&phi, phi_arg, new_loop_exit_edge);	
+	}
+    }    
+   
+  if (at_exit) /* Add the loop copy at exit.  */
+    {
+      redirect_edge_and_branch_force (e, new_loop->header);
+      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
+      if (was_imm_dom)
+	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
+    }
+  else /* Add the copy at entry.  */
+    {
+      edge new_exit_e;
+      edge entry_e = loop_preheader_edge (loop);
+      basic_block preheader = entry_e->src;
+           
+      if (!flow_bb_inside_loop_p (new_loop, 
+				  EDGE_SUCC (new_loop->header, 0)->dest))
+        new_exit_e = EDGE_SUCC (new_loop->header, 0);
+      else
+	new_exit_e = EDGE_SUCC (new_loop->header, 1); 
+
+      redirect_edge_and_branch_force (new_exit_e, loop->header);
+      set_immediate_dominator (CDI_DOMINATORS, loop->header,
+			       new_exit_e->src);
+
+      /* We have to add phi args to the loop->header here as coming 
+	 from new_exit_e edge.  */
+      for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
+	{
+	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
+	  if (phi_arg)
+	    add_phi_arg (&phi, phi_arg, new_exit_e);	
+	}    
+
+      redirect_edge_and_branch_force (entry_e, new_loop->header);
+      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
+    }
+
+  flow_loop_scan (new_loop, LOOP_ALL);
+  flow_loop_scan (loop, LOOP_ALL);  
+  free (new_bbs);
+  free (bbs);
+
+  return new_loop;
+}
+
+
+/* Insert "if (COND) goto EXIT_BB; else goto <successor 0 of GUARD_BB>" as
+   the last statement of GUARD_BB, so that the guarded loop is skipped when
+   COND holds.  Assumes that this is the single exit of the guarded loop.
+   Returns the new skip edge from GUARD_BB to EXIT_BB.  */
+
+static edge
+add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
+{
+  block_stmt_iterator bsi;
+  edge new_e, enter_e;
+  tree cond_stmt, then_label, else_label;
+
+  enter_e = EDGE_SUCC (guard_bb, 0);
+  enter_e->flags &= ~EDGE_FALLTHRU;
+  enter_e->flags |= EDGE_FALSE_VALUE;
+  bsi = bsi_last (guard_bb);
+
+  then_label = build1 (GOTO_EXPR, void_type_node,
+                       tree_block_label (exit_bb));
+  else_label = build1 (GOTO_EXPR, void_type_node,
+                       tree_block_label (enter_e->dest));
+  cond_stmt = build (COND_EXPR, void_type_node, cond,
+   		     then_label, else_label);
+  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+  /* Add the skip edge from GUARD_BB to EXIT_BB, taken when COND is true.  */
+  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
+  return new_e;
+}
+
+
+/* Verify that LOOP has a form tree_duplicate_loop_to_edge can handle:
+   innermost, exactly one exit and one entry, nested in an outer loop,
+   and E is its preheader edge or its exit edge.  If
+   UPDATE_FIRST_LOOP_COUNT is false, also require that the loop exit
+   condition is the last stmt of the exit edge's source block.  Return
+   true if all checks pass; else return false (dumping the reason).  */
+
+static bool
+verify_loop_for_duplication (struct loop *loop,
+			     bool update_first_loop_count, edge e)
+{
+  edge exit_e = loop->exit_edges [0];
+  edge entry_e = loop_preheader_edge (loop);
+
+  /* We duplicate only innermost loops.  */
+  if (loop->inner)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file,
+		 "Loop duplication failed. Loop is not innermost.\n");
+      return false;
+    }
+
+  /* Only loops with 1 exit.  */
+  if (loop->num_exits != 1)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file, "More than one exit from loop.\n");
+      return false;
+    }
+
+  /* Only loops with 1 entry.  */
+  if (loop->num_entries != 1)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file, "More than one entry to loop.\n");
+      return false;
+    }
+
+  /* Every loop has an outer loop; the only case loop->outer is NULL is
+     for the function itself.  */
+  if (!loop->outer)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file, "Loop is outer-most loop.\n");
+      return false;
+    }
+
+  /* Verify that new IV can be created and loop condition
+     can be changed to make first loop iterate first_niters times.  */
+  if (!update_first_loop_count)
+    {
+      tree orig_cond = get_loop_exit_condition (loop);
+      block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
+
+      if (!orig_cond)
+	{
+	  if (vect_debug_stats (loop) || vect_debug_details (loop))
+	    fprintf (dump_file, "Loop has no exit condition.\n");
+	  return false;
+	}
+      if (orig_cond != bsi_stmt (loop_exit_bsi))
+	{
+	  if (vect_debug_stats (loop) || vect_debug_details (loop))
+	    fprintf (dump_file,
+		     "Loop exit condition is not loop header last stmt.\n");
+	  return false;
+	}
+    }
+
+  /* Make sure E is either an entry or an exit edge.  */
+  if (e != exit_e && e != entry_e)
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file, "E is not loop entry or exit edge.\n");
+      return false;
+    }
+
+  return true;
+}
+
+
+/* Given LOOP this function duplicates it to the edge E. 
+
+   This transformation takes place before the loop is vectorized. 
+   For now, there are two main cases when it's used 
+   by the vectorizer: to support loops with unknown loop bounds 
+   (or loop bounds indivisible by vectorization factor) and to force the 
+   alignment of data references in the loop. In the first case, LOOP is 
+   duplicated to the exit edge, producing epilog loop. In the second case, LOOP 
+   is duplicated to the preheader edge thus generating prolog loop. In both 
+   cases, the original loop will be vectorized after the transformation. 
+
+   The edge E is supposed to be either preheader edge of the LOOP or  
+   its exit edge. If preheader edge is specified, the LOOP copy 
+   will precede the original one. Otherwise the copy will be located 
+   at the exit of the LOOP.
+   
+   FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate 
+   the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first 
+   loop will be iterated FIRST_NITERS times by introducing additional 
+   induction variable and replacing loop exit condition. If 
+   UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and 
+   the caller to tree_duplicate_loop_to_edge is responsible for updating 
+   the first loop count.
+   
+   NITERS (also SSA_NAME) parameter defines the number of iterations the
+   original loop executes. The function generates two if-then guards: 
+   one prior to the first loop and the other prior to the second loop. 
+   The first guard will be:
+
+   if (FIRST_NITERS <= 0) then skip the first loop
+   
+   The second guard will be:
+
+   if (FIRST_NITERS == NITERS) then skip the second loop
+
+   Thus the equivalence to the original code is guaranteed by correct values 
+   of NITERS and FIRST_NITERS and generation of if-then loop guards.
+
+   For now this function supports only loop forms that are candidate for 
+   vectorization. Such types are the following: 
+
+   (1) only innermost loops 
+   (2) loops built from 2 basic blocks 
+   (3) loops with one entry and one exit
+   (4) loops without function calls
+   (5) loops without defs that are used after the loop 
+
+   (1), (3) are checked in this function; (2) - in function 
+   vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
+   (5) is checked as part of the function vect_mark_stmts_to_be_vectorized, 
+   when excluding induction/reduction support.   
+
+   The function returns NULL in case one of these checks or 
+   transformations failed.  */
+   
+struct loop*
+tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops, 
+			     edge e, tree first_niters, 
+			     tree niters, bool update_first_loop_count)
+{
+  struct loop *new_loop = NULL, *first_loop, *second_loop;
+  edge skip_e;
+  tree pre_condition;
+  bitmap definitions;
+  basic_block first_exit_bb, second_exit_bb;
+  basic_block pre_header_bb;
+  edge exit_e = loop->exit_edges [0];
+
+  gcc_assert (!any_marked_for_rewrite_p ());
+
+  if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
+      return NULL;
+
+  /* We have to initialize cfg_hooks. Then, when calling 
+   cfg_hooks->split_edge, the function tree_split_edge 
+   is actually called and, when calling cfg_hooks->duplicate_block, 
+   the function tree_duplicate_bb is called.  */
+  tree_register_cfg_hooks ();
+
+  /* 1. Generate a copy of LOOP and put it on E (entry or exit).  */
+  if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))	
+	fprintf (dump_file,
+		 "The tree_duplicate_loop_to_edge_cfg failed.\n");
+      return NULL;
+    }
+
+  definitions = marked_ssa_names ();
+  allocate_new_names (definitions);
+  update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
+  /* Here, using assumption (5), we do not propagate new names further 
+     than on phis of the exit from the second loop.  */
+  rename_variables_in_loop (new_loop);
+  free_new_names (definitions);
+
+  if (e == exit_e)
+    {
+      first_loop = loop;
+      second_loop = new_loop;
+    }
+  else 
+    {
+      first_loop = new_loop;
+      second_loop = loop;
+    }
+
+  /* 2. Generate bb between the loops.  */
+  first_exit_bb = split_edge (first_loop->exit_edges[0]);
+  add_bb_to_loop (first_exit_bb, first_loop->outer);
+
+  /* We need to update here first loop exit edge 
+     and second loop preheader edge.  */
+  flow_loop_scan (first_loop, LOOP_ALL);
+  flow_loop_scan (second_loop, LOOP_ALL);  
+
+  /* 3. Make first loop iterate FIRST_NITERS times, if needed.  */
+  if (!update_first_loop_count)
+    {
+      tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
+      tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
+
+      make_loop_iterate_ntimes (first_loop, first_niters,
+				first_loop_latch_lbl,
+				first_loop_exit_lbl);
+    }
+  
+  /* 4. Add the guard before first loop:
+
+     if FIRST_NITERS <= 0 
+       skip first loop
+     else 
+       enter first loop  */
+
+  /* 4a. Generate bb before first loop.  */
+  pre_header_bb = split_edge (loop_preheader_edge (first_loop));
+  add_bb_to_loop (pre_header_bb, first_loop->outer);
+
+  /* First loop preheader edge is changed.  */
+  flow_loop_scan (first_loop, LOOP_ALL);

+  /* 4b. Generate guard condition.  */
+  pre_condition = build (LE_EXPR, boolean_type_node,
+			   first_niters, integer_zero_node);
+
+  /* 4c. Add condition at the end of preheader bb.  */
+  skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
+
+  /* 4d. Update phis at first loop exit and propagate changes 
+     to the phis of second loop.  */
+  update_phi_nodes_for_guard (skip_e, first_loop);
+
+  /* 5. Add the guard before second loop:
+
+     if FIRST_NITERS == NITERS
+       skip second loop
+     else 
+       enter second loop  */
+
+  /* 5a. Generate empty bb at the exit from the second loop.  */
+  second_exit_bb = split_edge (second_loop->exit_edges[0]);
+  add_bb_to_loop (second_exit_bb, second_loop->outer);
+
+  /* Second loop exit edge is changed.  */
+  flow_loop_scan (second_loop, LOOP_ALL);
+
+  /* 5b. Generate guard condition.  */
+  pre_condition = build (EQ_EXPR, boolean_type_node,
+			   first_niters, niters);
+
+  /* 5c. Add condition at the end of the bb between the loops.  */
+  skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
+  update_phi_nodes_for_guard (skip_e, second_loop);
+
+  BITMAP_XFREE (definitions);
+  unmark_all_for_rewrite ();
+  
+  return new_loop;
+}
+
+
+
+/* Here the proper Vectorizer starts.  */

 /* Function new_stmt_vec_info.

@@ -274,13 +1107,17 @@ new_loop_vec_info (struct loop *loop)
  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_EXIT_COND (res) = NULL;
-  LOOP_VINFO_NITERS (res) = -1;
+  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
+  LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
 			   "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
 			   "loop_read_datarefs");
+
+  for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
+    LOOP_UNALIGNED_DR (res, i) = NULL;
  return res;
 }

@@ -595,7 +1432,6 @@ vect_get_base_and_bit_offset (struct data_reference *dr,
 }


-
 /* Function vect_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
@@ -709,8 +1545,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
-   1. Return an SSA_NAME whose value is the address of the memory location of the
-      first vector of the data reference.
+   1. Return an SSA_NAME whose value is the address of the memory location of 
+      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

@@ -744,7 +1580,8 @@ vect_create_addr_base_for_vector_ref (tree stmt,
  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
  access_fn = DR_ACCESS_FN (dr, 0);
-  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
+  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, 
+				    true);
  if (!ok)
    init_oval = integer_zero_node;

@@ -1519,8 +2356,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

-  if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
-    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
@@ -1613,7 +2448,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 	software_pipeline_loads_p = true;
      else if (!targetm.vectorize.misaligned_mem_ok (mode))
 	{
-	  /* Possibly unaligned access, and can't software pipeline the loads  */
+	  /* Possibly unaligned access, and can't software pipeline the loads.
+	   */
 	  if (vect_debug_details (loop))
 	    fprintf (dump_file, "Arbitrary load not supported.");
 	  return false;
@@ -1731,7 +2567,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 	}
      else
 	{
-	  /* Use current address instead of init_addr for reduced reg pressure.  */
+	  /* Use current address instead of init_addr for reduced reg pressure.
+	   */
 	  magic = dataref_ptr;
 	}

@@ -1745,124 +2582,612 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));


-      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
-      vec_dest = vect_create_destination_var (scalar_dest, vectype);
-      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
-      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
-      new_temp = make_ssa_name (vec_dest, new_stmt); 
-      TREE_OPERAND (new_stmt, 0) = new_temp;
-      vect_finish_stmt_generation (stmt, new_stmt, bsi);
-    }
+      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
+      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
+      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
+      new_temp = make_ssa_name (vec_dest, new_stmt); 
+      TREE_OPERAND (new_stmt, 0) = new_temp;
+      vect_finish_stmt_generation (stmt, new_stmt, bsi);
+    }
+
+  *vec_stmt = new_stmt;
+  return true;
+}
+
+
+/* Function vect_transform_stmt.
+
+   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
+
+static bool
+vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
+{
+  bool is_store = false;
+  tree vec_stmt = NULL_TREE;
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  bool done;
+
+  switch (STMT_VINFO_TYPE (stmt_info))
+    {
+    case op_vec_info_type:
+      done = vectorizable_operation (stmt, bsi, &vec_stmt);
+      gcc_assert (done);
+      break;
+
+    case assignment_vec_info_type:
+      done = vectorizable_assignment (stmt, bsi, &vec_stmt);
+      gcc_assert (done);
+      break;
+
+    case load_vec_info_type:
+      done = vectorizable_load (stmt, bsi, &vec_stmt);
+      gcc_assert (done);
+      break;
+
+    case store_vec_info_type:
+      done = vectorizable_store (stmt, bsi, &vec_stmt);
+      gcc_assert (done);
+      is_store = true;
+      break;
+    default:
+      if (vect_debug_details (NULL))
+        fprintf (dump_file, "stmt not supported.");
+      gcc_unreachable ();
+    }
+
+  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+
+  return is_store;
+}
+
+
+/* This function builds ni_name = number of iterations loop executes
+   on the loop preheader.  */
+
+static tree
+vect_build_loop_niters (loop_vec_info loop_vinfo)
+{
+  tree ni_name, stmt, var;
+  edge pe;
+  basic_block new_bb;
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
+
+  var = create_tmp_var (TREE_TYPE (ni), "niters");
+  add_referenced_tmp_var (var);
+  if (TREE_CODE (ni) == INTEGER_CST)
+    {
+      /* This case is generated when treating a known loop bound 
+	 indivisible by VF. Here we cannot use force_gimple_operand.  */
+      stmt = build (MODIFY_EXPR, void_type_node, var, ni);
+      ni_name = make_ssa_name (var, stmt);
+      TREE_OPERAND (stmt, 0) = ni_name;
+    }
+  else
+    ni_name = force_gimple_operand (ni, &stmt, false, var);
+
+  pe = loop_preheader_edge (loop);
+  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
+  if (new_bb)
+    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
+      
+  return ni_name;
+}
+
+
+/* This function generates the following statements:
+ ni_name = number of iterations loop executes
+ ratio = ni_name / vf
+ ratio_mult_vf_name = ratio * vf
+ and places them at the preheader edge of the loop of LOOP_VINFO.  The
+ generated names are stored in *NI_NAME_P, *RATIO_P and
+ *RATIO_MULT_VF_NAME_P respectively.  */
+
+static void 
+vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
+				 tree *ratio_mult_vf_name_p, tree *ratio_p)
+{
+
+  edge pe;
+  basic_block new_bb;
+  tree stmt, ni_name;
+  tree ratio;
+  tree ratio_mult_vf_name, ratio_mult_vf;
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree ni = LOOP_VINFO_NITERS(loop_vinfo);
+  
+  int vf, i;
+
+  /* Generate temporary variable that contains 
+     number of iterations loop executes.  */
+
+  ni_name = vect_build_loop_niters (loop_vinfo);
+
+  /* ratio = ni / vf.
+     vf is a power of 2, so ratio = ni >> log2 (vf).  */
+  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  ratio = vect_build_symbol_bound (ni_name, vf, loop);
+       
+  /* ratio_mult_vf is used to update initial conditions of the loop copy.  */
+       
+  /* ratio_mult_vf = ratio * vf;
+     vf is a power of 2, so ratio_mult_vf = ratio << log2 (vf).  */
+
+  i = exact_log2 (vf);
+  ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
+  add_referenced_tmp_var (ratio_mult_vf);
+
+  ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
+
+  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
+		build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
+		       ratio, build_int_cst (unsigned_type_node,
+					     i)));
+
+  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
+
+  /* Place the shift statement on the loop preheader edge.  */
+  pe = loop_preheader_edge (loop);
+  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
+  if (new_bb)
+    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
+
+  *ni_name_p = ni_name;
+  *ratio_mult_vf_name_p = ratio_mult_vf_name;
+  *ratio_p = ratio;
+    
+  return;  
+}
+
+
+/* This function generates stmt 
+   
+   tmp = n / vf;
+
+   and attaches it to preheader of LOOP.  */
+
+static tree 
+vect_build_symbol_bound (tree n, int vf, struct loop * loop)
+{
+  tree var, stmt, var_name;
+  edge pe;
+  basic_block new_bb;
+  int i;
+
+  /* create temporary variable */
+  var = create_tmp_var (TREE_TYPE (n), "bnd");
+  add_referenced_tmp_var (var);
+
+  var_name = make_ssa_name (var, NULL_TREE);
+
+  /* vf is power of 2; then n/vf = n >> log2 (vf).   */
+
+  i = exact_log2 (vf);
+  stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
+		build2 (RSHIFT_EXPR, TREE_TYPE (n),
+		       n, build_int_cst (unsigned_type_node,i)));
+
+  SSA_NAME_DEF_STMT (var_name) = stmt;
+
+  pe = loop_preheader_edge (loop);
+  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
+  if (new_bb)
+    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
+  else	
+    if (vect_debug_details (NULL))
+      fprintf (dump_file, "New bb on preheader edge was not generated.");
+
+  return var_name;
+}
+
+
+/* Function vect_transform_loop_bound.
+
+   Create a new exit condition for the loop.  */
+
+static void
+vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  edge exit_edge = loop->single_exit;
+  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
+  tree indx_before_incr, indx_after_incr;
+  tree orig_cond_expr;
+  HOST_WIDE_INT old_N = 0;
+  int vf;
+  tree cond_stmt;
+  tree new_loop_bound;
+  bool symbol_niters;
+  tree cond;
+  tree lb_type;
+
+  symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
+
+  if (!symbol_niters)
+    old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
+
+  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
+#ifdef ENABLE_CHECKING
+  gcc_assert (orig_cond_expr);
+#endif
+  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
+
+  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, 
+	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
+
+  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back 
+     to point to the exit condition.  */
+  bsi_next (&loop_exit_bsi);
+  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
+
+  /* new loop exit test:  */
+  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
+  if (!symbol_niters)
+    new_loop_bound = fold_convert (lb_type, 
+				   build_int_cst (unsigned_type_node, 
+						  old_N/vf));
+  else
+    new_loop_bound = niters;
+
+  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
+    cond = build2 (GE_EXPR, boolean_type_node, 
+		   indx_after_incr, new_loop_bound);
+  else /* 'then' edge loops back.   */
+    cond = build2 (LT_EXPR, boolean_type_node, 
+		   indx_after_incr, new_loop_bound);
+
+  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
+	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
+
+  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);   
+
+  /* remove old loop exit test:  */
+  bsi_remove (&loop_exit_bsi);
+
+  if (vect_debug_details (NULL))
+    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
+}
+
+
+/*   Advance IVs of the loop (to be vectorized later) to correct position.
+
+     When loop is vectorized, its IVs are not always advanced
+     correctly since vectorization changes the loop count. It's ok
+     in case epilog loop was not produced after original one before 
+     vectorization process (the vectorizer checks that there is no uses 
+     of IVs after the loop). However, in case the epilog loop was peeled, 
+     IVs from original loop are used in epilog loop and should be 
+     advanced correctly.
+
+     Here we use access functions of IVs and number of
+     iteration loop executes in order to bring IVs to correct position.
+
+     The function also updates the phis of the basic block at the exit
+     from the loop.  */
+
+static void
+vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
+{
+  edge exit = loop->exit_edges[0];
+  tree phi;
+  edge latch = loop_latch_edge (loop);
+
+  /* Generate basic block at the exit from the loop.  */
+  basic_block new_bb = split_edge (exit);
+  add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
+  
+  loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
+  
+  for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
+    {
+      tree access_fn = NULL;
+      tree evolution_part;
+      tree init_expr;
+      tree step_expr;
+      tree var, stmt, ni, ni_name;
+      int i, j, num_elem1, num_elem2;
+      tree phi1;
+      block_stmt_iterator last_bsi;
+
+      /* Skip virtual phi's. The data dependences that are associated with
+         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
+
+      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
+	{
+	  if (vect_debug_details (NULL))
+	    fprintf (dump_file, "virtual phi. skip.");
+	  continue;
+	}
+
+      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi)); 
+
+      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
+      
+      /* FORNOW: We do not transform initial conditions of IVs 
+	 which evolution functions are a polynomial of degree >= 2 or
+	 exponential.  */
+
+      step_expr = evolution_part;
+      init_expr = initial_condition (access_fn);
+
+      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
+		  build2 (MULT_EXPR, TREE_TYPE (niters),
+		       niters, step_expr), init_expr);
+
+      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
+      add_referenced_tmp_var (var);
+
+      ni_name = force_gimple_operand (ni, &stmt, false, var);
+      
+      /* Insert stmt into new_bb.  */
+      last_bsi = bsi_last (new_bb);
+      bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);   
+
+      /* Fix phi expressions in duplicated loop.  */
+      num_elem1 = PHI_NUM_ARGS (phi);
+      for (i = 0; i < num_elem1; i++)
+	if (PHI_ARG_EDGE (phi, i) == latch)
+	  {
+	    tree def = PHI_ARG_DEF (phi, i);
+
+	    for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1; 
+		 phi1 = TREE_CHAIN (phi1))
+	      {
+		num_elem2 = PHI_NUM_ARGS (phi1);
+		for (j = 0; j < num_elem2; j++)
+		  if (PHI_ARG_DEF (phi1, j) == def)
+		    {
+		      SET_PHI_ARG_DEF (phi1, j, ni_name);
+		      PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);		      
+		      break;
+ 		    }		    
+	      }
+	    break;
+	  }
+    }
+        
+}
+
+
+/* This function is the main driver of transformation 
+   to be done for loop before vectorizing it in case of 
+   unknown loop bound.  */
+
+static void 
+vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
+				       struct loops *loops)
+{
+
+  tree ni_name, ratio_mult_vf_name;
+#ifdef ENABLE_CHECKING
+  int loop_num;
+#endif
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *new_loop;
+
+  if (vect_debug_details (NULL))
+    fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
+
+  /* Generate the following variables on the preheader of original loop:
+	 
+     ni_name = number of iteration the original loop executes
+     ratio = ni_name / vf
+     ratio_mult_vf_name = ratio * vf  */
+  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
+				   &ratio_mult_vf_name, ratio);
+
+  /* Update loop info.  */
+  loop->pre_header = loop_preheader_edge (loop)->src;
+  loop->pre_header_edges[0] = loop_preheader_edge (loop);
+
+#ifdef ENABLE_CHECKING
+  loop_num  = loop->num; 
+#endif
+  new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
+					  ratio_mult_vf_name, ni_name, true); 
+#ifdef ENABLE_CHECKING
+  gcc_assert (new_loop);
+  gcc_assert (loop_num == loop->num);
+#endif
+
+  /* Update IVs of original loop as if they were advanced 
+     by ratio_mult_vf_name steps.  */
+
+#ifdef ENABLE_CHECKING
+  /* Check existence of intermediate bb.  */
+  gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
+#endif
+  vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name); 
+
+  return;

-  *vec_stmt = new_stmt;
-  return true;
 }


-/* Function vect_transform_stmt.
+/* Function vect_gen_niters_for_prolog_loop

-   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
+   Set the number of iterations for the loop represented by LOOP_VINFO
+   to the minimum between NITERS (the original iteration count of the loop)
+   and the misalignment DR  - the first data reference in the list
+   LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
+   loop, the data reference DR will refer to an aligned location.  */

-static bool
-vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
+static tree 
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
 {
-  bool is_store = false;
-  tree vec_stmt = NULL_TREE;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  bool done;
+  struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
+  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree var, stmt;
+  tree iters, iters_name;
+  edge pe;
+  basic_block new_bb;
+  tree dr_stmt = DR_STMT (dr);
+  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
+  tree start_addr, byte_miss_align, elem_miss_align;
+  int vec_type_align = 
+    GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info))) 
+							/ BITS_PER_UNIT;
+  tree tmp1, tmp2;
+  tree new_stmt_list = NULL_TREE;

-  switch (STMT_VINFO_TYPE (stmt_info))
-    {
-    case op_vec_info_type:
-      done = vectorizable_operation (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
+  start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
+						     &new_stmt_list, NULL_TREE);
+
+  pe = loop_preheader_edge (loop); 
+  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list); 
+  if (new_bb)
+    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
+
+  byte_miss_align = 
+	build (BIT_AND_EXPR, integer_type_node, start_addr, 
+		  build (MINUS_EXPR, integer_type_node, 
+			 build_int_cst (unsigned_type_node,
+					vec_type_align), integer_one_node));
+  tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
+  elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node, 
+			   byte_miss_align, tmp1); 
+  
+  tmp2 = 
+	build (BIT_AND_EXPR, integer_type_node,
+	  build (MINUS_EXPR, integer_type_node, 
+		build_int_cst (unsigned_type_node, vf), elem_miss_align),
+	  build (MINUS_EXPR, integer_type_node, 
+		build_int_cst (unsigned_type_node, vf), integer_one_node)); 
+
+  iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
+  var = create_tmp_var (TREE_TYPE (iters), "iters");
+  add_referenced_tmp_var (var);
+  iters_name = force_gimple_operand (iters, &stmt, false, var);
+
+  /* Insert stmt on loop preheader edge.  */
+  pe = loop_preheader_edge (loop);
+  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
+  if (new_bb)
+    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

-    case assignment_vec_info_type:
-      done = vectorizable_assignment (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;
+  return iters_name; 
+}

-    case load_vec_info_type:
-      done = vectorizable_load (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      break;

-    case store_vec_info_type:
-      done = vectorizable_store (stmt, bsi, &vec_stmt);
-      gcc_assert (done);
-      is_store = true;
-      break;
-    default:
-      if (vect_debug_details (NULL))
-        fprintf (dump_file, "stmt not supported.");
-      gcc_unreachable ();
-    }
+/* Function vect_update_niters_after_peeling

-  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
+   NITERS iterations were peeled from the loop represented by LOOP_VINFO. 
+   The new number of iterations is therefore original_niters - NITERS.
+   Record the new number of iterations in LOOP_VINFO.  */

-  return is_store;
+static void
+vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
+{
+  tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
+  LOOP_VINFO_NITERS (loop_vinfo) = 
+    build (MINUS_EXPR, integer_type_node, n_iters, niters);      
 }


-/* Function vect_transform_loop_bound.
+/* Function vect_update_inits_of_dr

-   Create a new exit condition for the loop.  */
+   NITERS iterations were peeled from LOOP.  DR represents a data reference
+   in LOOP.  This function updates the information recorded in DR to
+   account for the fact that the first NITERS iterations had already been 
+   executed.  Specifically, it updates the initial_condition of the 
+   access_function of DR.  */

 static void
-vect_transform_loop_bound (loop_vec_info loop_vinfo)
+vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop, 
+			 tree niters)
 {
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  edge exit_edge = loop->single_exit;
-  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
-  tree indx_before_incr, indx_after_incr;
-  tree orig_cond_expr;
-  HOST_WIDE_INT old_N = 0;
-  int vf;
-  tree cond_stmt;
-  tree new_loop_bound;
-  tree cond;
-  tree lb_type;
+  tree access_fn = DR_ACCESS_FN (dr, 0);
+  tree init, init_new, step;
+      
+  step = evolution_part_in_loop_num (access_fn, loop->num);
+  init = initial_condition (access_fn);
+      
+  init_new = build (PLUS_EXPR, TREE_TYPE (init),
+		  build (MULT_EXPR, TREE_TYPE (niters),
+			 niters, step), init);
+  DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
+  
+  return;
+}

-  gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
-  old_N = LOOP_VINFO_NITERS (loop_vinfo);
-  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

-  /* FORNOW: 
-     assuming number-of-iterations divides by the vectorization factor.  */
-  gcc_assert (!(old_N % vf));
+/* Function vect_update_inits_of_drs

-  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
-  gcc_assert (orig_cond_expr);
-  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
+   NITERS iterations were peeled from the loop represented by LOOP_VINFO.  
+   This function updates the information recorded for the data references in 
+   the loop to account for the fact that the first NITERS iterations had 
+   already been executed.  Specifically, it updates the initial_condition of the
+   access_function of all the data_references in the loop.  */

-  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop, 
-	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
+static void
+vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
+{
+  unsigned int i;
+  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

-  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back 
-     to point to the exit condition.  */
-  bsi_next (&loop_exit_bsi);
-  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");

-  /* new loop exit test:  */
-  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
-  new_loop_bound = build_int_cst (lb_type, old_N/vf);
+  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
+    {
+      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
+      vect_update_inits_of_dr (dr, loop, niters);
+    }

-  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
-    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
-  else /* 'then' edge loops back.   */
-    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
+  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
+    {
+      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
+      vect_update_inits_of_dr (dr, loop, niters);
+      DR_MISALIGNMENT (dr) = -1; 
+    }
+}

-  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
-	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

-  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);   
+/* Function vect_do_peeling_for_alignment

-  /* remove old loop exit test:  */
-  bsi_remove (&loop_exit_bsi);
+   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
+   'niters' is set to the misalignment of one of the data references in the
+   loop, thereby forcing it to refer to an aligned location at the beginning
+   of the execution of this loop.  The data reference for which we are
+   peeling is chosen from LOOP_UNALIGNED_DR.  */
+
+static void
+vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  tree niters_of_prolog_loop, ni_name;
+  struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);

  if (vect_debug_details (NULL))
-    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
+    fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
+
+  ni_name = vect_build_loop_niters (loop_vinfo);
+  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
+  
+
+  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
+  tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop), 
+				  niters_of_prolog_loop, ni_name, false); 
+
+
+  /* Update stmt info of dr according to which we peeled.  */
+  DR_MISALIGNMENT (dr) = 0; 
+  
+  /* Update number of times loop executes.  */
+  vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
+
+  /* Update all inits of access functions of all data refs.  */
+  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
+
+  /* After peeling we have to reset scalar evolution analyzer.  */
+  scev_reset ();
+
+  return;
 }


@@ -1881,6 +3206,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
+  tree ratio = NULL;
 #ifdef ENABLE_CHECKING
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 #endif
@@ -1888,6 +3214,37 @@ vect_transform_loop (loop_vec_info loop_vinfo,
  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

+  
+  /* Peel the loop if there are data refs with unknown alignment.
+     Only one store with unknown alignment is allowed.  */
+
+  
+  if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
+    vect_do_peeling_for_alignment (loop_vinfo, loops);
+  
+  /* If the loop has a symbolic number of iterations 'n' 
+     (i.e. it's not a compile time constant), 
+     then an epilog loop needs to be created. We therefore duplicate 
+     the initial loop. The original loop will be vectorized, and will compute
+     the first (n/VF) iterations. The second copy of the loop will remain 
+     serial and will compute the remaining (n%VF) iterations.
+     (VF is the vectorization factor).  */
+
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
+
+  /* FORNOW: we'll treat the case where niters is constant and 
+     
+                        niters % vf != 0
+
+     in a way similar to the case with symbolic niters.
+     For this we'll generate a variable whose value is equal to niters.  */
+
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 
+      && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
+    vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
+
+
  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

@@ -1948,7 +3305,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
 	}		        /* stmts in BB */
    }				/* BBs in loop */

-  vect_transform_loop_bound (loop_vinfo);
+  vect_transform_loop_bound (loop_vinfo, ratio);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
@@ -2174,30 +3531,27 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

-  /* FORNOW: handle only cases where the loop bound divides by the
-     vectorization factor.  */
-
-  if (vect_debug_details (NULL))
+  
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 
+      && vect_debug_details (NULL))
    fprintf (dump_file, 
 	"vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
-	vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
-
-  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 
-    {
-      if (vect_debug_stats (loop) || vect_debug_details (loop))
-	fprintf (dump_file, "not vectorized: Unknown loop bound.");
-      return false;
-    }
+	vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

-  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 
-      && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
    {
-      if (vect_debug_stats (loop) || vect_debug_details (loop))
-        fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
-		 vectorization_factor);
-      return false;
+      /* In this case we have to generate an epilog loop, which
+	 can be done only for loops with one entry edge.  */
+      if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
+	  || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
+	{
+	  if (vect_debug_stats (loop) || vect_debug_details (loop))
+	    fprintf (dump_file, "not vectorized: more than one entry.");
+	  return false;
+	}
    }
-
+  
  return true;
 }

@@ -2272,7 +3626,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
    return false;
  
  step_expr = evolution_part;
-  init_expr = initial_condition (access_fn);
+  init_expr = unshare_expr (initial_condition (access_fn));

  if (vect_debug_details (NULL))
    {
@@ -2832,11 +4186,12 @@ vect_compute_array_ref_alignment (struct data_reference *dr,
  tree nbits;

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
-      /* The reference is an array without its last index. */
-      next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
+    /* The reference is an array without its last index. */
+    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, 
+						  &misalign);
  else
-      next_ref = 
-	vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
+    next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims, 
+						  &misalign);
  if (!vectype)
    /* Alignment is not requested. Just return the base.  */
    return next_ref;
@@ -3037,9 +4392,11 @@ static bool
 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
 {
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/

  unsigned int i;
+  unsigned int decide_peeling_count = 0;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
@@ -3062,19 +4419,33 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)


  /* Finally, check that loop can be vectorized. 
-     FOR NOW: Until support for misaligned accesses is in place, only if all
-     accesses are aligned can the loop be vectorized. This restriction will be 
-     relaxed.  */
+     FOR NOW: Until support for misaligned stores is in place, only if all
+     stores are aligned can the loop be vectorized.  This restriction will be 
+     relaxed.  In the meantime, we can force the alignment of one of the
+     data-references in the loop using peeling.  We currently use a heuristic 
+     that peels the first misaligned store, but we plan to develop a 
+     better cost model to guide the decision on which data-access to peel for.
+   */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr))
 	{
-	  if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
-	      || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
-	    fprintf (dump_file, "not vectorized: unaligned store.");
-	  return false;
+	  /* Decide here whether we need peeling for alignment.  */
+	  decide_peeling_count++;
+	  if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
+	    {
+	      if (vect_debug_stats (loop) || vect_debug_details (loop))
+		fprintf (dump_file, 
+			 "not vectorized: multiple misaligned stores.");
+	      return false;
+	    }
+	  else
+	    {
+	      LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
+	      LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
+	    }
 	}
    }

@@ -3125,12 +4496,12 @@ vect_analyze_data_ref_access (struct data_reference *dr)
      if (evolution_part_in_loop_num (access_fn, 
 				      loop_containing_stmt (DR_STMT (dr))->num))
 	{
-	  /* Evolution part is not NULL in this loop (it is neither constant nor 
-	     invariant). */
+	  /* Evolution part is not NULL in this loop (it is neither constant 
+	     nor invariant). */
 	  if (vect_debug_details (NULL))
 	    {
 	      fprintf (dump_file, 
-		       "not vectorized: complicated multidimensional array access.");
+		       "not vectorized: complicated multidim. array access.");
 	      print_generic_expr (dump_file, access_fn, TDF_SLIM);
 	    }
 	  return false;
@@ -3144,7 +4515,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
    {
      if (vect_debug_details (NULL))
 	{
-	  fprintf (dump_file, "not vectorized: too complicated access function.");
+	  fprintf (dump_file, "not vectorized: complicated access function.");
 	  print_generic_expr (dump_file, access_fn, TDF_SLIM);
 	}
      return false;
@@ -3521,7 +4892,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
 	  /* Analyze MEMREF. If it is of a supported form, build data_reference
 	     struct for it (DR) and find the relevant symbol for aliasing 
 	     purposes.  */
-	  symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
+	  symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, 
+					 &dr);
 	  if (!symbl)
 	    {
 	      if (vect_debug_stats (loop) || vect_debug_details (loop))
@@ -3563,7 +4935,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
 	      switch (TREE_CODE (address_base))
 		{
 		case ARRAY_REF:
-		  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
+		  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), 
+				      DR_IS_READ(dr));
 		  STMT_VINFO_MEMTAG (stmt_info) = 
 		     vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
 						   loop_vinfo, &offset, 
@@ -3577,7 +4950,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
 		default:
 		  if (vect_debug_stats (loop) || vect_debug_details (loop))
 		    {
-		      fprintf (dump_file, "not vectorized: unhandled address expression: ");
+		      fprintf (dump_file, 
+			       "not vectorized: unhandled address expr: ");
 		      print_generic_expr (dump_file, stmt, TDF_SLIM);
 		    }
 		  return false;
@@ -3851,12 +5225,109 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
 }


+/* Function vect_analyze_loop_with_symbolic_num_of_iters.
+
+   In case the number of iterations that LOOP iterates is unknown at compile 
+   time, an epilog loop will be generated, and the loop induction variables 
+   (IVs) will be "advanced" to the value they are supposed to take just before 
+   the epilog loop. Here we check that the access function of the loop IVs
+   and the expression that represents the loop bound are simple enough.
+   These restrictions will be relaxed in the future.  */
+
+static bool 
+vect_analyze_loop_with_symbolic_num_of_iters (tree niters, 
+					      struct loop *loop)
+{
+  basic_block bb = loop->header;
+  tree phi;
+
+  if (vect_debug_details (NULL))
+    fprintf (dump_file, 
+	     "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
+  
+  if (chrec_contains_undetermined (niters))
+    {
+      if (vect_debug_details (NULL))
+        fprintf (dump_file, "Infinite number of iterations.");
+      return false;
+    }
+
+  if (!niters)
+    {
+      if (vect_debug_details (NULL))
+        fprintf (dump_file, "niters is NULL pointer.");
+      return false;
+    }
+
+  if (vect_debug_details (NULL))
+    {
+      fprintf (dump_file, "Symbolic number of iterations is ");
+      print_generic_expr (dump_file, niters, TDF_DETAILS);
+    }
+   
+  /* Analyze phi functions of the loop header.  */
+
+  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+    {
+      tree access_fn = NULL;
+      tree evolution_part;
+
+      if (vect_debug_details (NULL))
+	{
+          fprintf (dump_file, "Analyze phi: ");
+          print_generic_expr (dump_file, phi, TDF_SLIM);
+	}
+
+      /* Skip virtual phi's. The data dependences that are associated with
+         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
+
+      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
+	{
+	  if (vect_debug_details (NULL))
+	    fprintf (dump_file, "virtual phi. skip.");
+	  continue;
+	}
+
+      /* Analyze the evolution function.  */
+
+      access_fn = instantiate_parameters
+	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
+
+      if (!access_fn)
+	{
+	  if (vect_debug_details (NULL))
+	    fprintf (dump_file, "No Access function.");
+	  return false;
+	}
+
+      if (vect_debug_details (NULL))
+        {
+	  fprintf (dump_file, "Access function of PHI: ");
+	  print_generic_expr (dump_file, access_fn, TDF_SLIM);
+        }
+
+      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
+      
+      if (evolution_part == NULL_TREE)
+	return false;
+  
+      /* FORNOW: We do not transform initial conditions of IVs 
+	 whose evolution functions are polynomials of degree >= 2.  */
+
+      if (tree_is_chrec (evolution_part))
+	return false;  
+    }
+
+  return  true;
+}
+
+
 /* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed.  */

 static tree
-vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
+vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
 {
  tree niters;

@@ -3866,14 +5337,15 @@ vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
-      && niters != chrec_dont_know
-      && host_integerp (niters,0))
+      && niters != chrec_dont_know)
    {
-      *number_of_iterations = TREE_INT_CST_LOW (niters);
+      *number_of_iterations = niters;

      if (vect_debug_details (NULL))
-        fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
-				 *number_of_iterations);
+	{
+	  fprintf (dump_file, "==> get_loop_niters:" );
+	  print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
+	}
    }

  return get_loop_exit_condition (loop);
@@ -3895,7 +5367,7 @@ vect_analyze_loop_form (struct loop *loop)
 {
  loop_vec_info loop_vinfo;
  tree loop_cond;
-  HOST_WIDE_INT number_of_iterations = -1;
+  tree number_of_iterations = NULL;

  if (vect_debug_details (loop))
    fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
@@ -3943,24 +5415,52 @@ vect_analyze_loop_form (struct loop *loop)
 	fprintf (dump_file, "not vectorized: complicated exit condition.");
      return NULL;
    }
-
-  if (number_of_iterations < 0)
+  
+  if (!number_of_iterations) 
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
-        fprintf (dump_file, "not vectorized: unknown loop bound.");
+	fprintf (dump_file, 
+		 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

-  if (number_of_iterations == 0) /* CHECKME: can this happen? */
+  loop_vinfo = new_loop_vec_info (loop);
+  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    {
+      if (vect_debug_stats (loop) || vect_debug_details (loop))
+	fprintf (dump_file, "loop bound unknown.");
+
+      /* Unknown loop bound.  */
+      if (!vect_analyze_loop_with_symbolic_num_of_iters 
+					(number_of_iterations, loop))
+	{
+          if (vect_debug_stats (loop) || vect_debug_details (loop))
+	    fprintf (dump_file, 
+		     "not vectorized: can't determine loop bound.");
+	  return NULL;
+	}
+      else
+	{
+	  /* We need only one loop entry for unknown loop bound support.  */
+	  if (loop->num_entries != 1 || !loop->pre_header)
+	    {	      
+	      if (vect_debug_stats (loop) || vect_debug_details (loop))
+		fprintf (dump_file, 
+			 "not vectorized: more than one loop entry.");
+	      return NULL;
+	    }
+	}
+    }
+  else
+  if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
 	fprintf (dump_file, "not vectorized: number of iterations = 0.");
      return NULL;
    }

-  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
-  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  return loop_vinfo;
 }
@@ -4170,5 +5670,6 @@ vectorize_loops (struct loops *loops)
         Information in virtual phi nodes is sufficient for it.  */
      rewrite_into_loop_closed_ssa (); 
    }
+  rewrite_into_loop_closed_ssa (); 
  bitmap_clear (vars_to_rename);
 }
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -121,6 +121,7 @@ vinfo_for_stmt (tree stmt)

 /* The misalignment of the memory access in bytes.  */
 #define DR_MISALIGNMENT(DR)   (DR)->aux
+#define MAX_NUMBER_OF_UNALIGNED_DATA_REFS 1

 static inline bool
 aligned_access_p (struct data_reference *data_ref_info)
@@ -152,8 +153,8 @@ typedef struct _loop_vec_info {
  /* The loop exit_condition.  */
  tree exit_cond;

-  /* Number of iterations. -1 if unknown.  */
-  HOST_WIDE_INT num_iters;
+  /* Number of iterations.  */
+  tree num_iters;

  /* Is the loop vectorizable? */
  bool vectorizable;
@@ -161,6 +162,13 @@ typedef struct _loop_vec_info {
  /* Unrolling factor  */
  int vectorization_factor;

+  /* Unaligned DRs according to which the loop was peeled.  */
+  struct data_reference *unaligned_drs [MAX_NUMBER_OF_UNALIGNED_DATA_REFS];
+
+  /* If true, the loop is peeled.
+   In this case unaligned_drs holds the DRs used for peeling.  */
+  bool do_peeling_for_alignment;
+
  /* All data references in the loop that are being written to.  */
  varray_type data_ref_writes;

@@ -177,8 +185,14 @@ typedef struct _loop_vec_info {
 #define LOOP_VINFO_VECT_FACTOR(L)    (L)->vectorization_factor
 #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
 #define LOOP_VINFO_DATAREF_READS(L)  (L)->data_ref_reads
-
-#define LOOP_VINFO_NITERS_KNOWN_P(L) ((L)->num_iters > 0)
+#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))       
+#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment
+#define LOOP_UNALIGNED_DR(L, I)      (L)->unaligned_drs[(I)] 
+  
+
+#define LOOP_VINFO_NITERS_KNOWN_P(L)                     \
+(host_integerp ((L)->num_iters,0)                        \
+&& TREE_INT_CST_LOW ((L)->num_iters) > 0)      

 /*-----------------------------------------------------------------*/
 /* Function prototypes.                                            */