Commit a023975e by Olga Golovanevsky Committed by Dorit Nuzman

2004-10-14 Olga Golovanevsky <olga@il.ibm.com>

        * tree-vectorizer.c (vect_generate_tmps_on_preheader):
        (vect_update_ivs_after_vectorizer):
        (vect_transform_for_unknown_loop_bound):
        (tree_duplicate_loop_to_edge):
        (allocate_new_names):
        (rename_use_op):
        (rename_def_op):
        (rename_variables_in_bb):
        (free_new_names):
        (rename_variables_in_loop):
        (copy_phi_nodes):
        (update_phis_for_duplicate_loop):
        (update_phi_nodes_for_guard):
        (make_loop_iterate_ntimes):
        (tree_duplicate_loop_to_edge_cfg):
        (add_loop_guard):
        (vect_analyze_loop_with_symbolic_num_of_iters):
        (verify_loop_for_duplication):
        (vect_gen_niters_for_prolog_loop):
        (vect_update_niters_after_peeling):
        (vect_update_inits_of_dr):
        (vect_update_inits_of_drs):
        (vect_build_loop_niters):
        (vect_do_peeling_for_alignment): New functions.
        (vect_transform_loop): Add unknown and known but indivisible loop
        bound support; add peeling for unalignment support.
        (vect_analyze_loop_form): Support symbolic number of iterations.
        (vect_transform_loop_bound): New input parameter.
        (vect_get_loop_niters): Change input parameter type.
        (new_loop_vec_info): LOOP_VINFO_NITERS is tree now.
        (vectorizable_store): Allow unaligned access.
        (vectorize_loops): Add rewrite_into_loop_closed_ssa.
        (vect_analyze_data_refs_alignment): Allowed one unaligned
        store.
        * tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined
        to use tree.
        (LOOP_VINFO_INT_NITERS): New macro.
        (MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define.
        (do_peeling_for_alignment):
        (unaligned_drs): New members of _loop_vec_info.
        (LOOP_DO_PEELING_FOR_ALIGNMENT): New macro.

From-SVN: r89040
parent 7238c5ec
2004-10-14 Olga Golovanevsky <olga@il.ibm.com>
* tree-vectorizer.c (vect_generate_tmps_on_preheader):
(vect_update_ivs_after_vectorizer):
(vect_transform_for_unknown_loop_bound):
(tree_duplicate_loop_to_edge):
(allocate_new_names):
(rename_use_op):
(rename_def_op):
(rename_variables_in_bb):
(free_new_names):
(rename_variables_in_loop):
(copy_phi_nodes):
(update_phis_for_duplicate_loop):
(update_phi_nodes_for_guard):
(make_loop_iterate_ntimes):
(tree_duplicate_loop_to_edge_cfg):
(add_loop_guard):
(vect_analyze_loop_with_symbolic_num_of_iters):
(verify_loop_for_duplication):
(vect_gen_niters_for_prolog_loop):
(vect_update_niters_after_peeling):
(vect_update_inits_of_dr):
(vect_update_inits_of_drs):
(vect_build_loop_niters):
(vect_do_peeling_for_alignment): New functions.
(vect_transform_loop): Add unknown and known but indivisible loop
bound support; add peeling for unalignment support.
(vect_analyze_loop_form): Support symbolic number of iterations.
(vect_transform_loop_bound): New input parameter.
(vect_get_loop_niters): Change input parameter type.
(new_loop_vec_info): LOOP_VINFO_NITERS is tree now.
(vectorizable_store): Allow unaligned access.
(vectorize_loops): Add rewrite_into_loop_closed_ssa.
(vect_analyze_data_refs_alignment): Allowed one unaligned
store.
* tree-vectorizer.h (LOOP_VINFO_NITERS_KNOWN_P): Redefined
to use tree.
(LOOP_VINFO_INT_NITERS): New macro.
(MAX_NUMBER_OF_UNALIGNED_DATA_REFS): New define.
(do_peeling_for_alignment):
(unaligned_drs): New members of _loop_vec_info.
(LOOP_DO_PEELING_FOR_ALIGNMENT): New macro.
2004-10-14 Ranjit Mathew <rmathew@hotmail.com>
* tree.h (TREE_STRING_POINTER): Wrap in "const char *".
......
2004-10-14 Olga GOlovanevsky <olga@il.ibm.com>
* testsuite/gcc.dg/vect/vect-28.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-30.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-31.c : Vectorize 4 loops instead of 2.
* testsuite/gcc.dg/vect/vect-33.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-44.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-46.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-50.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-52.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-54.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-58.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-60.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-64.c : Vectorize 3 loops instead of 1.
* testsuite/gcc.dg/vect/vect-66.c : Vectorize 3 loops instead of 2.
* testsuite/gcc.dg/vect/vect-68.c : Vectorize 4 loops instead of 2.
* testsuite/gcc.dg/vect/vect-69.c : Vectorize 4 loops instead of 2.
* testsuite/gcc.dg/vect/vect-8.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-80.c : Remove xfail.
* testsuite/gcc.dg/vect/vect-none.c : Vectorize 1 loops instead of 0.
2004-10-14 Dorit Naishlos <dorit@il.ibm.com>
* gcc.dg/vect/vect-82.c: New testcase.
......
......@@ -37,5 +37,5 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -40,5 +40,5 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -64,4 +64,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
......@@ -88,4 +88,4 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
......@@ -38,4 +38,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -59,4 +59,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -55,4 +55,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -54,4 +54,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -56,4 +56,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -55,4 +55,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -56,4 +56,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -57,4 +57,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -83,4 +83,4 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
......@@ -79,4 +79,4 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
......@@ -87,4 +87,4 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
......@@ -114,4 +114,4 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */
......@@ -39,4 +39,4 @@ int main (void)
return main1 (N);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -47,4 +47,4 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -189,5 +189,5 @@ foo (int n)
}
/* { dg-final { scan-tree-dump-times "vectorized " 3 "vect"} } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"} } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect"} } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
......@@ -139,6 +139,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "cfglayout.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
......@@ -158,7 +159,7 @@ static bool vect_analyze_operations (loop_vec_info);
/* Main code transformation functions. */
static void vect_transform_loop (loop_vec_info, struct loops *);
static void vect_transform_loop_bound (loop_vec_info);
static void vect_transform_loop_bound (loop_vec_info, tree niters);
static bool vect_transform_stmt (tree, block_stmt_iterator *);
static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
......@@ -173,7 +174,7 @@ static bool exist_non_indexing_operands_for_use_p (tree, tree);
static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
static void vect_mark_relevant (varray_type, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_compute_data_ref_alignment
(struct data_reference *, loop_vec_info);
static bool vect_analyze_data_ref_access (struct data_reference *);
......@@ -181,6 +182,8 @@ static bool vect_get_first_index (tree, tree *);
static bool vect_can_force_dr_alignment_p (tree, unsigned int);
static struct data_reference * vect_analyze_pointer_ref_access
(tree, tree, bool);
static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
struct loop *loop);
static tree vect_get_base_and_bit_offset
(struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
static struct data_reference * vect_analyze_pointer_ref_access
......@@ -203,17 +206,847 @@ static tree get_vectype_for_scalar_type (tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree);
static tree vect_init_vector (tree, tree);
static tree vect_build_symbol_bound (tree, int, struct loop *);
static void vect_finish_stmt_generation
(tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
static void vect_generate_tmps_on_preheader (loop_vec_info,
tree *, tree *,
tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (struct loop *, tree);
/* Loop transformations prior to vectorizeration. */
/* Loop transformations entry point function.
It can be used outside of the vectorizer
in case the loop to be manipulated answers conditions specified
in function documentation. */
struct loop *tree_duplicate_loop_to_edge (struct loop *,
struct loops *, edge,
tree, tree, bool);
static void allocate_new_names (bitmap);
static void rename_use_op (use_operand_p);
static void rename_def_op (def_operand_p, tree);
static void rename_variables_in_bb (basic_block);
static void free_new_names (bitmap);
static void rename_variables_in_loop (struct loop *);
static void copy_phi_nodes (struct loop *, struct loop *, bool);
static void update_phis_for_duplicate_loop (struct loop *,
struct loop *,
bool after);
static void update_phi_nodes_for_guard (edge, struct loop *);
static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
struct loops *,
edge);
static edge add_loop_guard (basic_block, tree, basic_block);
static bool verify_loop_for_duplication (struct loop *, bool, edge);
/* Utilities dealing with loop peeling (not peeling itself). */
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_niters_after_peeling (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
/* Utilities for creation and deletion of vec_info structs. */
loop_vec_info new_loop_vec_info (struct loop *loop);
void destroy_loop_vec_info (loop_vec_info);
stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
static bool vect_debug_stats (struct loop *loop);
static bool vect_debug_details (struct loop *loop);
static bool vect_debug_stats (struct loop *loop);
static bool vect_debug_details (struct loop *loop);
/* Utilities to support loop peeling for vectorization purposes. */
/* For each definition in DEFINITIONS this function allocates
new ssa name. */
static void
allocate_new_names (bitmap definitions)
{
unsigned ver;
bitmap_iterator bi;
EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
{
tree def = ssa_name (ver);
tree *new_name_ptr = xmalloc (sizeof (tree));
bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
*new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
SSA_NAME_AUX (def) = new_name_ptr;
}
}
/* Renames the use *OP_P. */
static void
rename_use_op (use_operand_p op_p)
{
tree *new_name_ptr;
if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
return;
new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
/* Something defined outside of the loop. */
if (!new_name_ptr)
return;
/* An ordinary ssa name defined in the loop. */
SET_USE (op_p, *new_name_ptr);
}
/* Renames the def *OP_P in statement STMT. */
static void
rename_def_op (def_operand_p op_p, tree stmt)
{
tree *new_name_ptr;
if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
return;
new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
/* Something defined outside of the loop. */
if (!new_name_ptr)
return;
/* An ordinary ssa name defined in the loop. */
SET_DEF (op_p, *new_name_ptr);
SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
}
/* Renames the variables in basic block BB. */
static void
rename_variables_in_bb (basic_block bb)
{
tree phi;
block_stmt_iterator bsi;
tree stmt;
stmt_ann_t ann;
use_optype uses;
vuse_optype vuses;
def_optype defs;
v_may_def_optype v_may_defs;
v_must_def_optype v_must_defs;
unsigned i;
edge e;
edge_iterator ei;
for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
rename_def_op (PHI_RESULT_PTR (phi), phi);
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
stmt = bsi_stmt (bsi);
get_stmt_operands (stmt);
ann = stmt_ann (stmt);
uses = USE_OPS (ann);
for (i = 0; i < NUM_USES (uses); i++)
rename_use_op (USE_OP_PTR (uses, i));
defs = DEF_OPS (ann);
for (i = 0; i < NUM_DEFS (defs); i++)
rename_def_op (DEF_OP_PTR (defs, i), stmt);
vuses = VUSE_OPS (ann);
for (i = 0; i < NUM_VUSES (vuses); i++)
rename_use_op (VUSE_OP_PTR (vuses, i));
v_may_defs = V_MAY_DEF_OPS (ann);
for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
{
rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
}
v_must_defs = V_MUST_DEF_OPS (ann);
for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
rename_def_op (V_MUST_DEF_OP_PTR (v_must_defs, i), stmt);
}
FOR_EACH_EDGE (e, ei, bb->succs)
for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
}
/* Releases the structures holding the new ssa names. */
static void
free_new_names (bitmap definitions)
{
unsigned ver;
bitmap_iterator bi;
EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
{
tree def = ssa_name (ver);
if (SSA_NAME_AUX (def))
{
free (SSA_NAME_AUX (def));
SSA_NAME_AUX (def) = NULL;
}
}
}
/* Renames variables in new generated LOOP. */
static void
rename_variables_in_loop (struct loop *loop)
{
unsigned i;
basic_block *bbs;
bbs = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
rename_variables_in_bb (bbs[i]);
free (bbs);
}
/* This function copies phis from LOOP header to
NEW_LOOP header. AFTER is as
in update_phis_for_duplicate_loop function. */
static void
copy_phi_nodes (struct loop *loop, struct loop *new_loop,
bool after)
{
tree phi, new_phi, def;
edge new_e;
edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
/* Second add arguments to newly created phi nodes. */
for (phi = phi_nodes (loop->header),
new_phi = phi_nodes (new_loop->header);
phi;
phi = TREE_CHAIN (phi),
new_phi = TREE_CHAIN (new_phi))
{
new_e = loop_preheader_edge (new_loop);
def = PHI_ARG_DEF_FROM_EDGE (phi, e);
add_phi_arg (&new_phi, def, new_e);
}
}
/* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
executes after LOOP, and false if it executes before it. */
static void
update_phis_for_duplicate_loop (struct loop *loop,
struct loop *new_loop, bool after)
{
edge old_latch;
tree *new_name_ptr, new_ssa_name;
tree phi_new, phi_old, def;
edge orig_entry_e = loop_preheader_edge (loop);
/* Copy phis from loop->header to new_loop->header. */
copy_phi_nodes (loop, new_loop, after);
old_latch = loop_latch_edge (loop);
/* Update PHI args for the new loop latch edge, and
the old loop preheader edge, we know that the PHI nodes
are ordered appropriately in copy_phi_nodes. */
for (phi_new = phi_nodes (new_loop->header),
phi_old = phi_nodes (loop->header);
phi_new && phi_old;
phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
{
def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
if (TREE_CODE (def) != SSA_NAME)
continue;
new_name_ptr = SSA_NAME_AUX (def);
/* Something defined outside of the loop. */
if (!new_name_ptr)
continue;
/* An ordinary ssa name defined in the loop. */
new_ssa_name = *new_name_ptr;
add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
/* Update PHI args for the original loop pre-header edge. */
if (! after)
SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
new_ssa_name);
}
}
/* Update PHI nodes for a guard of the LOOP.
LOOP is supposed to have a preheader bb at which a guard condition is
located. The true edge of this condition skips the LOOP and ends
at the destination of the (unique) LOOP exit. The loop exit bb is supposed
to be an empty bb (created by this transformation) with one successor.
This function creates phi nodes at the LOOP exit bb. These phis need to be
created as a result of adding true edge coming from guard.
FORNOW: Only phis which have corresponding phi nodes at the header of the
LOOP are created. Here we use the assumption that after the LOOP there
are no uses of defs generated in LOOP.
After the phis creation, the function updates the values of phi nodes at
the LOOP exit successor bb:
Original loop:
bb0: loop preheader
goto bb1
bb1: loop header
if (exit_cond) goto bb3 else goto bb2
bb2: loop latch
goto bb1
bb3:
After guard creation (the loop before this function):
bb0: loop preheader
if (guard_condition) goto bb4 else goto bb1
bb1: loop header
if (exit_cond) goto bb4 else goto bb2
bb2: loop latch
goto bb1
bb4: loop exit
(new empty bb)
goto bb3
bb3:
This function updates the phi nodes in bb4 and in bb3, to account for the
new edge from bb0 to bb4. */
static void
update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
{
tree phi, phi1;
for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
{
tree new_phi;
tree phi_arg;
/* Generate new phi node. */
new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
loop->exit_edges[0]->dest);
/* Add argument coming from guard true edge. */
phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
add_phi_arg (&new_phi, phi_arg, guard_true_edge);
/* Add argument coming from loop exit edge. */
phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
/* Update all phi nodes at the loop exit successor. */
for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
phi1;
phi1 = TREE_CHAIN (phi1))
{
tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
EDGE_SUCC (loop->exit_edges[0]->dest, 0));
if (old_arg == phi_arg)
{
edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
SET_PHI_ARG_DEF (phi1,
phi_arg_from_edge (phi1, e),
PHI_RESULT (new_phi));
}
}
}
}
/* Make the LOOP iterate NITERS times. This is done by adding a new IV
that starts at zero, increases by one and its limit is NITERS. */
static void
make_loop_iterate_ntimes (struct loop *loop, tree niters,
tree begin_label, tree exit_label)
{
tree indx_before_incr, indx_after_incr, cond_stmt, cond;
tree orig_cond;
edge exit_edge = loop->exit_edges[0];
block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
/* Flow loop scan does not update loop->single_exit field. */
loop->single_exit = loop->exit_edges[0];
orig_cond = get_loop_exit_condition (loop);
gcc_assert (orig_cond);
create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
&loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
/* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
back to the exit condition statement. */
bsi_next (&loop_exit_bsi);
gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
else /* 'then' edge loops back. */
cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
begin_label, exit_label);
bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
/* Remove old loop exit test: */
bsi_remove (&loop_exit_bsi);
if (vect_debug_stats (loop) || vect_debug_details (loop))
print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
/* Given LOOP this function generates a new copy of it and puts it
on E which is either the entry or exit of LOOP. */
static struct loop *
tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
edge e)
{
struct loop *new_loop;
basic_block *new_bbs, *bbs;
bool at_exit;
bool was_imm_dom;
basic_block exit_dest;
tree phi, phi_arg;
at_exit = (e == loop->exit_edges[0]);
if (!at_exit && e != loop_preheader_edge (loop))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"Edge is not an entry nor an exit edge.\n");
return NULL;
}
bbs = get_loop_body (loop);
/* Check whether duplication is possible. */
if (!can_copy_bbs_p (bbs, loop->num_nodes))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"Cannot copy basic blocks.\n");
free (bbs);
return NULL;
}
/* Generate new loop structure. */
new_loop = duplicate_loop (loops, loop, loop->outer);
if (!new_loop)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"The duplicate_loop returns NULL.\n");
free (bbs);
return NULL;
}
exit_dest = loop->exit_edges[0]->dest;
was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
exit_dest) == loop->header ?
true : false);
new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
/* Duplicating phi args at exit bbs as coming
also from exit of duplicated loop. */
for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
{
phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
if (phi_arg)
{
edge new_loop_exit_edge;
if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
else
new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
}
}
if (at_exit) /* Add the loop copy at exit. */
{
redirect_edge_and_branch_force (e, new_loop->header);
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
if (was_imm_dom)
set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
}
else /* Add the copy at entry. */
{
edge new_exit_e;
edge entry_e = loop_preheader_edge (loop);
basic_block preheader = entry_e->src;
if (!flow_bb_inside_loop_p (new_loop,
EDGE_SUCC (new_loop->header, 0)->dest))
new_exit_e = EDGE_SUCC (new_loop->header, 0);
else
new_exit_e = EDGE_SUCC (new_loop->header, 1);
redirect_edge_and_branch_force (new_exit_e, loop->header);
set_immediate_dominator (CDI_DOMINATORS, loop->header,
new_exit_e->src);
/* We have to add phi args to the loop->header here as coming
from new_exit_e edge. */
for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
{
phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
if (phi_arg)
add_phi_arg (&phi, phi_arg, new_exit_e);
}
redirect_edge_and_branch_force (entry_e, new_loop->header);
set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
}
flow_loop_scan (new_loop, LOOP_ALL);
flow_loop_scan (loop, LOOP_ALL);
free (new_bbs);
free (bbs);
return new_loop;
}
/* Given the condition statement COND, put it as the last statement
of GUARD_BB; EXIT_BB is the basic block to skip the loop;
Assumes that this is the single exit of the guarded loop.
Returns the skip edge. */
static edge
add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
{
block_stmt_iterator bsi;
edge new_e, enter_e;
tree cond_stmt, then_label, else_label;
enter_e = EDGE_SUCC (guard_bb, 0);
enter_e->flags &= ~EDGE_FALLTHRU;
enter_e->flags |= EDGE_FALSE_VALUE;
bsi = bsi_last (guard_bb);
then_label = build1 (GOTO_EXPR, void_type_node,
tree_block_label (exit_bb));
else_label = build1 (GOTO_EXPR, void_type_node,
tree_block_label (enter_e->dest));
cond_stmt = build (COND_EXPR, void_type_node, cond,
then_label, else_label);
bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
/* Add new edge to connect entry block to the second loop. */
new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
return new_e;
}
/* This function verifies that certain restrictions apply to LOOP. */
static bool
verify_loop_for_duplication (struct loop *loop,
bool update_first_loop_count, edge e)
{
edge exit_e = loop->exit_edges [0];
edge entry_e = loop_preheader_edge (loop);
/* We duplicate only innermost loops. */
if (loop->inner)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"Loop duplication failed. Loop is not innermost.\n");
return false;
}
/* Only loops with 1 exit. */
if (loop->num_exits != 1)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"More than one exit from loop.\n");
return false;
}
/* Only loops with 1 entry. */
if (loop->num_entries != 1)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"More than one exit from loop.\n");
return false;
}
/* All loops has outers, the only case loop->outer is NULL is for
the function itself. */
if (!loop->outer)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"Loop is outer-most loop.\n");
return false;
}
/* Verify that new IV can be created and loop condition
can be changed to make first loop iterate first_niters times. */
if (!update_first_loop_count)
{
tree orig_cond = get_loop_exit_condition (loop);
block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
if (!orig_cond)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"Loop has no exit condition.\n");
return false;
}
if (orig_cond != bsi_stmt (loop_exit_bsi))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"Loop exit condition is not loop header last stmt.\n");
return false;
}
}
/* Make sure E is either an entry or an exit edge. */
if (e != exit_e && e != entry_e)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"E is not loop entry or exit edge.\n");
return false;
}
return true;
}
/* Given LOOP this function duplicates it to the edge E.
This transformation takes place before the loop is vectorized.
For now, there are two main cases when it's used
by the vectorizer: to support loops with unknown loop bounds
(or loop bounds indivisible by vectorization factor) and to force the
alignment of data references in the loop. In the first case, LOOP is
duplicated to the exit edge, producing epilog loop. In the second case, LOOP
is duplicated to the preheader edge thus generating prolog loop. In both
cases, the original loop will be vectorized after the transformation.
The edge E is supposed to be either preheader edge of the LOOP or
its exit edge. If preheader edge is specified, the LOOP copy
will precede the original one. Otherwise the copy will be located
at the exit of the LOOP.
FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
loop will be iterated FIRST_NITERS times by introducing additional
induction variable and replacing loop exit condition. If
UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
the caller to tree_duplicate_loop_to_edge is responsible for updating
the first loop count.
NITERS (also SSA_NAME) parameter defines the number of iteration the
original loop iterated. The function generates two if-then guards:
one prior to the first loop and the other prior to the second loop.
The first guard will be:
if (FIRST_NITERS == 0) then skip the first loop
The second guard will be:
if (FIRST_NITERS == NITERS) then skip the second loop
Thus the equivalence to the original code is guaranteed by correct values
of NITERS and FIRST_NITERS and generation of if-then loop guards.
For now this function supports only loop forms that are candidate for
vectorization. Such types are the following:
(1) only innermost loops
(2) loops built from 2 basic blocks
(3) loops with one entry and one exit
(4) loops without function calls
(5) loops without defs that are used after the loop
(1), (3) are checked in this function; (2) - in function
vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
(5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
when excluding induction/reduction support.
The function returns NULL in case one of these checks or
transformations failed. */
struct loop*
tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
edge e, tree first_niters,
tree niters, bool update_first_loop_count)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
tree pre_condition;
bitmap definitions;
basic_block first_exit_bb, second_exit_bb;
basic_block pre_header_bb;
edge exit_e = loop->exit_edges [0];
gcc_assert (!any_marked_for_rewrite_p ());
if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
return NULL;
/* We have to initialize cfg_hooks. Then, when calling
cfg_hooks->split_edge, the function tree_split_edge
is actually called and, when calling cfg_hooks->duplicate_block,
the function tree_duplicate_bb is called. */
tree_register_cfg_hooks ();
/* 1. Generate a copy of LOOP and put it on E (entry or exit). */
if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"The tree_duplicate_loop_to_edge_cfg failed.\n");
return NULL;
}
definitions = marked_ssa_names ();
allocate_new_names (definitions);
update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
/* Here, using assumption (5), we do not propagate new names futher
than on phis of the exit from the second loop. */
rename_variables_in_loop (new_loop);
free_new_names (definitions);
if (e == exit_e)
{
first_loop = loop;
second_loop = new_loop;
}
else
{
first_loop = new_loop;
second_loop = loop;
}
/* 2. Generate bb between the loops. */
first_exit_bb = split_edge (first_loop->exit_edges[0]);
add_bb_to_loop (first_exit_bb, first_loop->outer);
/* We need to update here first loop exit edge
and second loop preheader edge. */
flow_loop_scan (first_loop, LOOP_ALL);
flow_loop_scan (second_loop, LOOP_ALL);
/* 3. Make first loop iterate FIRST_NITERS times, if needed. */
if (!update_first_loop_count)
{
tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
make_loop_iterate_ntimes (first_loop, first_niters,
first_loop_latch_lbl,
first_loop_exit_lbl);
}
/* 4. Add the guard before first loop:
if FIRST_NITERS == 0
skip first loop
else
enter first loop */
/* 4a. Generate bb before first loop. */
pre_header_bb = split_edge (loop_preheader_edge (first_loop));
add_bb_to_loop (pre_header_bb, first_loop->outer);
/* First loop preheader edge is changed. */
flow_loop_scan (first_loop, LOOP_ALL);
/* 4b. Generate guard condition. */
pre_condition = build (LE_EXPR, boolean_type_node,
first_niters, integer_zero_node);
/* 4c. Add condition at the end of preheader bb. */
skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
/* 4d. Updtae phis at first loop exit and propagate changes
to the phis of second loop. */
update_phi_nodes_for_guard (skip_e, first_loop);
/* 5. Add the guard before second loop:
if FIRST_NITERS == NITERS SKIP
skip second loop
else
enter second loop */
/* 5a. Generate empty bb at the exit from the second loop. */
second_exit_bb = split_edge (second_loop->exit_edges[0]);
add_bb_to_loop (second_exit_bb, second_loop->outer);
/* Second loop preheader edge is changed. */
flow_loop_scan (second_loop, LOOP_ALL);
/* 5b. Generate guard condition. */
pre_condition = build (EQ_EXPR, boolean_type_node,
first_niters, niters);
/* 5c. Add condition at the end of preheader bb. */
skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
update_phi_nodes_for_guard (skip_e, second_loop);
BITMAP_XFREE (definitions);
unmark_all_for_rewrite ();
return new_loop;
}
/* Here the proper Vectorizer starts. */
/* Function new_stmt_vec_info.
......@@ -274,13 +1107,17 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_LOOP (res) = loop;
LOOP_VINFO_BBS (res) = bbs;
LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = -1;
LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
LOOP_VINFO_VECT_FACTOR (res) = 0;
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
"loop_write_datarefs");
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
"loop_read_datarefs");
for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
LOOP_UNALIGNED_DR (res, i) = NULL;
return res;
}
......@@ -595,7 +1432,6 @@ vect_get_base_and_bit_offset (struct data_reference *dr,
}
/* Function vect_force_dr_alignment_p.
Returns whether the alignment of a DECL can be forced to be aligned
......@@ -709,8 +1545,8 @@ vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
OFFSET: Optional. If supplied, it is be added to the initial address.
Output:
1. Return an SSA_NAME whose value is the address of the memory location of the
first vector of the data reference.
1. Return an SSA_NAME whose value is the address of the memory location of
the first vector of the data reference.
2. If new_stmt_list is not NULL_TREE after return then the caller must insert
these statement(s) which define the returned SSA_NAME.
......@@ -744,7 +1580,8 @@ vect_create_addr_base_for_vector_ref (tree stmt,
/* Only the access function of the last index is relevant (i_n in
a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
access_fn = DR_ACCESS_FN (dr, 0);
ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
true);
if (!ok)
init_oval = integer_zero_node;
......@@ -1519,8 +2356,6 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
return false;
if (!vec_stmt) /* transformation not required. */
{
......@@ -1613,7 +2448,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
software_pipeline_loads_p = true;
else if (!targetm.vectorize.misaligned_mem_ok (mode))
{
/* Possibly unaligned access, and can't software pipeline the loads */
/* Possibly unaligned access, and can't software pipeline the loads.
*/
if (vect_debug_details (loop))
fprintf (dump_file, "Arbitrary load not supported.");
return false;
......@@ -1731,7 +2567,8 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
else
{
/* Use current address instead of init_addr for reduced reg pressure. */
/* Use current address instead of init_addr for reduced reg pressure.
*/
magic = dataref_ptr;
}
......@@ -1745,124 +2582,612 @@ vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
/* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
new_temp = make_ssa_name (vec_dest, new_stmt);
TREE_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
}
/* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
new_temp = make_ssa_name (vec_dest, new_stmt);
TREE_OPERAND (new_stmt, 0) = new_temp;
vect_finish_stmt_generation (stmt, new_stmt, bsi);
}
*vec_stmt = new_stmt;
return true;
}
/* Function vect_transform_stmt.
Create a vectorized stmt to replace STMT, and insert it at BSI. */
static bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
bool is_store = false;
tree vec_stmt = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
bool done;
switch (STMT_VINFO_TYPE (stmt_info))
{
case op_vec_info_type:
done = vectorizable_operation (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
case assignment_vec_info_type:
done = vectorizable_assignment (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
case load_vec_info_type:
done = vectorizable_load (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
case store_vec_info_type:
done = vectorizable_store (stmt, bsi, &vec_stmt);
gcc_assert (done);
is_store = true;
break;
default:
if (vect_debug_details (NULL))
fprintf (dump_file, "stmt not supported.");
gcc_unreachable ();
}
STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
return is_store;
}
/* This function builds ni_name = number of iterations loop executes
on the loop preheader. */
static tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
tree ni_name, stmt, var;
edge pe;
basic_block new_bb;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
var = create_tmp_var (TREE_TYPE (ni), "niters");
add_referenced_tmp_var (var);
if (TREE_CODE (ni) == INTEGER_CST)
{
/* This case is generated when treating a known loop bound
indivisible by VF. Here we cannot use force_gimple_operand. */
stmt = build (MODIFY_EXPR, void_type_node, var, ni);
ni_name = make_ssa_name (var, stmt);
TREE_OPERAND (stmt, 0) = ni_name;
}
else
ni_name = force_gimple_operand (ni, &stmt, false, var);
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, stmt);
if (new_bb)
add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
return ni_name;
}
/* This function generates the following statements:
ni_name = number of iterations loop executes
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf
and places them at the loop preheader edge. */
static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
tree *ratio_mult_vf_name_p, tree *ratio_p)
{
edge pe;
basic_block new_bb;
tree stmt, ni_name;
tree ratio;
tree ratio_mult_vf_name, ratio_mult_vf;
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree ni = LOOP_VINFO_NITERS(loop_vinfo);
int vf, i;
/* Generate temporary variable that contains
number of iterations loop executes. */
ni_name = vect_build_loop_niters (loop_vinfo);
/* ratio = ni / vf.
vf is power of 2; then if ratio = = n >> log2 (vf). */
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
ratio = vect_build_symbol_bound (ni_name, vf, loop);
/* Update initial conditions of loop copy. */
/* ratio_mult_vf = ratio * vf;
then if ratio_mult_vf = ratio << log2 (vf). */
i = exact_log2 (vf);
ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
add_referenced_tmp_var (ratio_mult_vf);
ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
ratio, build_int_cst (unsigned_type_node,
i)));
SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, stmt);
if (new_bb)
add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
*ni_name_p = ni_name;
*ratio_mult_vf_name_p = ratio_mult_vf_name;
*ratio_p = ratio;
return;
}
/* This function generates stmt
tmp = n / vf;
and attaches it to preheader of LOOP. */
static tree
vect_build_symbol_bound (tree n, int vf, struct loop * loop)
{
tree var, stmt, var_name;
edge pe;
basic_block new_bb;
int i;
/* create temporary variable */
var = create_tmp_var (TREE_TYPE (n), "bnd");
add_referenced_tmp_var (var);
var_name = make_ssa_name (var, NULL_TREE);
/* vf is power of 2; then n/vf = n >> log2 (vf). */
i = exact_log2 (vf);
stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
build2 (RSHIFT_EXPR, TREE_TYPE (n),
n, build_int_cst (unsigned_type_node,i)));
SSA_NAME_DEF_STMT (var_name) = stmt;
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, stmt);
if (new_bb)
add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
else
if (vect_debug_details (NULL))
fprintf (dump_file, "New bb on preheader edge was not generated.");
return var_name;
}
/* Function vect_transform_loop_bound.
Create a new exit condition for the loop. */
static void
vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
edge exit_edge = loop->single_exit;
block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
tree indx_before_incr, indx_after_incr;
tree orig_cond_expr;
HOST_WIDE_INT old_N = 0;
int vf;
tree cond_stmt;
tree new_loop_bound;
bool symbol_niters;
tree cond;
tree lb_type;
symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
if (!symbol_niters)
old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
#ifdef ENABLE_CHECKING
gcc_assert (orig_cond_expr);
#endif
gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
&loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
/* bsi_insert is using BSI_NEW_STMT. We need to bump it back
to point to the exit condition. */
bsi_next (&loop_exit_bsi);
gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
/* new loop exit test: */
lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
if (!symbol_niters)
new_loop_bound = fold_convert (lb_type,
build_int_cst (unsigned_type_node,
old_N/vf));
else
new_loop_bound = niters;
if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
cond = build2 (GE_EXPR, boolean_type_node,
indx_after_incr, new_loop_bound);
else /* 'then' edge loops back. */
cond = build2 (LT_EXPR, boolean_type_node,
indx_after_incr, new_loop_bound);
cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
/* remove old loop exit test: */
bsi_remove (&loop_exit_bsi);
if (vect_debug_details (NULL))
print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
/* Advance IVs of the loop (to be vectorized later) to correct position.
When loop is vectorized, its IVs are not always advanced
correctly since vectorization changes the loop count. It's ok
in case epilog loop was not produced after original one before
vectorization process (the vectorizer checks that there is no uses
of IVs after the loop). However, in case the epilog loop was peeled,
IVs from original loop are used in epilog loop and should be
advanced correctly.
Here we use access functions of IVs and number of
iteration loop executes in order to bring IVs to correct position.
Function also update phis of basic block at the exit
from the loop. */
static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
{
edge exit = loop->exit_edges[0];
tree phi;
edge latch = loop_latch_edge (loop);
/* Generate basic block at the exit from the loop. */
basic_block new_bb = split_edge (exit);
add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
{
tree access_fn = NULL;
tree evolution_part;
tree init_expr;
tree step_expr;
tree var, stmt, ni, ni_name;
int i, j, num_elem1, num_elem2;
tree phi1;
block_stmt_iterator last_bsi;
/* Skip virtual phi's. The data dependences that are associated with
virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
{
if (vect_debug_details (NULL))
fprintf (dump_file, "virtual phi. skip.");
continue;
}
access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
/* FORNOW: We do not transform initial conditions of IVs
which evolution functions are a polynomial of degree >= 2 or
exponential. */
step_expr = evolution_part;
init_expr = initial_condition (access_fn);
ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
build2 (MULT_EXPR, TREE_TYPE (niters),
niters, step_expr), init_expr);
var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
add_referenced_tmp_var (var);
ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Insert stmt into new_bb. */
last_bsi = bsi_last (new_bb);
bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
/* Fix phi expressions in duplicated loop. */
num_elem1 = PHI_NUM_ARGS (phi);
for (i = 0; i < num_elem1; i++)
if (PHI_ARG_EDGE (phi, i) == latch)
{
tree def = PHI_ARG_DEF (phi, i);
for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1;
phi1 = TREE_CHAIN (phi1))
{
num_elem2 = PHI_NUM_ARGS (phi1);
for (j = 0; j < num_elem2; j++)
if (PHI_ARG_DEF (phi1, j) == def)
{
SET_PHI_ARG_DEF (phi1, j, ni_name);
PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
break;
}
}
break;
}
}
}
/* This function is the main driver of tranformation
to be done for loop before vectorizing it in case of
unknown loop bound. */
static void
vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
struct loops *loops)
{
tree ni_name, ratio_mult_vf_name;
#ifdef ENABLE_CHECKING
int loop_num;
#endif
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *new_loop;
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
/* Generate the following variables on the preheader of original loop:
ni_name = number of iteration the original loop executes
ratio = ni_name / vf
ratio_mult_vf_name = ratio * vf */
vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
&ratio_mult_vf_name, ratio);
/* Update loop info. */
loop->pre_header = loop_preheader_edge (loop)->src;
loop->pre_header_edges[0] = loop_preheader_edge (loop);
#ifdef ENABLE_CHECKING
loop_num = loop->num;
#endif
new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
ratio_mult_vf_name, ni_name, true);
#ifdef ENABLE_CHECKING
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#endif
/* Update IVs of original loop as if they were advanced
by ratio_mult_vf_name steps. */
#ifdef ENABLE_CHECKING
/* Check existence of intermediate bb. */
gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
#endif
vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
return;
*vec_stmt = new_stmt;
return true;
}
/* Function vect_transform_stmt.
/* Function vect_gen_niters_for_prolog_loop
Create a vectorized stmt to replace STMT, and insert it at BSI. */
Set the number of iterations for the loop represented by LOOP_VINFO
to the minimum between NITERS (the original iteration count of the loop)
and the misalignment DR - the first data reference in the list
LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
loop, the data reference DR will refer to an aligned location. */
static bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
{
bool is_store = false;
tree vec_stmt = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
bool done;
struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree var, stmt;
tree iters, iters_name;
edge pe;
basic_block new_bb;
tree dr_stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree start_addr, byte_miss_align, elem_miss_align;
int vec_type_align =
GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
/ BITS_PER_UNIT;
tree tmp1, tmp2;
tree new_stmt_list = NULL_TREE;
switch (STMT_VINFO_TYPE (stmt_info))
{
case op_vec_info_type:
done = vectorizable_operation (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
&new_stmt_list, NULL_TREE);
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
if (new_bb)
add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
byte_miss_align =
build (BIT_AND_EXPR, integer_type_node, start_addr,
build (MINUS_EXPR, integer_type_node,
build_int_cst (unsigned_type_node,
vec_type_align), integer_one_node));
tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
byte_miss_align, tmp1);
tmp2 =
build (BIT_AND_EXPR, integer_type_node,
build (MINUS_EXPR, integer_type_node,
build_int_cst (unsigned_type_node, vf), elem_miss_align),
build (MINUS_EXPR, integer_type_node,
build_int_cst (unsigned_type_node, vf), integer_one_node));
iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
var = create_tmp_var (TREE_TYPE (iters), "iters");
add_referenced_tmp_var (var);
iters_name = force_gimple_operand (iters, &stmt, false, var);
/* Insert stmt on loop preheader edge. */
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, stmt);
if (new_bb)
add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
case assignment_vec_info_type:
done = vectorizable_assignment (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
return iters_name;
}
case load_vec_info_type:
done = vectorizable_load (stmt, bsi, &vec_stmt);
gcc_assert (done);
break;
case store_vec_info_type:
done = vectorizable_store (stmt, bsi, &vec_stmt);
gcc_assert (done);
is_store = true;
break;
default:
if (vect_debug_details (NULL))
fprintf (dump_file, "stmt not supported.");
gcc_unreachable ();
}
/* Function vect_update_niters_after_peeling
STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
NITERS iterations were peeled from the loop represented by LOOP_VINFO.
The new number of iterations is therefore original_niters - NITERS.
Record the new number of iterations in LOOP_VINFO. */
return is_store;
static void
vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
{
tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) =
build (MINUS_EXPR, integer_type_node, n_iters, niters);
}
/* Function vect_transform_loop_bound.
/* Function vect_update_inits_of_dr
Create a new exit condition for the loop. */
NITERS iterations were peeled from LOOP. DR represents a data reference
in LOOP. This function updates the information recorded in DR to
account for the fact that the first NITERS iterations had already been
executed. Specifically, it updates the initial_condition of the
access_function of DR. */
static void
vect_transform_loop_bound (loop_vec_info loop_vinfo)
vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
tree niters)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
edge exit_edge = loop->single_exit;
block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
tree indx_before_incr, indx_after_incr;
tree orig_cond_expr;
HOST_WIDE_INT old_N = 0;
int vf;
tree cond_stmt;
tree new_loop_bound;
tree cond;
tree lb_type;
tree access_fn = DR_ACCESS_FN (dr, 0);
tree init, init_new, step;
step = evolution_part_in_loop_num (access_fn, loop->num);
init = initial_condition (access_fn);
init_new = build (PLUS_EXPR, TREE_TYPE (init),
build (MULT_EXPR, TREE_TYPE (niters),
niters, step), init);
DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
return;
}
gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
old_N = LOOP_VINFO_NITERS (loop_vinfo);
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* FORNOW:
assuming number-of-iterations divides by the vectorization factor. */
gcc_assert (!(old_N % vf));
/* Function vect_update_inits_of_drs
orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
gcc_assert (orig_cond_expr);
gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
NITERS iterations were peeled from the loop represented by LOOP_VINFO.
This function updates the information recorded for the data references in
the loop to account for the fact that the first NITERS iterations had
already been executed. Specifically, it updates the initial_condition of the
access_function of all the data_references in the loop. */
create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
&loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
unsigned int i;
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* bsi_insert is using BSI_NEW_STMT. We need to bump it back
to point to the exit condition. */
bsi_next (&loop_exit_bsi);
gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* new loop exit test: */
lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
new_loop_bound = build_int_cst (lb_type, old_N/vf);
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
vect_update_inits_of_dr (dr, loop, niters);
}
if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
else /* 'then' edge loops back. */
cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
vect_update_inits_of_dr (dr, loop, niters);
DR_MISALIGNMENT (dr) = -1;
}
}
cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
/* Function vect_do_peeling_for_alignment
/* remove old loop exit test: */
bsi_remove (&loop_exit_bsi);
Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
'niters' is set to the misalignment of one of the data references in the
loop, thereby forcing it to refer to an aligned location at the beginning
of the execution of this loop. The data reference for which we are
peeling is chosen from LOOP_UNALIGNED_DR. */
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree niters_of_prolog_loop, ni_name;
struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
if (vect_debug_details (NULL))
print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
ni_name = vect_build_loop_niters (loop_vinfo);
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
niters_of_prolog_loop, ni_name, false);
/* Update stmt info of dr according to which we peeled. */
DR_MISALIGNMENT (dr) = 0;
/* Update number of times loop executes. */
vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
/* Update all inits of access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
/* After peeling we have to reset scalar evolution analyzer. */
scev_reset ();
return;
}
......@@ -1881,6 +3206,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
int nbbs = loop->num_nodes;
block_stmt_iterator si;
int i;
tree ratio = NULL;
#ifdef ENABLE_CHECKING
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
#endif
......@@ -1888,6 +3214,37 @@ vect_transform_loop (loop_vec_info loop_vinfo,
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vec_transform_loop>>\n");
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
vect_do_peeling_for_alignment (loop_vinfo, loops);
/* If the loop has a symbolic number of iterations 'n'
(i.e. it's not a compile time constant),
then an epilog loop needs to be created. We therefore duplicate
the initial loop. The original loop will be vectorized, and will compute
the first (n/VF) iterations. The second copy of the loop will remain
serial and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
/* FORNOW: we'll treat the case where niters is constant and
niters % vf != 0
in the way similar to one with symbolic niters.
For this we'll generate variable which value is equal to niters. */
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
/* 1) Make sure the loop header has exactly two entries
2) Make sure we have a preheader basic block. */
......@@ -1948,7 +3305,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
} /* stmts in BB */
} /* BBs in loop */
vect_transform_loop_bound (loop_vinfo);
vect_transform_loop_bound (loop_vinfo, ratio);
if (vect_debug_details (loop))
fprintf (dump_file,"Success! loop vectorized.");
......@@ -2174,30 +3531,27 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
}
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
/* FORNOW: handle only cases where the loop bound divides by the
vectorization factor. */
if (vect_debug_details (NULL))
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_debug_details (NULL))
fprintf (dump_file,
"vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "not vectorized: Unknown loop bound.");
return false;
}
vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
vectorization_factor);
return false;
/* In this case we have to generate epilog loop, that
can be done only for loops with one entry edge. */
if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
|| !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "not vectorized: more than one entry.");
return false;
}
}
return true;
}
......@@ -2272,7 +3626,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
return false;
step_expr = evolution_part;
init_expr = initial_condition (access_fn);
init_expr = unshare_expr (initial_condition (access_fn));
if (vect_debug_details (NULL))
{
......@@ -2832,11 +4186,12 @@ vect_compute_array_ref_alignment (struct data_reference *dr,
tree nbits;
if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
/* The reference is an array without its last index. */
next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
/* The reference is an array without its last index. */
next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
&misalign);
else
next_ref =
vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
&misalign);
if (!vectype)
/* Alignment is not requested. Just return the base. */
return next_ref;
......@@ -3037,9 +4392,11 @@ static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
unsigned int i;
unsigned int decide_peeling_count = 0;
if (vect_debug_details (NULL))
fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
......@@ -3062,19 +4419,33 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
/* Finally, check that loop can be vectorized.
FOR NOW: Until support for misaligned accesses is in place, only if all
accesses are aligned can the loop be vectorized. This restriction will be
relaxed. */
FOR NOW: Until support for misaligned stores is in place, only if all
stores are aligned can the loop be vectorized. This restriction will be
relaxed. In the meantime, we can force the alignment of on of the
data-references in the loop using peeling. We currently use a heuristic
that peels the first misaligned store, but we plan to develop a
better cost model to guide the decision on which data-access to peel for.
*/
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
if (!aligned_access_p (dr))
{
if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
|| vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
fprintf (dump_file, "not vectorized: unaligned store.");
return false;
/* Decide here whether we need peeling for alignment. */
decide_peeling_count++;
if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"not vectorized: multiple misaligned stores.");
return false;
}
else
{
LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
}
}
}
......@@ -3125,12 +4496,12 @@ vect_analyze_data_ref_access (struct data_reference *dr)
if (evolution_part_in_loop_num (access_fn,
loop_containing_stmt (DR_STMT (dr))->num))
{
/* Evolution part is not NULL in this loop (it is neither constant nor
invariant). */
/* Evolution part is not NULL in this loop (it is neither constant
nor invariant). */
if (vect_debug_details (NULL))
{
fprintf (dump_file,
"not vectorized: complicated multidimensional array access.");
"not vectorized: complicated multidim. array access.");
print_generic_expr (dump_file, access_fn, TDF_SLIM);
}
return false;
......@@ -3144,7 +4515,7 @@ vect_analyze_data_ref_access (struct data_reference *dr)
{
if (vect_debug_details (NULL))
{
fprintf (dump_file, "not vectorized: too complicated access function.");
fprintf (dump_file, "not vectorized: complicated access function.");
print_generic_expr (dump_file, access_fn, TDF_SLIM);
}
return false;
......@@ -3521,7 +4892,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
/* Analyze MEMREF. If it is of a supported form, build data_reference
struct for it (DR) and find the relevant symbol for aliasing
purposes. */
symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
&dr);
if (!symbl)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
......@@ -3563,7 +4935,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
switch (TREE_CODE (address_base))
{
case ARRAY_REF:
dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
DR_IS_READ(dr));
STMT_VINFO_MEMTAG (stmt_info) =
vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
loop_vinfo, &offset,
......@@ -3577,7 +4950,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
default:
if (vect_debug_stats (loop) || vect_debug_details (loop))
{
fprintf (dump_file, "not vectorized: unhandled address expression: ");
fprintf (dump_file,
"not vectorized: unhandled address expr: ");
print_generic_expr (dump_file, stmt, TDF_SLIM);
}
return false;
......@@ -3851,12 +5225,109 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
}
/* Function vect_analyze_loop_with_symbolic_num_of_iters.
In case the number of iterations that LOOP iterates in unknown at compile
time, an epilog loop will be generated, and the loop induction variables
(IVs) will be "advanced" to the value they are supposed to take just before
the epilog loop. Here we check that the access function of the loop IVs
and the expression that represents the loop bound are simple enough.
These restrictions will be relaxed in the future. */
static bool
vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
struct loop *loop)
{
basic_block bb = loop->header;
tree phi;
if (vect_debug_details (NULL))
fprintf (dump_file,
"\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
if (chrec_contains_undetermined (niters))
{
if (vect_debug_details (NULL))
fprintf (dump_file, "Infinite number of iterations.");
return false;
}
if (!niters)
{
if (vect_debug_details (NULL))
fprintf (dump_file, "niters is NULL pointer.");
return false;
}
if (vect_debug_details (NULL))
{
fprintf (dump_file, "Symbolic number of iterations is ");
print_generic_expr (dump_file, niters, TDF_DETAILS);
}
/* Analyze phi functions of the loop header. */
for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
{
tree access_fn = NULL;
tree evolution_part;
if (vect_debug_details (NULL))
{
fprintf (dump_file, "Analyze phi: ");
print_generic_expr (dump_file, phi, TDF_SLIM);
}
/* Skip virtual phi's. The data dependences that are associated with
virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
{
if (vect_debug_details (NULL))
fprintf (dump_file, "virtual phi. skip.");
continue;
}
/* Analyze the evolution function. */
access_fn = instantiate_parameters
(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
if (!access_fn)
{
if (vect_debug_details (NULL))
fprintf (dump_file, "No Access function.");
return false;
}
if (vect_debug_details (NULL))
{
fprintf (dump_file, "Access function of PHI: ");
print_generic_expr (dump_file, access_fn, TDF_SLIM);
}
evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
if (evolution_part == NULL_TREE)
return false;
/* FORNOW: We do not transform initial conditions of IVs
which evolution functions are a polynomial of degree >= 2. */
if (tree_is_chrec (evolution_part))
return false;
}
return true;
}
/* Function vect_get_loop_niters.
Determine how many iterations the loop is executed. */
static tree
vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
tree niters;
......@@ -3866,14 +5337,15 @@ vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
niters = number_of_iterations_in_loop (loop);
if (niters != NULL_TREE
&& niters != chrec_dont_know
&& host_integerp (niters,0))
&& niters != chrec_dont_know)
{
*number_of_iterations = TREE_INT_CST_LOW (niters);
*number_of_iterations = niters;
if (vect_debug_details (NULL))
fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
*number_of_iterations);
{
fprintf (dump_file, "==> get_loop_niters:" );
print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
}
}
return get_loop_exit_condition (loop);
......@@ -3895,7 +5367,7 @@ vect_analyze_loop_form (struct loop *loop)
{
loop_vec_info loop_vinfo;
tree loop_cond;
HOST_WIDE_INT number_of_iterations = -1;
tree number_of_iterations = NULL;
if (vect_debug_details (loop))
fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
......@@ -3943,24 +5415,52 @@ vect_analyze_loop_form (struct loop *loop)
fprintf (dump_file, "not vectorized: complicated exit condition.");
return NULL;
}
if (number_of_iterations < 0)
if (!number_of_iterations)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "not vectorized: unknown loop bound.");
fprintf (dump_file,
"not vectorized: number of iterations cannot be computed.");
return NULL;
}
if (number_of_iterations == 0) /* CHECKME: can this happen? */
loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "loop bound unknown.");
/* Unknown loop bound. */
if (!vect_analyze_loop_with_symbolic_num_of_iters
(number_of_iterations, loop))
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"not vectorized: can't determine loop bound.");
return NULL;
}
else
{
/* We need only one loop entry for unknown loop bound support. */
if (loop->num_entries != 1 || !loop->pre_header)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file,
"not vectorized: more than one loop entry.");
return NULL;
}
}
}
else
if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
{
if (vect_debug_stats (loop) || vect_debug_details (loop))
fprintf (dump_file, "not vectorized: number of iterations = 0.");
return NULL;
}
loop_vinfo = new_loop_vec_info (loop);
LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
return loop_vinfo;
}
......@@ -4170,5 +5670,6 @@ vectorize_loops (struct loops *loops)
Information in virtual phi nodes is sufficient for it. */
rewrite_into_loop_closed_ssa ();
}
rewrite_into_loop_closed_ssa ();
bitmap_clear (vars_to_rename);
}
......@@ -121,6 +121,7 @@ vinfo_for_stmt (tree stmt)
/* The misalignment of the memory access in bytes. */
#define DR_MISALIGNMENT(DR) (DR)->aux
#define MAX_NUMBER_OF_UNALIGNED_DATA_REFS 1
static inline bool
aligned_access_p (struct data_reference *data_ref_info)
......@@ -152,8 +153,8 @@ typedef struct _loop_vec_info {
/* The loop exit_condition. */
tree exit_cond;
/* Number of iterations. -1 if unknown. */
HOST_WIDE_INT num_iters;
/* Number of iterations. */
tree num_iters;
/* Is the loop vectorizable? */
bool vectorizable;
......@@ -161,6 +162,13 @@ typedef struct _loop_vec_info {
/* Unrolling factor */
int vectorization_factor;
/* Unknown DRs according to which loop was peeled. */
struct data_reference *unaligned_drs [MAX_NUMBER_OF_UNALIGNED_DATA_REFS];
/* If true, loop is peeled.
unaligned_drs show in this case DRs used for peeling. */
bool do_peeling_for_alignment;
/* All data references in the loop that are being written to. */
varray_type data_ref_writes;
......@@ -177,8 +185,14 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
#define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
#define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads
#define LOOP_VINFO_NITERS_KNOWN_P(L) ((L)->num_iters > 0)
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment
#define LOOP_UNALIGNED_DR(L, I) (L)->unaligned_drs[(I)]
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
(host_integerp ((L)->num_iters,0) \
&& TREE_INT_CST_LOW ((L)->num_iters) > 0)
/*-----------------------------------------------------------------*/
/* Function prototypes. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment