Commit 5f55a1ba by Dorit Naishlos Committed by Dorit Nuzman

tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by known_alignment_for_access_p.

        * tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
        known_alignment_for_access_p.
        (known_alignment_for_access_p): New.
        (do_peeling_for_alignment): Field made int instead of bool and renamed
        to peeling_for_alignment.
        (LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
        * tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
        functionality used to be in vect_analyze_operations.
        (vect_analyze_operations): Code to determine vectorization factor was
        moved to vect_determine_vectorization_factor.
        (vect_enhance_data_refs_alignment): Update to correct alignment when it
        is known instead of -1.  Set LOOP_PEELING_FOR_ALIGNMENT to peeling
        factor.
        (vect_analyze_loop): Call vect_determine_vectorization_factor (used to
        be part of vect_analyze_operations).
        * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
        creating the guard condition, as the number of iterations may be
        constant.
        (slpeel_tree_peel_loop_to_edge): Use new name of
        LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
        * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
        alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
        (vect_do_peeling_for_alignment): Use fold.
        (vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.

        (vect_update_inits_of_dr): Renamed to
        vect_update_init_of_dr.
        (vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
        (vectorizable_store): Fix assertion to use == instead of =.

From-SVN: r96526
parent 119bb233
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
known_alignment_for_access_p.
(known_alignment_for_access_p): New.
(do_peeling_for_alignment): Field made int instead of bool and renamed
to peeling_for_alignment.
(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
functionality used to be in vect_analyze_operations.
(vect_analyze_operations): Code to determine vectorization factor was
moved to vect_determine_vectorization_factor.
(vect_enhance_data_refs_alignment): Update to correct alignment when it
is known instead of -1. Set LOOP_PEELING_FOR_ALIGNMENT to peeling
factor.
(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
be part of vect_analyze_operations).
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
creating the guard condition, as the number of iterations may be
constant.
(slpeel_tree_peel_loop_to_edge): Use new name of
LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Use fold.
(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
(vect_update_inits_of_dr): Renamed to
vect_update_init_of_dr.
(vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
(vectorizable_store): Fix assertion to use == instead of =.
2005-03-15 Daniel Jacobowitz <dan@codesourcery.com> 2005-03-15 Daniel Jacobowitz <dan@codesourcery.com>
* config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear * config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear
......
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* gcc.dg/vect/vect-54.c: Now vectorizable on targets that don't support
misaligned accesses.
* gcc.dg/vect/vect-58.c: Likewise.
* gcc.dg/vect/vect-92.c: New.
* gcc.dg/vect/vect-93.c: New.
2005-03-15 Feng Wang <fengwang@nudt.edu.cn> 2005-03-15 Feng Wang <fengwang@nudt.edu.cn>
PR fortran/18827 PR fortran/18827
......
...@@ -50,6 +50,6 @@ int main (void) ...@@ -50,6 +50,6 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
...@@ -51,6 +51,6 @@ int main (void) ...@@ -51,6 +51,6 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 256
typedef float afloat __attribute__ ((__aligned__(16)));
/* Known misalignment, same for every access: all three references use
   index i+1, so each is displaced by exactly one element from its base
   pointer, and the pointee type afloat is declared 16-byte aligned.
   The trip count is the compile-time constant 5.

   Stores pb[i+1] * pc[i+1] into pa[i+1] for i in [0, 5), then recomputes
   each product and calls abort () on any mismatch.  Returns 0 when all
   elements match.

   NOTE(review): presumably the loop is meant to be vectorized by peeling
   a known number of iterations - confirm against the dg-final scans.  */
int
main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
/* Kernel under test: elements 1..5 of each array are touched.  */
for (i = 0; i < 5; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 5; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Same kernel as main1 but with a compile-time trip count of 6 instead
   of 5: stores pb[i+1] * pc[i+1] into pa[i+1] for i in [0, 6), then
   recomputes each product and calls abort () on any mismatch.
   All three accesses share the same one-element displacement from their
   base pointers.  Returns 0 when all elements match.  */
int
main2 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
/* Kernel under test: elements 1..6 of each array are touched.  */
for (i = 0; i < 6; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 6; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Same kernel as main1/main2 but with a trip count N that is only known
   at run time: stores pb[i+1] * pc[i+1] into pa[i+1] for i in [0, n),
   then recomputes each product and calls abort () on any mismatch.
   Returns 0 when all elements match.

   The loops access indices 1..n inclusive, so each of pa, pb and pc must
   point to at least n+1 elements.  */
int
main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n)
{
int i;
/* Kernel under test: the highest index touched is n, not n-1.  */
for (i = 0; i < n; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Test driver: runs the three kernels on local arrays of N floats.
   b and c carry 20 explicit initializers each; the remaining elements
   are zero-initialized, and a is left for the kernels to fill in.
   check_vect comes from tree-vect.h (not shown here); presumably it
   bails out when the host cannot execute vector code - confirm there.
   Returns 0 if no kernel aborted.  */
int main (void)
{
  afloat a[N];
  afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
  afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};

  check_vect ();

  main1 (a,b,c);
  main2 (a,b,c);
  /* main3 accesses indices 1..n inclusive, so the largest trip count
     that stays inside an N-element array is N-1.  Passing N would read
     and write element N, one past the end of a, b and c.  */
  main3 (a,b,c,N-1);

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 3001
typedef float afloat __attribute__ ((__aligned__(16)));
/* Two store-only loops over the same buffer, each followed by a check.

   First loop: writes 2.0 to pa[0] .. pa[3000] (a literal trip count of
   3001, the same value as N) and verifies every element.
   Second loop: writes 3.0 to pa[1] .. pa[10], i.e. it starts one element
   past the base pointer, and verifies those ten elements.

   PA is a plain float pointer, so its type carries no 16-byte alignment
   guarantee.  Calls abort () on any mismatch; returns 0 otherwise.  */
int
main1 (float *pa)
{
int i;
/* Store starting at the base pointer.  */
for (i = 0; i < 3001; i++)
{
pa[i] = 2.0;
}
/* check results: */
for (i = 0; i < 3001; i++)
{
if (pa[i] != 2.0)
abort ();
}
/* Store starting one element past the base pointer.  */
for (i = 1; i <= 10; i++)
{
pa[i] = 3.0;
}
/* check results: */
for (i = 1; i <= 10; i++)
{
if (pa[i] != 3.0)
abort ();
}
return 0;
}
/* Test driver.  Initializes b[i] = i for all N elements, then runs a
   shifted copy a[i] = b[i-1] for i in [1, 256] and verifies it, and
   finally hands a to main1 for the store-only loops.
   check_vect comes from tree-vect.h (not shown here); presumably it
   bails out when the host cannot execute vector code - confirm there.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main (void)
{
int i;
afloat a[N];
afloat b[N];
check_vect ();
/* from bzip2: */
for (i=0; i<N; i++) b[i] = i;
a[0] = 0;
/* The store index (i) and the load index (i-1) differ by one element,
   so the two accesses are displaced differently from their array
   bases.  */
for (i = 1; i <= 256; i++) a[i] = b[i-1];
/* check results: */
/* Since b[k] == k, each copied element a[i] must equal i-1.  */
for (i = 1; i <= 256; i++)
{
if (a[i] != i-1)
abort ();
}
/* a[0] was set explicitly and must not have been touched by the copy.  */
if (a[0] != 0)
abort ();
main1 (a);
return 0;
}
/* in main1 */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } } */
/* in main */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
...@@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader ...@@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader
static tree vect_build_loop_niters (loop_vec_info); static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge); static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree); static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, tree niters); static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree); static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *); static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound static void vect_do_peeling_for_loop_bound
...@@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) ...@@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
alignment_support_cheme = vect_supportable_dr_alignment (dr); alignment_support_cheme = vect_supportable_dr_alignment (dr);
gcc_assert (alignment_support_cheme); gcc_assert (alignment_support_cheme);
gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */ gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
/* Handle use - get the vectorized def from the defining stmt. */ /* Handle use - get the vectorized def from the defining stmt. */
vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt); vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
...@@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio, ...@@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
Set the number of iterations for the loop represented by LOOP_VINFO Set the number of iterations for the loop represented by LOOP_VINFO
to the minimum between LOOP_NITERS (the original iteration count of the loop) to the minimum between LOOP_NITERS (the original iteration count of the loop)
and the misalignment of DR - the first data reference recorded in and the misalignment of DR - the data reference recorded in
LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
this loop, the data reference DR will refer to an aligned location. this loop, the data reference DR will refer to an aligned location.
The following computation is generated: The following computation is generated:
compute address misalignment in bytes: If the misalignment of DR is known at compile time:
addr_mis = addr & (vectype_size - 1) addr_mis = int mis = DR_MISALIGNMENT (dr);
Else, compute address misalignment in bytes:
addr_mis = addr & (vectype_size - 1)
prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) ) prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
...@@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt); stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT; int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree elem_misalign;
tree byte_misalign;
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1); tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
tree niters_type = TREE_TYPE (loop_niters); tree niters_type = TREE_TYPE (loop_niters);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
pe = loop_preheader_edge (loop); pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: byte_misalign = addr & (vectype_size - 1) */ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1); {
int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int element_size = vectype_align/vf;
int elem_misalign = byte_misalign / element_size;
/* Create: elem_misalign = byte_misalign / element_size */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
elem_misalign = fprintf (vect_dump, "known alignment = %d.", byte_misalign);
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log); iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
}
else
{
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
tree byte_misalign;
tree elem_misalign;
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ /* Create: byte_misalign = addr & (vectype_size - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign); byte_misalign =
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1); build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
iters = fold_convert (niters_type, iters);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
iters = fold_convert (niters_type, iters);
}
/* Create: prolog_loop_niters = min (iters, loop_niters) */ /* Create: prolog_loop_niters = min (iters, loop_niters) */
/* If the loop bound is known at compile time we already verified that it is /* If the loop bound is known at compile time we already verified that it is
greater than vf; since the misalignment ('iters') is at most vf, there's greater than vf; since the misalignment ('iters') is at most vf, there's
...@@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (TREE_CODE (loop_niters) != INTEGER_CST) if (TREE_CODE (loop_niters) != INTEGER_CST)
iters = build2 (MIN_EXPR, niters_type, iters, loop_niters); iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "niters for prolog loop: ");
print_generic_expr (vect_dump, iters, TDF_SLIM);
}
var = create_tmp_var (niters_type, "prolog_loop_niters"); var = create_tmp_var (niters_type, "prolog_loop_niters");
add_referenced_tmp_var (var); add_referenced_tmp_var (var);
iters_name = force_gimple_operand (iters, &stmt, false, var); iters_name = force_gimple_operand (iters, &stmt, false, var);
/* Insert stmt on loop preheader edge. */ /* Insert stmt on loop preheader edge. */
pe = loop_preheader_edge (loop);
if (stmt) if (stmt)
{ {
basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt); basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
...@@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
} }
/* Function vect_update_inits_of_dr /* Function vect_update_init_of_dr
NITERS iterations were peeled from LOOP. DR represents a data reference NITERS iterations were peeled from LOOP. DR represents a data reference
in LOOP. This function updates the information recorded in DR to in LOOP. This function updates the information recorded in DR to
...@@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
executed. Specifically, it updates the OFFSET field of stmt_info. */ executed. Specifically, it updates the OFFSET field of stmt_info. */
static void static void
vect_update_inits_of_dr (struct data_reference *dr, tree niters) vect_update_init_of_dr (struct data_reference *dr, tree niters)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info); tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
...@@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) ...@@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++) for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{ {
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i); struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
vect_update_inits_of_dr (dr, niters); vect_update_init_of_dr (dr, niters);
} }
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++) for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{ {
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i); struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
vect_update_inits_of_dr (dr, niters); vect_update_init_of_dr (dr, niters);
} }
} }
...@@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops) ...@@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
/* Update number of times loop executes. */ /* Update number of times loop executes. */
n_iters = LOOP_VINFO_NITERS (loop_vinfo); n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) = LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop); TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
/* Update the init conditions of the access functions of all data refs. */ /* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop); vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
...@@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo, ...@@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
/* Peel the loop if there are data refs with unknown alignment. /* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */ Only one data ref with unknown store is allowed. */
if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo)) if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
vect_do_peeling_for_alignment (loop_vinfo, loops); vect_do_peeling_for_alignment (loop_vinfo, loops);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
......
...@@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, ...@@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
add_bb_to_loop (bb_before_second_loop, first_loop->outer); add_bb_to_loop (bb_before_second_loop, first_loop->outer);
pre_condition = pre_condition =
build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node); fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition, skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop); bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */, slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
...@@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops, ...@@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
bb_after_second_loop = split_edge (second_loop->single_exit); bb_after_second_loop = split_edge (second_loop->single_exit);
add_bb_to_loop (bb_after_second_loop, second_loop->outer); add_bb_to_loop (bb_after_second_loop, second_loop->outer);
pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters); pre_condition =
fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
bb_after_second_loop, bb_before_first_loop); bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */, slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
...@@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop) ...@@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_EXIT_COND (res) = NULL; LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL; LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_VECTORIZABLE_P (res) = 0; LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false; LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0; LOOP_VINFO_VECT_FACTOR (res) = 0;
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20, VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
"loop_write_datarefs"); "loop_write_datarefs");
......
...@@ -95,9 +95,15 @@ typedef struct _loop_vec_info { ...@@ -95,9 +95,15 @@ typedef struct _loop_vec_info {
/* Unknown DRs according to which loop was peeled. */ /* Unknown DRs according to which loop was peeled. */
struct data_reference *unaligned_dr; struct data_reference *unaligned_dr;
/* If true, loop is peeled. /* peeling_for_alignment indicates whether peeling for alignment will take
unaligned_drs show in this case DRs used for peeling. */ place, and what the peeling factor should be:
bool do_peeling_for_alignment; peeling_for_alignment = X means:
If X=0: Peeling for alignment will not be applied.
If X>0: Peel first X iterations.
If X=-1: Generate a runtime test to calculate the number of iterations
to be peeled, using the dataref recorded in the field
unaligned_dr. */
int peeling_for_alignment;
/* All data references in the loop that are being written to. */ /* All data references in the loop that are being written to. */
varray_type data_ref_writes; varray_type data_ref_writes;
...@@ -119,7 +125,7 @@ typedef struct _loop_vec_info { ...@@ -119,7 +125,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes #define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
#define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads #define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment #define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_LOC(L) (L)->loop_line_number #define LOOP_VINFO_LOC(L) (L)->loop_line_number
...@@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt) ...@@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt)
/* Info on data references alignment. */ /* Info on data references alignment. */
/*-----------------------------------------------------------------*/ /*-----------------------------------------------------------------*/
/* The misalignment of the memory access in bytes. */ /* Reflects actual alignment of first access in the vectorized loop,
taking into account peeling/versioning if applied. */
#define DR_MISALIGNMENT(DR) (DR)->aux #define DR_MISALIGNMENT(DR) (DR)->aux
static inline bool static inline bool
...@@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info) ...@@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info)
} }
static inline bool static inline bool
unknown_alignment_for_access_p (struct data_reference *data_ref_info) known_alignment_for_access_p (struct data_reference *data_ref_info)
{ {
return (DR_MISALIGNMENT (data_ref_info) == -1); return (DR_MISALIGNMENT (data_ref_info) != -1);
} }
/* Perform signed modulo, always returning a non-negative value. */ /* Perform signed modulo, always returning a non-negative value. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment