Commit 5f55a1ba by Dorit Naishlos Committed by Dorit Nuzman

tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by known_alignment_for_access_p.

        * tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
        known_alignment_for_access_p.
        (known_alignment_for_access_p): New.
        (do_peeling_for_alignment): Field made int instead of bool and renamed
        to peeling_for_alignment.
        (LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
        * tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
        functionality used to be in vect_analyze_operations.
        (vect_analyze_operations): Code to determine vectorization factor was
        moved to vect_determine_vectorization_factor.
        (vect_enhance_data_refs_alignment): Update to correct alignment when it
        is known instead of -1.  Set LOOP_PEELING_FOR_ALIGNMENT to peeling
        factor.
        (vect_analyze_loop): Call vect_determine_vectorization_factor (used to
        be part of vect_analyze_operations).
        * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
        creating the guard condition, as the number of iterations may be
        constant.
        (slpeel_tree_peel_loop_to_edge): Use new name of
        LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
        * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
        alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
        (vect_do_peeling_for_alignment): Use fold.
        (vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.

        (vect_update_inits_of_dr): Renamed to
        vect_update_init_of_dr.
        (vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
        (vectorizable_store): Fix assertion to use == instead of =.

From-SVN: r96526
parent 119bb233
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
known_alignment_for_access_p.
(known_alignment_for_access_p): New.
(do_peeling_for_alignment): Field made int instead of bool and renamed
to peeling_for_alignment.
(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
functionality used to be in vect_analyze_operations.
(vect_analyze_operations): Code to determine vectorization factor was
moved to vect_determine_vectorization_factor.
(vect_enhance_data_refs_alignment): Update to correct alignment when it
is known instead of -1. Set LOOP_PEELING_FOR_ALIGNMENT to peeling
factor.
(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
be part of vect_analyze_operations).
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
creating the guard condition, as the number of iterations may be
constant.
(slpeel_tree_peel_loop_to_edge): Use new name of
LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Use fold.
(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
(vect_update_inits_of_dr): Renamed to
vect_update_init_of_dr.
(vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
(vectorizable_store): Fix assertion to use == instead of =.
2005-03-15 Daniel Jacobowitz <dan@codesourcery.com>
* config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear
......
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* gcc.dg/vect/vect-54.c: Now vectorizable on targets that don't support
misaligned accesses.
* gcc.dg/vect/vect-58.c: Likewise.
* gcc.dg/vect/vect-92.c: New.
* gcc.dg/vect/vect-93.c: New.
2005-03-15 Feng Wang <fengwang@nudt.edu.cn>
PR fortran/18827
......
......@@ -50,6 +50,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
......@@ -51,6 +51,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 256
typedef float afloat __attribute__ ((__aligned__(16)));
/* known misalignment: same alignment */
int
main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
for (i = 0; i < 5; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 5; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
int
main2 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
for (i = 0; i < 6; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 6; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
int
main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n)
{
int i;
for (i = 0; i < n; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
int main (void)
{
int i;
afloat a[N];
afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
check_vect ();
main1 (a,b,c);
main2 (a,b,c);
main3 (a,b,c,N);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 3001
typedef float afloat __attribute__ ((__aligned__(16)));
int
main1 (float *pa)
{
int i;
for (i = 0; i < 3001; i++)
{
pa[i] = 2.0;
}
/* check results: */
for (i = 0; i < 3001; i++)
{
if (pa[i] != 2.0)
abort ();
}
for (i = 1; i <= 10; i++)
{
pa[i] = 3.0;
}
/* check results: */
for (i = 1; i <= 10; i++)
{
if (pa[i] != 3.0)
abort ();
}
return 0;
}
int main (void)
{
int i;
afloat a[N];
afloat b[N];
check_vect ();
/* from bzip2: */
for (i=0; i<N; i++) b[i] = i;
a[0] = 0;
for (i = 1; i <= 256; i++) a[i] = b[i-1];
/* check results: */
for (i = 1; i <= 256; i++)
{
if (a[i] != i-1)
abort ();
}
if (a[0] != 0)
abort ();
main1 (a);
return 0;
}
/* in main1 */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } } */
/* in main */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */
......@@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, tree niters);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound
......@@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
alignment_support_cheme = vect_supportable_dr_alignment (dr);
gcc_assert (alignment_support_cheme);
gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
/* Handle use - get the vectorized def from the defining stmt. */
vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
......@@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
Set the number of iterations for the loop represented by LOOP_VINFO
to the minimum between LOOP_NITERS (the original iteration count of the loop)
and the misalignment of DR - the first data reference recorded in
and the misalignment of DR - the data reference recorded in
LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
this loop, the data reference DR will refer to an aligned location.
The following computation is generated:
compute address misalignment in bytes:
addr_mis = addr & (vectype_size - 1)
If the misalignment of DR is known at compile time:
addr_mis = int mis = DR_MISALIGNMENT (dr);
Else, compute address misalignment in bytes:
addr_mis = addr & (vectype_size - 1)
prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
......@@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree elem_misalign;
tree byte_misalign;
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
tree niters_type = TREE_TYPE (loop_niters);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int element_size = vectype_align/vf;
int elem_misalign = byte_misalign / element_size;
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "known alignment = %d.", byte_misalign);
iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
}
else
{
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
tree byte_misalign;
tree elem_misalign;
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
iters = fold_convert (niters_type, iters);
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
iters = fold_convert (niters_type, iters);
}
/* Create: prolog_loop_niters = min (iters, loop_niters) */
/* If the loop bound is known at compile time we already verified that it is
greater than vf; since the misalignment ('iters') is at most vf, there's
......@@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (TREE_CODE (loop_niters) != INTEGER_CST)
iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "niters for prolog loop: ");
print_generic_expr (vect_dump, iters, TDF_SLIM);
}
var = create_tmp_var (niters_type, "prolog_loop_niters");
add_referenced_tmp_var (var);
iters_name = force_gimple_operand (iters, &stmt, false, var);
/* Insert stmt on loop preheader edge. */
pe = loop_preheader_edge (loop);
if (stmt)
{
basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
......@@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
}
/* Function vect_update_inits_of_dr
/* Function vect_update_init_of_dr
NITERS iterations were peeled from LOOP. DR represents a data reference
in LOOP. This function updates the information recorded in DR to
......@@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
executed. Specifically, it updates the OFFSET field of stmt_info. */
static void
vect_update_inits_of_dr (struct data_reference *dr, tree niters)
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
......@@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
vect_update_inits_of_dr (dr, niters);
vect_update_init_of_dr (dr, niters);
}
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
vect_update_inits_of_dr (dr, niters);
vect_update_init_of_dr (dr, niters);
}
}
......@@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
/* Update number of times loop executes. */
n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) =
build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
/* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
......@@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
vect_do_peeling_for_alignment (loop_vinfo, loops);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
......
......@@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
add_bb_to_loop (bb_before_second_loop, first_loop->outer);
pre_condition =
build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
......@@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
bb_after_second_loop = split_edge (second_loop->single_exit);
add_bb_to_loop (bb_after_second_loop, second_loop->outer);
pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
pre_condition =
fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
......@@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
"loop_write_datarefs");
......
......@@ -95,9 +95,15 @@ typedef struct _loop_vec_info {
/* Unknown DRs according to which loop was peeled. */
struct data_reference *unaligned_dr;
/* If true, loop is peeled.
unaligned_drs show in this case DRs used for peeling. */
bool do_peeling_for_alignment;
/* peeling_for_alignment indicates whether peeling for alignment will take
place, and what the peeling factor should be:
peeling_for_alignment = X means:
If X=0: Peeling for alignment will not be applied.
If X>0: Peel first X iterations.
If X=-1: Generate a runtime test to calculate the number of iterations
to be peeled, using the dataref recorded in the field
unaligned_dr. */
int peeling_for_alignment;
/* All data references in the loop that are being written to. */
varray_type data_ref_writes;
......@@ -119,7 +125,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
#define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_LOC(L) (L)->loop_line_number
......@@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt)
/* Info on data references alignment. */
/*-----------------------------------------------------------------*/
/* The misalignment of the memory access in bytes. */
/* Reflects actual alignment of first access in the vectorized loop,
taking into account peeling/versioning if applied. */
#define DR_MISALIGNMENT(DR) (DR)->aux
static inline bool
......@@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info)
}
static inline bool
unknown_alignment_for_access_p (struct data_reference *data_ref_info)
known_alignment_for_access_p (struct data_reference *data_ref_info)
{
return (DR_MISALIGNMENT (data_ref_info) == -1);
return (DR_MISALIGNMENT (data_ref_info) != -1);
}
/* Perform signed modulo, always returning a non-negative value. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment