Commit ff802fa1 by Ira Rosen, committed by Ira Rosen

tree-vectorizer.c: Fix documentation.


	* tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
	is_pattern_stmt_p, is_loop_header_bb_p,
	stmt_vinfo_set_inside_of_loop_cost,
	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
	known_alignment_for_access_p): Likewise.
	* tree-vect-loop.c: Fix documentation.
	(vect_get_cost): Start function name from new line.
	* tree-vect-data-refs.c: Fix documentation.
	* tree-vect-stmts.c: Likewise.
	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
	(vectorizable_store): Free vec_oprnds if allocated.
	(vectorizable_condition): Initialize several variables to avoid
	warnings.
	* tree-vect-slp.c: Fix documentation.

From-SVN: r164332
parent 6be14c0e
2010-09-16  Ira Rosen  <irar@il.ibm.com>

	* gcc.dg/vect/bb-slp-8.c: Fix documentation, add space between function
	name and parentheses.
	* gcc.dg/vect/bb-slp-8a.c, gcc.dg/vect/bb-slp-8b.c: Likewise.
@@ -15,8 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
   int i;
   unsigned int a0, a1, a2, a3;
-  /* pin and pout may alias. But since all the loads are before the first store
-     the basic block is vectorizable. */
+  /* pin and pout may alias. But since all the loads are before the first
+     store the basic block is vectorizable. */
   a0 = *pin++ + 23;
   a1 = *pin++ + 142;
   a2 = *pin++ + 2;
@@ -35,7 +35,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
   return 0;
 }
......
@@ -34,7 +34,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
   return 0;
 }
......
@@ -36,7 +36,7 @@ main1 (unsigned int x, unsigned int y)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
   return 0;
 }
......
@@ -55,9 +55,9 @@ along with GCC; see the file COPYING3. If not see
    Such a case, where a variable of this datatype does not appear in the lhs
    anywhere in the loop, can only occur if it's an invariant: e.g.:
    'int_x = (int) short_inv', which we'd expect to have been optimized away by
-   invariant motion. However, we cannot rely on invariant motion to always take
-   invariants out of the loop, and so in the case of promotion we also have to
-   check the rhs.
+   invariant motion. However, we cannot rely on invariant motion to always
+   take invariants out of the loop, and so in the case of promotion we also
+   have to check the rhs.
    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
    types. */
@@ -323,7 +323,11 @@ vect_equal_offsets (tree offset1, tree offset2)
 }
-/* Check dependence between DRA and DRB for basic block vectorization. */
+/* Check dependence between DRA and DRB for basic block vectorization.
+   If the accesses share same bases and offsets, we can compare their initial
+   constant offsets to decide whether they differ or not. In case of a read-
+   write dependence we check that the load is before the store to ensure that
+   vectorization will not change the order of the accesses. */
 static bool
 vect_drs_dependent_in_basic_block (struct data_reference *dra,
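As an aside, the new comment above describes the basic-block dependence test in terms of constant offsets and statement order. A minimal standalone sketch of that decision (plain C; the struct and helper names are made up for the illustration, this is not the GCC implementation):

#include <stdbool.h>

/* Simplified stand-in for a data reference: the bases and offsets are
   assumed equal, only the constant initial offset, the access kind and
   the statement position differ.  */
struct access
{
  long init;      /* constant initial offset within the object */
  bool is_read;   /* load or store */
  int position;   /* order of the statement in the basic block */
};

/* Return true if A and B must be treated as a dependence that forbids
   basic-block vectorization.  */
static bool
bb_accesses_conflict (const struct access *a, const struct access *b)
{
  /* Different constant offsets: the accesses touch different memory,
     so there is no dependence at all.  */
  if (a->init != b->init)
    return false;

  /* Two reads of the same location never conflict.  */
  if (a->is_read && b->is_read)
    return false;

  /* Read-write pair: safe only if the load already precedes the store,
     because vectorization must not reorder them.  */
  if (a->is_read != b->is_read)
    {
      const struct access *load = a->is_read ? a : b;
      const struct access *store = a->is_read ? b : a;
      return load->position > store->position;
    }

  /* Two stores to the same location: stay conservative.  */
  return true;
}

Under this model a load at position 0 followed by a store to the same offset at position 1 passes the check, while the reverse order does not, which is exactly the situation the bb-slp-8 tests exercise.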
@@ -1241,8 +1245,8 @@ vect_peeling_hash_get_most_frequent (void **slot, void *data)
 }
-/* Traverse peeling hash table and calculate cost for each peeling option. Find
-   one with the lowest cost. */
+/* Traverse peeling hash table and calculate cost for each peeling option.
+   Find the one with the lowest cost. */
 static int
 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
@@ -1326,7 +1330,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
    the alignment of data references in the loop.
    FOR NOW: we assume that whatever versioning/peeling takes place, only the
-   original loop is to be vectorized; Any other loops that are created by
+   original loop is to be vectorized. Any other loops that are created by
    the transformations performed in this pass - are not supposed to be
    vectorized. This restriction will be relaxed.
@@ -2072,9 +2076,10 @@ vect_analyze_group_access (struct data_reference *dr)
   while (next)
     {
-      /* Skip same data-refs. In case that two or more stmts share data-ref
-         (supported only for loads), we vectorize only the first stmt, and
-         the rest get their vectorized loads from the first one. */
+      /* Skip same data-refs. In case that two or more stmts share
+         data-ref (supported only for loads), we vectorize only the first
+         stmt, and the rest get their vectorized loads from the first
+         one. */
       if (!tree_int_cst_compare (DR_INIT (data_ref),
                                  DR_INIT (STMT_VINFO_DATA_REF (
                                             vinfo_for_stmt (next)))))
@@ -2483,8 +2488,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
     datarefs = BB_VINFO_DATAREFS (bb_vinfo);
   }
-  /* Go through the data-refs, check that the analysis succeeded. Update pointer
-     from stmt_vec_info struct to DR and vectype. */
+  /* Go through the data-refs, check that the analysis succeeded. Update
+     pointer from stmt_vec_info struct to DR and vectype. */
   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
     {
@@ -3017,7 +3022,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
       print_generic_expr (vect_dump, base_name, TDF_SLIM);
     }
-  /** (1) Create the new vector-pointer variable: **/
+  /* (1) Create the new vector-pointer variable. */
   vect_ptr_type = build_pointer_type (vectype);
   base = get_base_address (DR_REF (dr));
   if (base
@@ -3067,9 +3072,9 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   add_referenced_var (vect_ptr);
-  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
-      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
-      def-use update cycles for the pointer: One relative to the outer-loop
+  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
+     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
+     def-use update cycles for the pointer: one relative to the outer-loop
      (LOOP), which is what steps (3) and (4) below do. The other is relative
      to the inner-loop (which is the inner-most loop containing the dataref),
      and this is done be step (5) below.
@@ -3098,8 +3103,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
        vp2 = vp1 + step
        if () goto LOOP */
-  /** (3) Calculate the initial address the vector-pointer, and set
-          the vector-pointer to point to it before the loop: **/
+  /* (2) Calculate the initial address the vector-pointer, and set
+     the vector-pointer to point to it before the loop. */
   /* Create: (&(base[init_val+offset]) in the loop preheader. */
@@ -3140,10 +3145,9 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   else
     vect_ptr_init = new_temp;
-  /** (4) Handle the updating of the vector-pointer inside the loop.
-          This is needed when ONLY_INIT is false, and also when AT_LOOP
-          is the inner-loop nested in LOOP (during outer-loop vectorization).
-   **/
+  /* (3) Handle the updating of the vector-pointer inside the loop.
+     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
+     inner-loop nested in LOOP (during outer-loop vectorization). */
   /* No update in loop is required. */
   if (only_init && (!loop_vinfo || at_loop == loop))
@@ -3182,8 +3186,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
     return vptr;
-  /** (5) Handle the updating of the vector-pointer inside the inner-loop
-          nested in LOOP, if exists: **/
+  /* (4) Handle the updating of the vector-pointer inside the inner-loop
+     nested in LOOP, if exists. */
   gcc_assert (nested_in_vect_loop);
   if (!only_init)
@@ -3362,8 +3366,8 @@ vect_strided_store_supported (tree vectype)
    RESULT_CHAIN.
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements. We assign a number to each
-   element, the input sequence is:
+   The input is 4 vectors each containing 8 elements. We assign a number to
+   each element, the input sequence is:
    1st vec: 0 1 2 3 4 5 6 7
    2nd vec: 8 9 10 11 12 13 14 15
@@ -3582,8 +3586,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
    1. the misalignment computation
    2. the extra vector load (for the optimized realignment scheme).
    3. the phi node for the two vectors from which the realignment is
-      done (for the optimized realignment scheme).
-   */
+      done (for the optimized realignment scheme). */
   /* 1. Determine where to generate the misalignment computation.
@@ -3828,8 +3831,8 @@ vect_strided_load_supported (tree vectype)
    i.e., the first output vector should contain the first elements of each
    interleaving group, etc.
-   We use extract_even/odd instructions to create such output. The input of each
-   extract_even/odd operation is two vectors
+   We use extract_even/odd instructions to create such output. The input of
+   each extract_even/odd operation is two vectors
    1st vec    2nd vec
    0 1 2 3    4 5 6 7
@@ -3838,9 +3841,9 @@ vect_strided_load_supported (tree vectype)
    and of extract_odd: 1 3 5 7
-   The permutation is done in log LENGTH stages. In each stage extract_even and
-   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
-   order. In our example,
+   The permutation is done in log LENGTH stages. In each stage extract_even
+   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
+   their order. In our example,
    E1: extract_even (1st vec, 2nd vec)
    E2: extract_odd (1st vec, 2nd vec)
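The comment above describes de-interleaving a strided load with log2(LENGTH) stages of extract_even/extract_odd operations. A standalone C sketch of the same index permutation on plain arrays, reproducing the example values from the comment (illustrative only, not the vectorizer code):

#include <stdio.h>

#define NELT 4   /* elements per vector in this toy example */

/* Take elements 0,2,4,... of the concatenation of A and B.  */
static void
extract_even (const int *a, const int *b, int *out)
{
  for (int i = 0; i < NELT; i++)
    out[i] = (2 * i < NELT) ? a[2 * i] : b[2 * i - NELT];
}

/* Take elements 1,3,5,... of the concatenation of A and B.  */
static void
extract_odd (const int *a, const int *b, int *out)
{
  for (int i = 0; i < NELT; i++)
    out[i] = (2 * i + 1 < NELT) ? a[2 * i + 1] : b[2 * i + 1 - NELT];
}

int
main (void)
{
  int v1[NELT] = { 0, 1, 2, 3 };
  int v2[NELT] = { 4, 5, 6, 7 };
  int even[NELT], odd[NELT];

  extract_even (v1, v2, even);   /* yields 0 2 4 6 */
  extract_odd (v1, v2, odd);     /* yields 1 3 5 7 */

  for (int i = 0; i < NELT; i++)
    printf ("%d ", even[i]);
  printf ("\n");
  for (int i = 0; i < NELT; i++)
    printf ("%d ", odd[i]);
  printf ("\n");
  return 0;
}

For an interleaving group of LENGTH vectors the vectorizer applies such pairs of operations over log2(LENGTH) stages, as the comment describes.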
@@ -3982,8 +3985,7 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
      the group, since it always exists.
      DR_GROUP_GAP is the number of steps in elements from the previous
      access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
-     correspond to the gaps.
-  */
+     correspond to the gaps. */
   if (next_stmt != first_stmt
       && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
     {
@@ -4088,8 +4090,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
   /* We can choose between using the implicit realignment scheme (generating
      a misaligned_move stmt) and the explicit realignment scheme (generating
-     aligned loads with a REALIGN_LOAD). There are two variants to the explicit
-     realignment scheme: optimized, and unoptimized.
+     aligned loads with a REALIGN_LOAD). There are two variants to the
+     explicit realignment scheme: optimized, and unoptimized.
      We can optimize the realignment only if the step between consecutive
      vector loads is equal to the vector size. Since the vector memory
      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
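The comment above contrasts the implicit scheme (a misaligned move) with explicit realignment via aligned loads plus a REALIGN_LOAD. The conceptual shape of explicit realignment, sketched on plain int arrays (illustrative only, not the GCC implementation), is to load the two aligned vectors surrounding the unaligned position and select elements across them:

/* Read 4 ints starting at unaligned element index P of BASE by combining
   the two surrounding 4-element-aligned vectors.  Illustrative only.  */
static void
realign_load (const int *base, int p, int out[4])
{
  const int *lo = base + (p & ~3);   /* aligned vector containing p */
  const int *hi = lo + 4;            /* the next aligned vector */
  int shift = p & 3;

  for (int i = 0; i < 4; i++)
    out[i] = (shift + i < 4) ? lo[shift + i] : hi[shift + i - 4];
}

The "optimized" variant mentioned in the comment amounts to reusing the second aligned load of one iteration as the first aligned load of the next, which is only possible when consecutive vector loads advance by exactly one vector.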
......
@@ -569,7 +569,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 /* Function vect_analyze_scalar_cycles.
    Examine the cross iteration def-use cycles of scalar variables, by
-   analyzing the loop-header PHIs of scalar variables; Classify each
+   analyzing the loop-header PHIs of scalar variables. Classify each
    cycle as one of the following: invariant, induction, reduction, unknown.
    We do that for the loop represented by LOOP_VINFO, and also to its
    inner-loop, if exists.
@@ -1125,8 +1125,8 @@ vect_analyze_loop_form (struct loop *loop)
 /* Get cost by calling cost target builtin. */
-static inline
-int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
+static inline int
+vect_get_cost (enum vect_cost_for_stmt type_of_cost)
 {
   tree dummy_type = NULL;
   int dummy = 0;
@@ -2638,8 +2638,8 @@ get_initial_def_for_induction (gimple iv_phi)
   if (nested_in_vect_loop)
     {
       /* iv_loop is nested in the loop to be vectorized. init_expr had already
-         been created during vectorization of previous stmts; We obtain it from
-         the STMT_VINFO_VEC_STMT of the defining stmt. */
+         been created during vectorization of previous stmts. We obtain it
+         from the STMT_VINFO_VEC_STMT of the defining stmt. */
       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                            loop_preheader_edge (iv_loop));
       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
@@ -3764,7 +3764,7 @@ vect_finalize_reduction:
   phis = VEC_alloc (gimple, heap, 3);
   /* Find the loop-closed-use at the loop exit of the original scalar
-     result. (The reduction result is expected to have two immediate uses -
+     result. (The reduction result is expected to have two immediate uses,
      one at the latch block, and one at the loop exit). For double
      reductions we are looking for exit phis of the outer loop. */
   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
......
@@ -1067,6 +1067,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
    incorrect order of statements. Since we generate all the loads together,
    they must be inserted before the first load of the SLP instance and not
    before the first load of the first node of the instance. */
 static gimple
 vect_find_first_load_in_slp_instance (slp_instance instance)
 {
@@ -1083,6 +1084,7 @@ vect_find_first_load_in_slp_instance (slp_instance instance)
 /* Find the last store in SLP INSTANCE. */
 static gimple
 vect_find_last_store_in_slp_instance (slp_instance instance)
 {
@@ -1783,11 +1785,11 @@ vect_slp_analyze_bb (basic_block bb)
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
-   the number of created vector stmts depends on the unrolling factor). However,
-   the actual number of vector stmts for every SLP node depends on VF which is
-   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
-   In this function we assume that the inside costs calculated in
-   vect_model_xxx_cost are linear in ncopies. */
+   the number of created vector stmts depends on the unrolling factor).
+   However, the actual number of vector stmts for every SLP node depends on
+   VF which is set later in vect_analyze_operations (). Hence, SLP costs
+   should be updated. In this function we assume that the inside costs
+   calculated in vect_model_xxx_cost are linear in ncopies. */
 void
 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
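The "linear in ncopies" assumption in the comment above means that a cost computed for the SLP instance unrolling factor can simply be rescaled once the final vectorization factor is known. A toy illustration of that assumption (names and formula are illustrative, not the body of vect_update_slp_costs_according_to_vf):

/* A cost computed assuming UNROLLING_FACTOR copies is rescaled once the
   final vectorization factor VF is known; VF is assumed to be a multiple
   of the instance unrolling factor.  Illustrative only.  */
static int
rescale_slp_inside_cost (int inside_cost, int unrolling_factor, int vf)
{
  return inside_cost * (vf / unrolling_factor);
}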
@@ -2051,7 +2053,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
   number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
   /* Number of vector stmts was calculated according to LHS in
      vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
-     necessary. See vect_get_smallest_scalar_type() for details. */
+     necessary. See vect_get_smallest_scalar_type () for details. */
   vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                  &rhs_size_unit);
   if (rhs_size_unit != lhs_size_unit)
@@ -2321,8 +2323,8 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
    The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
    scpecific type, e.g., in bytes for Altivec.
    The last mask is illegal since we assume two operands for permute
-   operation, and the mask element values can't be outside that range. Hence,
-   the last mask must be converted into {2,5,5,5}.
+   operation, and the mask element values can't be outside that range.
+   Hence, the last mask must be converted into {2,5,5,5}.
    For the first two permutations we need the first and the second input
    vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
    we need the second and the third vectors: {b1,c1,a2,b2} and
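A two-operand permute can only index elements 0 .. 2*nelt-1 of its two inputs, which is why {6,9,9,9} above is illegal and has to be rebased. A rough C sketch of that rebasing (the helper is made up for the illustration, it is not the vectorizer code):

/* Rebase a permute mask so that it indexes into a single pair of input
   vectors.  MASK has N entries given as element indices into the whole
   load chain; NELT is the number of elements per vector.  Returns the
   index of the first vector of the pair.  Illustrative helper only.  */
static int
rebase_perm_mask (int *mask, int n, int nelt)
{
  int lo = mask[0];
  for (int i = 1; i < n; i++)
    if (mask[i] < lo)
      lo = mask[i];

  int first_vec = lo / nelt;          /* first input vector of the pair */
  for (int i = 0; i < n; i++)
    mask[i] -= first_vec * nelt;      /* make indices pair-relative */
  return first_vec;
}

Applied to {6,9,9,9} with nelt = 4 this yields {2,5,5,5} and selects the second and third vectors as the permute operands, matching the example in the comment.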
@@ -2492,6 +2494,8 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
 }
+/* Generate vector code for all SLP instances in the loop/basic block. */
 bool
 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
......
@@ -558,6 +558,9 @@ int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
                          dummy_type, dummy);
 }
+/* Get cost for STMT. */
 int
 cost_for_stmt (gimple stmt)
 {
@@ -1196,7 +1199,7 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
 /* Get vectorized definitions for the operands to create a copy of an original
-   stmt. See vect_get_vec_def_for_stmt_copy() for details. */
+   stmt. See vect_get_vec_def_for_stmt_copy () for details. */
 static void
 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
@@ -1217,7 +1220,8 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
 }
-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
+/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
+   NULL. */
 static void
 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
@@ -1742,8 +1746,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
-  /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
-     this, so we can safely override NCOPIES with 1 here. */
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
+     case of SLP. */
   if (slp_node)
     ncopies = 1;
@@ -1900,6 +1905,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 /* Function vectorizable_assignment.
    Check if STMT performs an assignment (copy) that can be vectorized.
@@ -2293,9 +2300,9 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
    from one copy of the vector stmt to the next, in the field
    STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
    stages to find the correct vector defs to be used when vectorizing
-   stmts that use the defs of the current stmt. The example below illustrates
-   the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-   4 vectorized stmts):
+   stmts that use the defs of the current stmt. The example below
+   illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
+   we need to create 4 vectorized stmts):
    before vectorization:
    RELATED_STMT    VEC_STMT
@@ -2361,8 +2368,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   if (slp_node)
     {
       /* Store vec_oprnd1 for every vector stmt to be created
-         for SLP_NODE. We check during the analysis that all the
-         shift arguments are the same.
+         for SLP_NODE. We check during the analysis that all
+         the shift arguments are the same.
         TODO: Allow different constants for different vector
         stmts generated for an SLP instance. */
      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
@@ -2783,13 +2790,14 @@ vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
          create promotion operations to the intermediate types, and then
          create promotions to the output type. */
       *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
-      VEC_free (tree, heap, vec_tmp);
       vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                               multi_step_cvt - 1, stmt,
                                               vec_dsts, gsi, slp_node, code1,
                                               code2, decl2, decl2, op_type,
                                               prev_stmt_info);
     }
+  VEC_free (tree, heap, vec_tmp);
 }
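The hunk above corresponds to the ChangeLog item "Always free vec_tmp": the VEC_free moves out of the if (multi_step_cvt) block so the temporary vector is released on every path, not only when a multi-step conversion is performed. A minimal illustration of the same shape in plain C (names are illustrative, this is not the patched function):

#include <stdlib.h>
#include <string.h>

/* Build a temporary buffer, optionally hand a copy to the caller, and
   free the temporary unconditionally at the end so that no path leaks
   it.  Illustrative only.  */
static void
copy_and_release (int **out, size_t len, int multi_step)
{
  int *tmp = malloc (len * sizeof *tmp);
  if (!tmp)
    return;
  for (size_t i = 0; i < len; i++)
    tmp[i] = (int) i;

  if (multi_step)
    {
      *out = malloc (len * sizeof **out);
      if (*out)
        memcpy (*out, tmp, len * sizeof **out);
    }

  free (tmp);   /* always freed, whether or not multi_step was taken */
}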
@@ -3411,6 +3419,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   VEC_free (tree, heap, oprnds);
   if (result_chain)
     VEC_free (tree, heap, result_chain);
+  if (vec_oprnds)
+    VEC_free (tree, heap, vec_oprnds);
   return true;
 }
@@ -3607,9 +3617,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
    from one copy of the vector stmt to the next, in the field
    STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
    stages to find the correct vector defs to be used when vectorizing
-   stmts that use the defs of the current stmt. The example below illustrates
-   the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-   4 vectorized stmts):
+   stmts that use the defs of the current stmt. The example below
+   illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
+   need to create 4 vectorized stmts):
    before vectorization:
    RELATED_STMT    VEC_STMT
@@ -3664,9 +3674,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
    STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
    In case of both multiple types and interleaving, the vector loads and
-   permutation stmts above are created for every copy. The result vector stmts
-   are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-   STMT_VINFO_RELATED_STMT for the next copies. */
+   permutation stmts above are created for every copy. The result vector
+   stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
+   corresponding STMT_VINFO_RELATED_STMT for the next copies. */
   /* If the data reference is aligned (dr_aligned) or potentially unaligned
      on a target that supports unaligned accesses (dr_unaligned_supported)
@@ -4035,7 +4045,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
+  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
+  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;
   tree new_temp;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@@ -5086,8 +5097,9 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
   *code2 = c2;
   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-     intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-     to get to NARROW_VECTYPE, and fail if we do not. */
+     intermediate steps in promotion sequence. We try
+     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+     not. */
   *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
   for (i = 0; i < 3; i++)
     {
@@ -5206,8 +5218,9 @@ supportable_narrowing_operation (enum tree_code code,
   *code1 = c1;
   prev_type = vectype;
   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-     intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
-     to get to NARROW_VECTYPE, and fail if we do not. */
+     intermediate steps in promotion sequence. We try
+     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+     not. */
   *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
   for (i = 0; i < 3; i++)
     {
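Both comments above describe the same idea: when a widening or narrowing conversion cannot be done in one step, the vectorizer tries a chain of at most MAX_INTERM_CVT_STEPS intermediate vector types and gives up otherwise. A conceptual sketch of that bound, on element widths rather than vector types (illustrative only; the real code walks vector types and queries the target for each step):

#include <stdbool.h>

#define MAX_STEPS 3   /* stand-in for MAX_INTERM_CVT_STEPS */

/* Plan a promotion from FROM_BITS to TO_BITS in steps that each double
   the element width (e.g. 8 -> 16 -> 32), recording the intermediate
   widths.  Returns false if TO_BITS is not reachable within MAX_STEPS
   steps.  */
static bool
plan_promotion_steps (int from_bits, int to_bits,
                      int interm_bits[MAX_STEPS], int *n_steps)
{
  int bits = from_bits;
  *n_steps = 0;
  while (bits < to_bits)
    {
      if (*n_steps == MAX_STEPS)
        return false;            /* too many intermediate steps */
      bits *= 2;                 /* one widening step */
      interm_bits[(*n_steps)++] = bits;
    }
  return bits == to_bits;
}

For example, promoting 8-bit to 64-bit elements needs the three intermediate widths 16, 32 and 64, which is exactly the limit; an 8-bit to 128-bit promotion would fail under this bound.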
......
@@ -582,6 +582,8 @@ extern VEC(vec_void_p,heap) *stmt_vec_info_vec;
 void init_stmt_vec_info_vec (void);
 void free_stmt_vec_info_vec (void);
+/* Return a stmt_vec_info corresponding to STMT. */
 static inline stmt_vec_info
 vinfo_for_stmt (gimple stmt)
 {
@@ -592,6 +594,8 @@ vinfo_for_stmt (gimple stmt)
   return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1);
 }
+/* Set vectorizer information INFO for STMT. */
 static inline void
 set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
 {
@@ -607,6 +611,8 @@ set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
   VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info);
 }
+/* Return the earlier statement between STMT1 and STMT2. */
 static inline gimple
 get_earlier_stmt (gimple stmt1, gimple stmt2)
 {
@@ -633,6 +639,8 @@ get_earlier_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }
+/* Return the later statement between STMT1 and STMT2. */
 static inline gimple
 get_later_stmt (gimple stmt1, gimple stmt2)
 {
@@ -659,6 +667,9 @@ get_later_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }
+/* Return TRUE if a statement represented by STMT_INFO is a part of a
+   pattern. */
 static inline bool
 is_pattern_stmt_p (stmt_vec_info stmt_info)
 {
@@ -674,6 +685,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)
   return false;
 }
+/* Return true if BB is a loop header. */
 static inline bool
 is_loop_header_bb_p (basic_block bb)
 {
@@ -683,6 +696,8 @@ is_loop_header_bb_p (basic_block bb)
   return false;
 }
+/* Set inside loop vectorization cost. */
 static inline void
 stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                     int cost)
@@ -693,6 +708,8 @@ stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
+/* Set inside loop vectorization cost. */
 static inline void
 stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                      int cost)
@@ -703,6 +720,8 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
+/* Return pow2 (X). */
 static inline int
 vect_pow2 (int x)
 {
@@ -723,12 +742,17 @@ vect_pow2 (int x)
 #define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux)
 #define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL))
+/* Return TRUE if the data access is aligned, and FALSE otherwise. */
 static inline bool
 aligned_access_p (struct data_reference *data_ref_info)
 {
   return (DR_MISALIGNMENT (data_ref_info) == 0);
 }
+/* Return TRUE if the alignment of the data access is known, and FALSE
+   otherwise. */
 static inline bool
 known_alignment_for_access_p (struct data_reference *data_ref_info)
 {
......