Commit d8ba5b19 by Richard Guenther Committed by Richard Biener

re PR middle-end/45720 (Revision 164367 miscompiled SPEC CPU 2K)

2010-10-22  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/45720
	* tree-vect-data-refs.c (vect_update_misalignment_for_peel):
	Handle negative step.
	(vect_enhance_data_refs_alignment): Likewise.
	* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Likewise.
	(vect_create_cond_for_align_checks): Likewise.
	(vect_create_cond_for_alias_checks): Likewise.

	* gcc.dg/torture/pr45720.c: New testcase.

From-SVN: r165832
parent 9fc2854d
2010-10-22 Richard Guenther <rguenther@suse.de>
PR tree-optimization/45720
* tree-vect-data-refs.c (vect_update_misalignment_for_peel):
Handle negative step.
(vect_enhance_data_refs_alignment): Likewise.
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Likewise.
(vect_create_cond_for_align_checks): Likewise.
(vect_create_cond_for_alias_checks): Likewise.
2010-10-22 Ira Rosen <irar@il.ibm.com>
PR tree-optimization/46126
2010-10-22 Richard Guenther <rguenther@suse.de>
PR tree-optimization/45720
* gcc.dg/torture/pr45720.c: New testcase.
2010-10-22 Ira Rosen <irar@il.ibm.com>
PR tree-optimization/46126
......
/* { dg-do run } */
/* { dg-options "-ftree-vectorize" } */
/* Input array read by every loop in this test.  It is never written here,
   so as a file-scope object it keeps its zero initialization.  */
float val[256];
/* Accumulator updated by the reduction loops in foo and bar.  */
float x;
/* Add val[i] * 2 to the global x for i running downward from LEN - 1 to
   BEG (inclusive).  The loop walks val with a negative step, which is the
   access pattern PR tree-optimization/45720 is about.
   NOTE(review): noinline/noclone presumably keep LEN and BEG unknown when
   the loop is compiled, so the start position is not a compile-time
   constant here -- confirm against the PR.  */
void __attribute__((noinline,noclone))
foo(int len, int beg)
{
int i;
for (i = len - 1; i >= beg; --i)
x += val[i] * 2;
}
/* Same downward reduction as foo, but with constant bounds: four loops
   that start at indices 255, 254, 253 and 252 respectively and each run
   down to index 0, adding val[i] * 2 to the global x.
   NOTE(review): the four consecutive start indices presumably give the
   first access four different positions relative to a vector boundary --
   confirm against the PR.  */
void __attribute__((noinline,noclone))
bar(void)
{
int i;
/* Start at the last element of val.  */
for (i = 255; i >= 0; --i)
x += val[i] * 2;
/* Start one element lower each time.  */
for (i = 254; i >= 0; --i)
x += val[i] * 2;
for (i = 253; i >= 0; --i)
x += val[i] * 2;
for (i = 252; i >= 0; --i)
x += val[i] * 2;
}
/* Output array written by foobar; only elements 0..251 are stored to.  */
float y[256];
/* For each i in 0..251, store into y[i] the sum of val[255 - i],
   val[254 - i], val[253 - i] and val[252 - i], each multiplied by 2.
   The induction variable counts upward, so y is written at increasing
   indices while val is read at decreasing indices (from 255 down to 1).  */
void __attribute__((noinline,noclone))
foobar(void)
{
int i;
for (i = 0; i < 252; ++i)
{
float l = 0;
/* Four reads of val at consecutive, descending indices.  */
l += val[255 - i] * 2;
l += val[254 - i] * 2;
l += val[253 - i] * 2;
l += val[252 - i] * 2;
y[i] = l;
}
}
/* Test driver.  Runs foo with LEN = 255, 254, 253 and 252 (BEG = 0 each
   time), so its loop starts at indices 254, 253, 252 and 251, then runs
   bar and foobar.  No computed value is checked and 0 is returned
   unconditionally, so this test only requires that the program runs to
   completion.  */
int main()
{
foo(256-1, 0);
foo(256-2, 0);
foo(256-3, 0);
foo(256-4, 0);
bar();
foobar();
return 0;
}
...@@ -1016,10 +1016,11 @@ vect_update_misalignment_for_peel (struct data_reference *dr, ...@@ -1016,10 +1016,11 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
if (known_alignment_for_access_p (dr) if (known_alignment_for_access_p (dr)
&& known_alignment_for_access_p (dr_peel)) && known_alignment_for_access_p (dr_peel))
{ {
bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
int misal = DR_MISALIGNMENT (dr); int misal = DR_MISALIGNMENT (dr);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
misal += npeel * dr_size; misal += negative ? -npeel * dr_size : npeel * dr_size;
misal %= GET_MODE_SIZE (TYPE_MODE (vectype)); misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
SET_DR_MISALIGNMENT (dr, misal); SET_DR_MISALIGNMENT (dr, misal);
return; return;
} }
...@@ -1503,6 +1504,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1503,6 +1504,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (known_alignment_for_access_p (dr)) if (known_alignment_for_access_p (dr))
{ {
unsigned int npeel_tmp; unsigned int npeel_tmp;
bool negative = tree_int_cst_compare (DR_STEP (dr),
size_zero_node) < 0;
/* Save info about DR in the hash table. */ /* Save info about DR in the hash table. */
if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo)) if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
...@@ -1514,7 +1517,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1514,7 +1517,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
nelements = TYPE_VECTOR_SUBPARTS (vectype); nelements = TYPE_VECTOR_SUBPARTS (vectype);
mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE ( mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
TREE_TYPE (DR_REF (dr)))); TREE_TYPE (DR_REF (dr))));
npeel_tmp = (nelements - mis) % vf; npeel_tmp = (negative
? (mis - nelements) : (nelements - mis)) & (vf - 1);
/* For multiple types, it is possible that the bigger type access /* For multiple types, it is possible that the bigger type access
will have more than one peeling option. E.g., a loop with two will have more than one peeling option. E.g., a loop with two
...@@ -1707,6 +1711,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1707,6 +1711,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (known_alignment_for_access_p (dr0)) if (known_alignment_for_access_p (dr0))
{ {
bool negative = tree_int_cst_compare (DR_STEP (dr0),
size_zero_node) < 0;
if (!npeel) if (!npeel)
{ {
/* Since it's known at compile time, compute the number of /* Since it's known at compile time, compute the number of
...@@ -1716,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1716,7 +1722,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
count. */ count. */
mis = DR_MISALIGNMENT (dr0); mis = DR_MISALIGNMENT (dr0);
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
npeel = nelements - mis; npeel = (negative ? mis - nelements : nelements - mis) & (vf - 1);
} }
/* For interleaved data access every iteration accesses all the /* For interleaved data access every iteration accesses all the
......
...@@ -1993,8 +1993,11 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, ...@@ -1993,8 +1993,11 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
else else
{ {
gimple_seq new_stmts = NULL; gimple_seq new_stmts = NULL;
bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
tree offset = negative
? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
&new_stmts, NULL_TREE, loop); &new_stmts, offset, loop);
tree ptr_type = TREE_TYPE (start_addr); tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type); tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
...@@ -2019,7 +2022,10 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, ...@@ -2019,7 +2022,10 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
/* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */ /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign); if (negative)
iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
else
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1); iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters); iters = fold_convert (niters_type, iters);
} }
...@@ -2236,11 +2242,17 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, ...@@ -2236,11 +2242,17 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
tree addr_tmp, addr_tmp_name; tree addr_tmp, addr_tmp_name;
tree or_tmp, new_or_tmp_name; tree or_tmp, new_or_tmp_name;
gimple addr_stmt, or_stmt; gimple addr_stmt, or_stmt;
stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
bool negative = tree_int_cst_compare
(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
tree offset = negative
? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
/* create: addr_tmp = (int)(address_of_first_vector) */ /* create: addr_tmp = (int)(address_of_first_vector) */
addr_base = addr_base =
vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list, vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
NULL_TREE, loop); offset, loop);
if (new_stmt_list != NULL) if (new_stmt_list != NULL)
gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list); gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
...@@ -2387,6 +2399,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, ...@@ -2387,6 +2399,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
tree addr_base_a, addr_base_b; tree addr_base_a, addr_base_b;
tree segment_length_a, segment_length_b; tree segment_length_a, segment_length_b;
gimple stmt_a, stmt_b; gimple stmt_a, stmt_b;
tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
dr_a = DDR_A (ddr); dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr)); stmt_a = DR_STMT (DDR_A (ddr));
...@@ -2425,19 +2438,22 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, ...@@ -2425,19 +2438,22 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM); print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
} }
seg_a_min = addr_base_a;
seg_a_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
addr_base_a, segment_length_a);
if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
seg_a_min = seg_a_max, seg_a_max = addr_base_a;
seg_b_min = addr_base_b;
seg_b_max = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
addr_base_b, segment_length_b);
if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
seg_b_min = seg_b_max, seg_b_max = addr_base_b;
part_cond_expr = part_cond_expr =
fold_build2 (TRUTH_OR_EXPR, boolean_type_node, fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
fold_build2 (LT_EXPR, boolean_type_node, fold_build2 (LT_EXPR, boolean_type_node, seg_a_max, seg_b_min),
fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a), fold_build2 (LT_EXPR, boolean_type_node, seg_b_max, seg_a_min));
addr_base_a,
segment_length_a),
addr_base_b),
fold_build2 (LT_EXPR, boolean_type_node,
fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
addr_base_b,
segment_length_b),
addr_base_a));
if (*cond_expr) if (*cond_expr)
*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment