Commit 61fdfd8c by Richard Biener Committed by Richard Biener

re PR tree-optimization/79920 (Incorrect floating point results when compiling with -O3)

2017-03-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/79920
	* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
	with ncopies == 1 to ...
	(vect_transform_slp_perm_load): ... here.  Properly compute
	all element loads by iterating VF times over the group.  Do
	not handle ncopies (computed in a broken way) in
	vect_create_mask_and_perm.

	* gcc.dg/vect/pr79920.c: New testcase.

From-SVN: r245968
parent 4b48e883
2017-03-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/79920
* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
with ncopies == 1 to ...
(vect_transform_slp_perm_load): ... here. Properly compute
all element loads by iterating VF times over the group. Do
not handle ncopies (computed in a broken way) in
vect_create_mask_and_perm.
2017-03-08 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/79904
......
2017-03-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/79920
* gcc.dg/vect/pr79920.c: New testcase.
2017-03-08 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/79904
......
/* { dg-do run } */
/* { dg-additional-options "-O3" } */
#include "tree-vect.h"
/* Reduced reproducer for PR tree-optimization/79920.

   W_1 is read as six groups of three doubles; of every group only the
   first and the third element are used (indices i*3 and i*3+2).  For each
   of the two rows of T32 these two elements are weighted by T32[row][0]
   and T32[row][2] and stored in T33[row][i]; T43[row] is set to 2.0.
   The return value is the sum over i of T43[1] * T33[1][i].  With the
   T32 rows below ({0, 0, 1}) this equals twice the sum of the third
   element of every group.

   NOTE(review): noinline/noclone presumably keep the call in main from
   being folded with its constant argument -- the code shape here is the
   point of the test, so do not simplify it.  */
double __attribute__((noinline,noclone))
compute_integral (double w_1[18])
{
double A = 0;
/* Per-row results of the first loop nest; fully overwritten below.  */
double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
double t43[2] = {0.0, 0.0};
/* T31 is never read in this function.  NOTE(review): presumably left
   over from reducing the original code; kept to leave the reproducer
   untouched.  */
double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}};
/* Only columns 0 and 2 of T32 are read; column 1 is unused.  */
double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}};
for (int ip_1 = 0; ip_1 < 2; ++ip_1)
{
/* Stride-3 walk over W_1 that skips the middle element of each group.  */
for (int i_0 = 0; i_0 < 6; ++i_0)
t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
+ (w_1[i_0*3+2] * t32[ip_1][2]));
t43[ip_1] = 2.0;
}
/* Only row 1 of T33 and T43 contributes to the result.  */
for (int i_0 = 0; i_0 < 6; ++i_0)
A += t43[1]*t33[1][i_0];
return A;
}
/* Test driver: feed compute_integral six {0, 1, 1} triples and abort
   unless the result is exactly 12.0 (2.0 times six third elements of
   1.0).  */
int main()
{
/* check_vect is provided by tree-vect.h.  NOTE(review): presumably it
   bails out when the host cannot execute the vector code -- confirm in
   that header.  */
check_vect ();
double w_1[18] = {0., 1.0, 1.0,
0., 1.0, 1.0,
0., 1.0, 1.0,
0., 1.0, 1.0,
0., 1.0, 1.0,
0., 1.0, 1.0};
double A = compute_integral(w_1);
/* Exact comparison is fine here: every operand is 0, 1 or 2, so all
   products and sums are exactly representable.  */
if (A != 12.0)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */
...@@ -3379,66 +3379,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, ...@@ -3379,66 +3379,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
} }
} }
/* Record NCOPIES instances of one permutation in the vectorized stmts of
   NODE.

   MASK is the permutation selector.  When it is non-NULL a VEC_PERM_EXPR
   of the two input vectors is built for every copy; when it is NULL_TREE
   the defining statement of the first input vector is recorded instead
   (identity transform).  The inputs of the first copy are
   DR_CHAIN[FIRST_VEC_INDX] and DR_CHAIN[SECOND_VEC_INDX]; every further
   copy advances both indices by the length of DR_CHAIN divided by
   NCOPIES.  The statement for copy I is stored at index
   VECT_STMTS_COUNTER + I * (number of vectorized stmts of NODE / NCOPIES).

   The destination variable is derived from the LHS of STMT and VECTYPE.
   STMT and GSI are handed to vect_finish_stmt_generation for every
   permute statement built here.  */
static inline void
vect_create_mask_and_perm (gimple *stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  /* Distance between consecutive copies in the output and input vectors.  */
  int out_step = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
  int in_step = dr_chain.length () / ncopies;

  /* Pad the vect stmts of NODE with NULL up to their final count so the
     indexed stores below always hit an existing slot.  */
  for (int slot = SLP_TREE_VEC_STMTS (node).length ();
       slot < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); slot++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  tree dest_var
    = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);

  for (int copy = 0; copy < ncopies; copy++)
    {
      tree lhs_vec = dr_chain[first_vec_indx];
      tree rhs_vec = dr_chain[second_vec_indx];
      gimple *recorded = NULL;

      if (!mask)
        /* No mask: reuse the statement defining the first input.  */
        recorded = SSA_NAME_DEF_STMT (lhs_vec);
      else
        {
          /* Build the permute and give it a fresh SSA name as LHS.  */
          recorded = gimple_build_assign (dest_var, VEC_PERM_EXPR,
                                          lhs_vec, rhs_vec, mask);
          gimple_set_lhs (recorded, make_ssa_name (dest_var, recorded));
          vect_finish_stmt_generation (stmt, recorded, gsi);
        }

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[out_step * copy + vect_stmts_counter]
        = recorded;

      first_vec_indx += in_step;
      second_vec_indx += in_step;
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN. /* Generate vector permute statements from a list of loads in DR_CHAIN.
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
permute statements for the SLP node NODE of the SLP instance permute statements for the SLP node NODE of the SLP instance
...@@ -3456,7 +3396,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3456,7 +3396,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
int nunits, vec_index = 0; int nunits, vec_index = 0;
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
int unroll_factor, mask_element, ncopies; int mask_element;
unsigned char *mask; unsigned char *mask;
machine_mode mode; machine_mode mode;
...@@ -3474,11 +3414,13 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3474,11 +3414,13 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
mask_type = get_vectype_for_scalar_type (mask_element_type); mask_type = get_vectype_for_scalar_type (mask_element_type);
nunits = TYPE_VECTOR_SUBPARTS (vectype); nunits = TYPE_VECTOR_SUBPARTS (vectype);
mask = XALLOCAVEC (unsigned char, nunits); mask = XALLOCAVEC (unsigned char, nunits);
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
/* Number of copies is determined by the final vectorization factor /* Initialize the vect stmts of NODE to properly insert the generated
relatively to SLP_NODE_INSTANCE unrolling factor. */ stmts later. */
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); if (! analyze_only)
for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
/* Generate permutation masks for every NODE. Number of masks for each NODE /* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE. is equal to GROUP_SIZE.
...@@ -3505,7 +3447,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3505,7 +3447,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
bool noop_p = true; bool noop_p = true;
*n_perms = 0; *n_perms = 0;
for (int j = 0; j < unroll_factor; j++) for (int j = 0; j < vf; j++)
{ {
for (int k = 0; k < group_size; k++) for (int k = 0; k < group_size; k++)
{ {
...@@ -3578,10 +3520,30 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3578,10 +3520,30 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
if (second_vec_index == -1) if (second_vec_index == -1)
second_vec_index = first_vec_index; second_vec_index = first_vec_index;
vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
second_vec_index, /* Generate the permute statement if necessary. */
gsi, node, vectype, dr_chain, tree first_vec = dr_chain[first_vec_index];
ncopies, vect_stmts_counter++); tree second_vec = dr_chain[second_vec_index];
gimple *perm_stmt;
if (! noop_p)
{
tree perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt),
vectype);
perm_dest = make_ssa_name (perm_dest);
perm_stmt = gimple_build_assign (perm_dest,
VEC_PERM_EXPR,
first_vec, second_vec,
mask_vec);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
}
else
/* If mask was NULL_TREE generate the requested
identity transform. */
perm_stmt = SSA_NAME_DEF_STMT (first_vec);
/* Store the vector statement in NODE. */
SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
} }
index = 0; index = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment