Commit 694a4f61, authored by Ira Rosen, committed by Ira Rosen

re PR tree-optimization/45752 (ICE in ix86_vectorize_builtin_vec_perm_ok)


	PR tree-optimization/45752
	* tree-vect-slp.c (vect_get_mask_element): Remove static
	variables, make them function arguments.
	(vect_transform_slp_perm_load): Pass new arguments to
	vect_get_mask_element.

From-SVN: r164987
parent 4d3814a5
2010-10-05 Ira Rosen <irar@il.ibm.com>
PR tree-optimization/45752
* tree-vect-slp.c (vect_get_mask_element): Remove static
variables, make them function arguments.
(vect_transform_slp_perm_load): Pass new arguments to
vect_get_mask_element.
2010-10-05 Richard Guenther <rguenther@suse.de> 2010-10-05 Richard Guenther <rguenther@suse.de>
* value-prof.c (gimple_divmod_fixed_value): Work on SSA form. * value-prof.c (gimple_divmod_fixed_value): Work on SSA form.
2010-10-05 Ira Rosen <irar@il.ibm.com>
PR tree-optimization/45752
* gcc.dg/vect/pr45752.c: New test.
2010-10-05 Richard Guenther <rguenther@suse.de> 2010-10-05 Richard Guenther <rguenther@suse.de>
* gcc.dg/tree-prof/val-prof-1.c: Adjust. * gcc.dg/tree-prof/val-prof-1.c: Adjust.
......
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
/* Coefficients of a 5x5 matrix used by foo.  In M<r><c>, the first digit R
   selects which of the five outputs of a group the coefficient contributes
   to, and the second digit C selects which of the five inputs (a..e) it
   multiplies.  The definitions below are listed one input column at a
   time.  */
#define M00 100
#define M10 216
#define M20 23
#define M30 237
#define M40 437
#define M01 1322
#define M11 13
#define M21 27271
#define M31 2280
#define M41 284
#define M02 74
#define M12 191
#define M22 500
#define M32 111
#define M42 1114
#define M03 134
#define M13 117
#define M23 11
#define M33 771
#define M43 71
#define M04 334
#define M14 147
#define M24 115
#define M34 7716
#define M44 16
/* Number of elements in each test array.  foo iterates N / 5 times over
   groups of five, so with N == 16 only the first 15 elements are
   processed.  */
#define N 16
/* Apply the 5x5 coefficient matrix M<r><c> to consecutive groups of five
   values taken from two independent input streams.

   Each of the N / 5 iterations reads five values from PINPUT and writes
   five results to POUTPUT, then does the same for PINPUT2 and POUTPUT2.
   Every result is a linear combination of all five inputs of its group,
   computed in unsigned int arithmetic.  All four pointers are advanced by
   five elements per iteration and are __restrict__-qualified.

   NOTE(review): this is the loop the dg-final directives at the end of the
   file are checking; the statement shape is presumably significant for
   reproducing PR 45752, so it should not be restructured -- confirm against
   the PR before changing anything here.  */
void foo (unsigned int *__restrict__ pInput,
unsigned int *__restrict__ pOutput,
unsigned int *__restrict__ pInput2,
unsigned int *__restrict__ pOutput2)
{
unsigned int i, a, b, c, d, e;
for (i = 0; i < N / 5; i++)
{
/* Load one group of five values from the first input stream.  */
a = *pInput++;
b = *pInput++;
c = *pInput++;
d = *pInput++;
e = *pInput++;
/* Store the five matrix-row products for the first stream.  */
*pOutput++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
*pOutput++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
*pOutput++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
*pOutput++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
*pOutput++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
/* Reuse a..e for one group of five values from the second stream.  */
a = *pInput2++;
b = *pInput2++;
c = *pInput2++;
d = *pInput2++;
e = *pInput2++;
/* Store the five matrix-row products for the second stream.  */
*pOutput2++ = M00 * a + M01 * b + M02 * c + M03 * d + M04 * e;
*pOutput2++ = M10 * a + M11 * b + M12 * c + M13 * d + M14 * e;
*pOutput2++ = M20 * a + M21 * b + M22 * c + M23 * d + M24 * e;
*pOutput2++ = M30 * a + M31 * b + M32 * c + M33 * d + M34 * e;
*pOutput2++ = M40 * a + M41 * b + M42 * c + M43 * d + M44 * e;
}
}
/* Test driver: fill both input arrays, run foo, and abort if any element of
   either output array differs from its precomputed expected value.
   Returns 0 when every element matches.  ARGC and ARGV are unused.  */
int main (int argc, const char* argv[])
{
unsigned int input[N], output[N], i, input2[N], output2[N];
/* Expected contents of OUTPUT after foo for input[i] == i % 256.  The last
   entry is 0 because foo writes only the first 15 elements and OUTPUT is
   zeroed in the initialization loop below.  */
unsigned int check_results[N] = {3208, 1334, 28764, 35679, 2789, 13028,
4754, 168364, 91254, 12399, 22848, 8174, 307964, 146829, 22009, 0};
/* Expected contents of OUTPUT2 after foo for input2[i] == i + 2; the last
   entry is 0 for the same reason as above.  */
unsigned int check_results2[N] = {7136, 2702, 84604, 57909, 6633, 16956,
6122, 224204, 113484, 16243, 26776, 9542, 363804, 169059, 25853, 0};
/* check_vect is not defined in this file; presumably it comes from
   tree-vect.h -- NOTE(review): confirm what it checks.  */
check_vect ();
for (i = 0; i < N; i++)
{
input[i] = i%256;
input2[i] = i + 2;
output[i] = 0;
output2[i] = 0;
/* Empty volatile asm in the loop body; presumably here to keep this
   initialization loop from being vectorized so it does not affect the
   dump counts scanned by dg-final -- TODO confirm.  */
__asm__ volatile ("");
}
foo (input, output, input2, output2);
/* Compare all N elements of both outputs, including the untouched last
   one, against the expected values.  */
for (i = 0; i < N; i++)
if (output[i] != check_results[i]
|| output2[i] != check_results2[i])
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 2 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2177,20 +2177,18 @@ static bool ...@@ -2177,20 +2177,18 @@ static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m, vect_get_mask_element (gimple stmt, int first_mask_element, int m,
int mask_nunits, bool only_one_vec, int index, int mask_nunits, bool only_one_vec, int index,
int *mask, int *current_mask_element, int *mask, int *current_mask_element,
bool *need_next_vector) bool *need_next_vector, int *number_of_mask_fixes,
bool *mask_fixed, bool *needs_first_vector)
{ {
int i; int i;
static int number_of_mask_fixes = 1;
static bool mask_fixed = false;
static bool needs_first_vector = false;
/* Convert to target specific representation. */ /* Convert to target specific representation. */
*current_mask_element = first_mask_element + m; *current_mask_element = first_mask_element + m;
/* Adjust the value in case it's a mask for second and third vectors. */ /* Adjust the value in case it's a mask for second and third vectors. */
*current_mask_element -= mask_nunits * (number_of_mask_fixes - 1); *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
if (*current_mask_element < mask_nunits) if (*current_mask_element < mask_nunits)
needs_first_vector = true; *needs_first_vector = true;
/* We have only one input vector to permute but the mask accesses values in /* We have only one input vector to permute but the mask accesses values in
the next vector as well. */ the next vector as well. */
...@@ -2208,7 +2206,7 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m, ...@@ -2208,7 +2206,7 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m,
/* The mask requires the next vector. */ /* The mask requires the next vector. */
if (*current_mask_element >= mask_nunits * 2) if (*current_mask_element >= mask_nunits * 2)
{ {
if (needs_first_vector || mask_fixed) if (*needs_first_vector || *mask_fixed)
{ {
/* We either need the first vector too or have already moved to the /* We either need the first vector too or have already moved to the
next vector. In both cases, this permutation needs three next vector. In both cases, this permutation needs three
...@@ -2226,23 +2224,23 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m, ...@@ -2226,23 +2224,23 @@ vect_get_mask_element (gimple stmt, int first_mask_element, int m,
/* We move to the next vector, dropping the first one and working with /* We move to the next vector, dropping the first one and working with
the second and the third - we need to adjust the values of the mask the second and the third - we need to adjust the values of the mask
accordingly. */ accordingly. */
*current_mask_element -= mask_nunits * number_of_mask_fixes; *current_mask_element -= mask_nunits * *number_of_mask_fixes;
for (i = 0; i < index; i++) for (i = 0; i < index; i++)
mask[i] -= mask_nunits * number_of_mask_fixes; mask[i] -= mask_nunits * *number_of_mask_fixes;
(number_of_mask_fixes)++; (*number_of_mask_fixes)++;
mask_fixed = true; *mask_fixed = true;
} }
*need_next_vector = mask_fixed; *need_next_vector = *mask_fixed;
/* This was the last element of this mask. Start a new one. */ /* This was the last element of this mask. Start a new one. */
if (index == mask_nunits - 1) if (index == mask_nunits - 1)
{ {
number_of_mask_fixes = 1; *number_of_mask_fixes = 1;
mask_fixed = false; *mask_fixed = false;
needs_first_vector = false; *needs_first_vector = false;
} }
return true; return true;
...@@ -2268,6 +2266,9 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, ...@@ -2268,6 +2266,9 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
int index, unroll_factor, *mask, current_mask_element, ncopies; int index, unroll_factor, *mask, current_mask_element, ncopies;
bool only_one_vec = false, need_next_vector = false; bool only_one_vec = false, need_next_vector = false;
int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter; int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
int number_of_mask_fixes = 1;
bool mask_fixed = false;
bool needs_first_vector = false;
if (!targetm.vectorize.builtin_vec_perm) if (!targetm.vectorize.builtin_vec_perm)
{ {
...@@ -2351,7 +2352,9 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain, ...@@ -2351,7 +2352,9 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
{ {
if (!vect_get_mask_element (stmt, first_mask_element, m, if (!vect_get_mask_element (stmt, first_mask_element, m,
mask_nunits, only_one_vec, index, mask, mask_nunits, only_one_vec, index, mask,
&current_mask_element, &need_next_vector)) &current_mask_element, &need_next_vector,
&number_of_mask_fixes, &mask_fixed,
&needs_first_vector))
return false; return false;
mask[index++] = current_mask_element; mask[index++] = current_mask_element;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment