Commit 2ce27200, authored and committed by Richard Biener

re PR tree-optimization/68553 (gcc.dg/vect/pr68445.c FAILs)

2015-11-27  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/68553
	* tree-vect-slp.c (vect_get_mask_element): Remove.
	(vect_transform_slp_perm_load): Implement in a simpler way.

	* gcc.dg/vect/pr45752.c: Adjust.
	* gcc.dg/vect/slp-perm-4.c: Likewise.

From-SVN: r230993
parent f0a813f2
2015-11-27 Richard Biener <rguenther@suse.de>
PR tree-optimization/68553
* tree-vect-slp.c (vect_get_mask_element): Remove.
(vect_transform_slp_perm_load): Implement in a simpler way.
2015-11-26 Alexandre Oliva <aoliva@redhat.com> 2015-11-26 Alexandre Oliva <aoliva@redhat.com>
PR rtl-optimization/67753 PR rtl-optimization/67753
2015-11-27 Richard Biener <rguenther@suse.de>
PR tree-optimization/68553
* gcc.dg/vect/pr45752.c: Adjust.
* gcc.dg/vect/slp-perm-4.c: Likewise.
2015-11-26 Martin Sebor <msebor@redhat.com> 2015-11-26 Martin Sebor <msebor@redhat.com>
* g++.dg/init/new45.C (cookie_size): New constant set to a value * g++.dg/init/new45.C (cookie_size): New constant set to a value
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#define M34 7716 #define M34 7716
#define M44 16 #define M44 16
#define N 16 #define N 40
void foo (unsigned int *__restrict__ pInput, void foo (unsigned int *__restrict__ pInput,
unsigned int *__restrict__ pOutput, unsigned int *__restrict__ pOutput,
...@@ -75,10 +75,16 @@ void foo (unsigned int *__restrict__ pInput, ...@@ -75,10 +75,16 @@ void foo (unsigned int *__restrict__ pInput,
int main (int argc, const char* argv[]) int main (int argc, const char* argv[])
{ {
unsigned int input[N], output[N], i, input2[N], output2[N]; unsigned int input[N], output[N], i, input2[N], output2[N];
unsigned int check_results[N] = {3208, 1334, 28764, 35679, 2789, 13028, unsigned int check_results[N]
4754, 168364, 91254, 12399, 22848, 8174, 307964, 146829, 22009, 0}; = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
unsigned int check_results2[N] = {7136, 2702, 84604, 57909, 6633, 16956, 22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619,
6122, 224204, 113484, 16243, 26776, 9542, 363804, 169059, 25853, 0}; 42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839,
62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059};
unsigned int check_results2[N]
= {7136, 2702, 84604, 57909, 6633, 16956, 6122, 224204, 113484, 16243,
26776, 9542, 363804, 169059, 25853, 36596, 12962, 503404, 224634, 35463,
46416, 16382, 643004, 280209, 45073, 56236, 19802, 782604, 335784, 54683,
66056, 23222, 922204, 391359, 64293, 75876, 26642, 1061804, 446934, 73903};
check_vect (); check_vect ();
...@@ -101,7 +107,5 @@ int main (int argc, const char* argv[]) ...@@ -101,7 +107,5 @@ int main (int argc, const char* argv[])
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump "permutation requires at least three vectors" "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#define M34 7716 #define M34 7716
#define M44 16 #define M44 16
#define N 16 #define N 40
void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
{ {
...@@ -58,7 +58,11 @@ void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput) ...@@ -58,7 +58,11 @@ void foo (unsigned int *__restrict__ pInput, unsigned int *__restrict__ pOutput)
int main (int argc, const char* argv[]) int main (int argc, const char* argv[])
{ {
unsigned int input[N], output[N], i; unsigned int input[N], output[N], i;
unsigned int check_results[N] = {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399, 22848, 8174, 307964, 146829, 22009, 0}; unsigned int check_results[N]
= {3208, 1334, 28764, 35679, 2789, 13028, 4754, 168364, 91254, 12399,
22848, 8174, 307964, 146829, 22009, 32668, 11594, 447564, 202404, 31619,
42488, 15014, 587164, 257979, 41229, 52308, 18434, 726764, 313554, 50839,
62128, 21854, 866364, 369129, 60449, 71948, 25274, 1005964, 424704, 70059};
check_vect (); check_vect ();
...@@ -80,7 +84,6 @@ int main (int argc, const char* argv[]) ...@@ -80,7 +84,6 @@ int main (int argc, const char* argv[])
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump "permutation requires at least three vectors" "vect" { target vect_perm} } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
...@@ -3241,102 +3241,6 @@ vect_create_mask_and_perm (gimple *stmt, ...@@ -3241,102 +3241,6 @@ vect_create_mask_and_perm (gimple *stmt,
} }
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
return in CURRENT_MASK_ELEMENT its equivalent in target specific
representation. Check that the mask is valid and return FALSE if not.
Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
the next vector, i.e., the current first vector is not needed. */
static bool
vect_get_mask_element (gimple *stmt, int first_mask_element, int m,
int mask_nunits, bool only_one_vec, int index,
unsigned char *mask, int *current_mask_element,
bool *need_next_vector, int *number_of_mask_fixes,
bool *mask_fixed, bool *needs_first_vector)
{
int i;
/* Convert to target specific representation. */
*current_mask_element = first_mask_element + m;
/* Adjust the value in case it's a mask for second and third vectors. */
*current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
if (*current_mask_element < 0)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"permutation requires past vector ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
if (*current_mask_element < mask_nunits)
*needs_first_vector = true;
/* We have only one input vector to permute but the mask accesses values in
the next vector as well. */
if (only_one_vec && *current_mask_element >= mask_nunits)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"permutation requires at least two vectors ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
/* The mask requires the next vector. */
while (*current_mask_element >= mask_nunits * 2)
{
if (*needs_first_vector || *mask_fixed)
{
/* We either need the first vector too or have already moved to the
next vector. In both cases, this permutation needs three
vectors. */
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"permutation requires at "
"least three vectors ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
/* We move to the next vector, dropping the first one and working with
the second and the third - we need to adjust the values of the mask
accordingly. */
*current_mask_element -= mask_nunits * *number_of_mask_fixes;
for (i = 0; i < index; i++)
mask[i] -= mask_nunits * *number_of_mask_fixes;
(*number_of_mask_fixes)++;
*mask_fixed = true;
}
*need_next_vector = *mask_fixed;
/* This was the last element of this mask. Start a new one. */
if (index == mask_nunits - 1)
{
*number_of_mask_fixes = 1;
*mask_fixed = false;
*needs_first_vector = false;
}
return true;
}
/* Generate vector permute statements from a list of loads in DR_CHAIN. /* Generate vector permute statements from a list of loads in DR_CHAIN.
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
permute statements for the SLP node NODE of the SLP instance permute statements for the SLP node NODE of the SLP instance
...@@ -3350,17 +3254,11 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3350,17 +3254,11 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree mask_element_type = NULL_TREE, mask_type; tree mask_element_type = NULL_TREE, mask_type;
int i, j, k, nunits, vec_index = 0; int nunits, vec_index = 0;
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
int first_mask_element; int unroll_factor, mask_element, ncopies;
int index, unroll_factor, current_mask_element, ncopies;
unsigned char *mask; unsigned char *mask;
bool only_one_vec = false, need_next_vector = false;
int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
int number_of_mask_fixes = 1;
bool mask_fixed = false;
bool needs_first_vector = false;
machine_mode mode; machine_mode mode;
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
...@@ -3391,15 +3289,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3391,15 +3289,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
mask = XALLOCAVEC (unsigned char, nunits); mask = XALLOCAVEC (unsigned char, nunits);
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
unrolling factor. */
orig_vec_stmts_num
= (STMT_VINFO_GROUP_SIZE (stmt_info)
* SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
+ nunits - 1) / nunits;
if (orig_vec_stmts_num == 1)
only_one_vec = true;
/* Number of copies is determined by the final vectorization factor /* Number of copies is determined by the final vectorization factor
relatively to SLP_NODE_INSTANCE unrolling factor. */ relatively to SLP_NODE_INSTANCE unrolling factor. */
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance); ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
...@@ -3422,75 +3311,85 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3422,75 +3311,85 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
we need the second and the third vectors: {b1,c1,a2,b2} and we need the second and the third vectors: {b1,c1,a2,b2} and
{c2,a3,b3,c3}. */ {c2,a3,b3,c3}. */
{ int vect_stmts_counter = 0;
index = 0; int index = 0;
vect_stmts_counter = 0; int first_vec_index = -1;
vec_index = 0; int second_vec_index = -1;
first_vec_index = vec_index++;
if (only_one_vec)
second_vec_index = first_vec_index;
else
second_vec_index = vec_index++;
for (j = 0; j < unroll_factor; j++) for (int j = 0; j < unroll_factor; j++)
{ {
for (k = 0; k < group_size; k++) for (int k = 0; k < group_size; k++)
{ {
i = SLP_TREE_LOAD_PERMUTATION (node)[k]; int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info); + j * STMT_VINFO_GROUP_SIZE (stmt_info));
if (!vect_get_mask_element (stmt, first_mask_element, 0, vec_index = i / nunits;
nunits, only_one_vec, index, mask_element = i % nunits;
mask, &current_mask_element, if (vec_index == first_vec_index
&need_next_vector, || first_vec_index == -1)
&number_of_mask_fixes, &mask_fixed, {
&needs_first_vector)) first_vec_index = vec_index;
return false; }
gcc_assert (current_mask_element >= 0 else if (vec_index == second_vec_index
&& current_mask_element < 2 * nunits); || second_vec_index == -1)
mask[index++] = current_mask_element; {
second_vec_index = vec_index;
mask_element += nunits;
}
else
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"permutation requires at "
"least three vectors ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
if (index == nunits) gcc_assert (mask_element >= 0
{ && mask_element < 2 * nunits);
index = 0; mask[index++] = mask_element;
if (!can_vec_perm_p (mode, false, mask))
if (index == nunits)
{
if (!can_vec_perm_p (mode, false, mask))
{
if (dump_enabled_p ())
{ {
if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION,
{ vect_location,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, "unsupported vect permute { ");
vect_location, for (i = 0; i < nunits; ++i)
"unsupported vect permute { "); dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
for (i = 0; i < nunits; ++i) dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
mask[i]);
dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
}
return false;
} }
return false;
}
if (!analyze_only) if (!analyze_only)
{ {
int l; tree mask_vec, *mask_elts;
tree mask_vec, *mask_elts; mask_elts = XALLOCAVEC (tree, nunits);
mask_elts = XALLOCAVEC (tree, nunits); for (int l = 0; l < nunits; ++l)
for (l = 0; l < nunits; ++l) mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
mask_elts[l] = build_int_cst (mask_element_type, mask_vec = build_vector (mask_type, mask_elts);
mask[l]);
mask_vec = build_vector (mask_type, mask_elts); if (second_vec_index == -1)
second_vec_index = first_vec_index;
if (need_next_vector) vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
{ second_vec_index,
first_vec_index = second_vec_index; gsi, node, vectype, dr_chain,
second_vec_index = vec_index; ncopies, vect_stmts_counter++);
} }
vect_create_mask_and_perm (stmt, index = 0;
mask_vec, first_vec_index, second_vec_index, first_vec_index = -1;
gsi, node, vectype, dr_chain, second_vec_index = -1;
ncopies, vect_stmts_counter++); }
} }
}
}
}
} }
return true; return true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment