Commit 91ff1504 by Richard Biener (committed by Richard Biener)

re PR tree-optimization/66510 (gcc.target/arm/pr53636.c FAILs after r224221)

2015-06-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66510
	* tree-vect-stmts.c (vectorizable_load): Properly compute the
	number of vector loads for SLP permuted loads.
	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
	check the stride for loop vectorization.
	(vect_enhance_data_refs_alignment): Deal with SLP adjusted
	vectorization factor.
	(vect_analyze_group_access): If the group size is not a power
	of two, require an epilogue loop.
	* tree-vect-loop.c (vect_analyze_loop_2): Move alignment
	computation and optimization and alias test pruning after the
	final vectorization factor computation.
	* tree-vect-slp.c (vect_build_slp_tree_1): Remove check on
	vector alignment.
	(vect_transform_slp_perm_load): Properly compute the original
	number of vector load stmts.

	* gcc.dg/vect/slp-perm-12.c: New testcase.

From-SVN: r224598
New file: gcc/testsuite/gcc.dg/vect/slp-perm-12.c

/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_pack_trunc } */
/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */

#include "tree-vect.h"

extern void abort (void);

unsigned char a[64];
short b[88];

void __attribute__((noinline))
test (unsigned char * __restrict__ dst, short * __restrict__ tptr)
{
  int i;
  for (i = 0; i < 8; i++)
    {
      dst[0] = (tptr[0] - tptr[0 + 3]);
      dst[1] = (tptr[1] - tptr[1 + 3]);
      dst[2] = (tptr[2] - tptr[2 + 3]);
      dst[3] = (tptr[3] - tptr[3 + 3]);
      dst[4] = (tptr[4] - tptr[4 + 3]);
      dst[5] = (tptr[5] - tptr[5 + 3]);
      dst[6] = (tptr[6] - tptr[6 + 3]);
      dst[7] = (tptr[7] - tptr[7 + 3]);
      dst += 8;
      tptr += 11;
    }
}

int main ()
{
  int i;

  check_vect ();

  for (i = 0; i < 88; ++i)
    {
      b[i] = i;
      __asm__ volatile ("");
    }

  test (a, b);

  for (i = 0; i < 64; ++i)
    if (a[i] != 253)
      abort ();

  return 0;
}

/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
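
For readers of the testcase: since b[i] == i, every difference tptr[j] - tptr[j + 3] evaluates to -3, which wraps to 253 when stored into an unsigned char, and the eight-wide store group combined with the eleven-short load stride is what exercises the SLP load permutation. A minimal scalar sketch of the expected value (illustrative only, not part of the testsuite):

#include <assert.h>

int main (void)
{
  short tj = 42, tj3 = 45;                   /* stand-ins for b[j], b[j + 3] */
  unsigned char d = (unsigned char) (tj - tj3);
  assert (d == 253);                         /* -3 wraps to 253 */
  return 0;
}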
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -691,21 +691,22 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 	}
     }
 
-  /* Similarly, if we're doing basic-block vectorization, we can only use
-     base and misalignment information relative to an innermost loop if the
-     misalignment stays the same throughout the execution of the loop.
-     As above, this is the case if the stride of the dataref evenly divides
-     by the vector size.  */
-  if (!loop)
+  /* Similarly we can only use base and misalignment information relative to
+     an innermost loop if the misalignment stays the same throughout the
+     execution of the loop.  As above, this is the case if the stride of
+     the dataref evenly divides by the vector size.  */
+  else
     {
       tree step = DR_STEP (dr);
+      unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
 
       if (tree_fits_shwi_p (step)
-	  && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+	  && ((tree_to_shwi (step) * vf)
+	      % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "SLP: step doesn't divide the vector-size.\n");
+			     "step doesn't divide the vector-size.\n");
 	  misalign = NULL_TREE;
 	}
     }
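
In loop context the misalignment of a strided access stays fixed only if each vector iteration advances the pointer by a whole number of vectors, i.e. if the scalar step times the vectorization factor divides by the vector size; VF = 1 recovers the old basic-block-only test. A minimal sketch of the predicate, with illustrative names rather than the GCC internals:

/* Sketch: the misalignment of a dataref with byte step STEP is the same
   in every vector iteration iff STEP * VF is a multiple of the vector
   size in bytes.  VF == 1 corresponds to basic-block SLP and reduces to
   the old STEP % VECSIZE test.  */
static int
misalignment_invariant_p (long step, unsigned vf, unsigned vecsize)
{
  return (step * (long) vf) % vecsize == 0;
}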
@@ -1440,7 +1441,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 	     We do this automtically for cost model, since we calculate cost
 	     for every peeling option.  */
           if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
-            possible_npeel_number = vf /nelements;
+	    {
+	      if (STMT_SLP_TYPE (stmt_info))
+		possible_npeel_number
+		  = (vf * GROUP_SIZE (stmt_info)) / nelements;
+	      else
+		possible_npeel_number = vf / nelements;
+	    }
 
           /* Handle the aligned case. We may decide to align some other
              access, making DR unaligned.  */
@@ -1453,7 +1460,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 
           for (j = 0; j < possible_npeel_number; j++)
             {
-              gcc_assert (npeel_tmp <= vf);
               vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
               npeel_tmp += nelements;
             }
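
The dropped assertion is a consequence of the SLP branch just above: possible_npeel_number = (vf * GROUP_SIZE (stmt_info)) / nelements yields more peeling candidates, and npeel_tmp, which advances by nelements per candidate, may now legitimately pass vf. A worked instance with assumed numbers (vf = 16, nelements = 16, an SLP group of 8):

#include <stdio.h>

/* Illustrative only: candidate peeling amounts tried per dataref under
   the unlimited cost model.  Scaling by the group size lets the stepped
   npeel values exceed vf, hence the removed gcc_assert (npeel_tmp <= vf).  */
int main (void)
{
  unsigned vf = 16, nelements = 16, group_size = 8;
  unsigned non_slp = vf / nelements;                  /* 1 candidate  */
  unsigned with_slp = (vf * group_size) / nelements;  /* 8 candidates */
  printf ("non-SLP %u, SLP %u\n", non_slp, with_slp);
  return 0;
}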
@@ -2230,8 +2236,13 @@ vect_analyze_group_access (struct data_reference *dr)
 	  BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
 	}
 
-      /* There is a gap in the end of the group.  */
-      if (groupsize - last_accessed_element > 0 && loop_vinfo)
+      /* If there is a gap in the end of the group or the group size cannot
+	 be made a multiple of the vector element count then we access excess
+	 elements in the last iteration and thus need to peel that off.  */
+      if (loop_vinfo
+	  && (groupsize - last_accessed_element > 0
+	      || exact_log2 (groupsize) == -1))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
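
exact_log2 returns -1 for any value that is not a power of two, so the new condition requests an epilogue whenever the group cannot tile vectors exactly; in the new testcase the tptr group has 11 elements and exact_log2 (11) == -1. A hedged sketch of the combined test using the standard bit trick (not the GCC implementation):

/* Sketch: peel an epilogue if the group ends with a gap or its size is
   not a power of two, since the vectorized body would otherwise read
   elements past the last group.  Assumes groupsize > 0.  */
static int
group_needs_epilogue (unsigned groupsize, unsigned last_accessed_element)
{
  int power_of_two = (groupsize & (groupsize - 1)) == 0;
  return groupsize - last_accessed_element > 0 || !power_of_two;
}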
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1791,6 +1791,22 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
       return false;
     }
 
+  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
+  ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
+  if (!ok)
+    return false;
+
+  /* If there are any SLP instances mark them as pure_slp.  */
+  bool slp = vect_make_slp_decision (loop_vinfo);
+  if (slp)
+    {
+      /* Find stmts that need to be both vectorized and SLPed.  */
+      vect_detect_hybrid_slp (loop_vinfo);
+
+      /* Update the vectorization factor based on the SLP decision.  */
+      vect_update_vf_for_slp (loop_vinfo);
+    }
+
   /* Analyze the alignment of the data-refs in the loop.
      Fail if a data reference is found that cannot be vectorized.  */
@@ -1830,31 +1846,17 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
       return false;
     }
 
-  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
-  ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
-  if (ok)
+  if (slp)
     {
-      /* If there are any SLP instances mark them as pure_slp.  */
-      if (vect_make_slp_decision (loop_vinfo))
-	{
-	  /* Find stmts that need to be both vectorized and SLPed.  */
-	  vect_detect_hybrid_slp (loop_vinfo);
-
-	  /* Update the vectorization factor based on the SLP decision.  */
-	  vect_update_vf_for_slp (loop_vinfo);
-
-	  /* Analyze operations in the SLP instances.  Note this may
-	     remove unsupported SLP instances which makes the above
-	     SLP kind detection invalid.  */
-	  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
-	  vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
-				       LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
-	  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
-	    return false;
-	}
+      /* Analyze operations in the SLP instances.  Note this may
+	 remove unsupported SLP instances which makes the above
+	 SLP kind detection invalid.  */
+      unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+      vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+				   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
+      if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+	return false;
     }
-  else
-    return false;
 
   /* Scan all the remaining operations in the loop that are not subject
      to SLP and make sure they are vectorizable.  */
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -485,9 +485,8 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   int icode;
   machine_mode optab_op2_mode;
   machine_mode vec_mode;
-  struct data_reference *first_dr;
   HOST_WIDE_INT dummy;
-  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
+  gimple first_load = NULL, prev_first_load = NULL;
   tree cond;
 
   /* For every stmt in NODE find its def stmt/s.  */
@@ -785,7 +784,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 		  return false;
 		}
 
-	      old_first_load = first_load;
 	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
 	      if (prev_first_load)
 		{
@@ -809,30 +807,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 		}
 	      else
 		prev_first_load = first_load;
-
-	      /* In some cases a group of loads is just the same load
-		 repeated N times.  Only analyze its cost once.  */
-	      if (first_load == stmt && old_first_load != first_load)
-		{
-		  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
-		  if (vect_supportable_dr_alignment (first_dr, false)
-		      == dr_unaligned_unsupported)
-		    {
-		      if (dump_enabled_p ())
-			{
-			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
-					   vect_location,
-					   "Build SLP failed: unsupported "
-					   "unaligned load ");
-			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
-					    stmt, 0);
-			  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
-			}
-		      /* Fatal mismatch.  */
-		      matches[0] = false;
-		      return false;
-		    }
-		}
 	    }
 	} /* Grouped access.  */
       else
@@ -3201,6 +3175,11 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
   bool needs_first_vector = false;
   machine_mode mode;
 
+  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    return false;
+
+  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
+
   mode = TYPE_MODE (vectype);
 
   if (!can_vec_perm_p (mode, false, NULL))
@@ -3226,8 +3205,10 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
 
   /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
      unrolling factor.  */
-  orig_vec_stmts_num = group_size *
-    SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
+  orig_vec_stmts_num
+    = (STMT_VINFO_GROUP_SIZE (stmt_info)
+       * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
+       + nunits - 1) / nunits;
   if (orig_vec_stmts_num == 1)
     only_one_vec = true;
@@ -3235,11 +3216,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
      relatively to SLP_NODE_INSTANCE unrolling factor.  */
   ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
 
-  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
-    return false;
-
-  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
-
   /* Generate permutation masks for every NODE. Number of masks for each NODE
      is equal to GROUP_SIZE.
      E.g., we have a group of three nodes with three loads from the same
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6422,7 +6422,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (slp)
     {
       grouped_load = false;
-      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+      /* For SLP permutation support we need to load the whole group,
+	 not only the number of vector stmts the permutation result
+	 fits in.  */
+      if (slp_perm)
+	vec_num = (group_size * vf + nunits - 1) / nunits;
+      else
+	vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
       group_gap_adj = vf * group_size - nunits * vec_num;
     }
   else
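
The same rounding-up computation recurs at load transform time: with a load permutation every vector covering the group must be loaded, not just the vectors the permuted result occupies, and group_gap_adj then records the scalar elements of slack relative to the vectors emitted. A small worked instance (values assumed, plausibly matching the testcase; the real ones come from the loop VF and the load's vectype):

#include <stdio.h>

int main (void)
{
  unsigned group_size = 11, nunits = 8, vf = 16;  /* assumed values */
  unsigned vec_num = (group_size * vf + nunits - 1) / nunits;        /* 22 */
  int gap_adj = (int) (vf * group_size) - (int) (nunits * vec_num);  /* 0 */
  printf ("vec_num %u, group_gap_adj %d\n", vec_num, gap_adj);
  return 0;
}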