Commit 91ff1504 by Richard Biener Committed by Richard Biener

re PR tree-optimization/66510 (gcc.target/arm/pr53636.c FAILs after r224221)

2015-06-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/66510
	* tree-vect-stmts.c (vectorizable_load): Properly compute the
	number of vector loads for SLP permuted loads.
	* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
	check the stride for loop vectorization.
	(vect_enhance_data_refs_alignment): Deal with SLP adjusted
	vectorization factor.
	(vect_analyze_group_access): If the group size is not a power
	of two require an epilogue loop.
	* tree-vect-loop.c (vect_analyze_loop_2): Move alignment
	computation and optimization and alias test pruning after the
	final vectorization factor computation.
	* tree-vect-slp.c (vect_build_slp_tree_1): Remove check on
	vector alignment.
	(vect_transform_slp_perm_load): Properly compute the original
	number of vector load stmts.

	* gcc.dg/vect/slp-perm-12.c: New testcase.

From-SVN: r224598
parent d38052e0
2015-06-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/66510
* tree-vect-stmts.c (vectorizable_load): Properly compute the
number of vector loads for SLP permuted loads.
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
check the stride for loop vectorization.
(vect_enhance_data_refs_alignment): Deal with SLP adjusted
vectorization factor.
(vect_analyze_group_access): If the group size is not a power
of two require an epilogue loop.
* tree-vect-loop.c (vect_analyze_loop_2): Move alignment
computation and optimization and alias test pruning after the
final vectorization factor computation.
* tree-vect-slp.c (vect_build_slp_tree_1): Remove check on
vector alignment.
(vect_transform_slp_perm_load): Properly compute the original
number of vector load stmts.
2015-06-18 Uros Bizjak <ubizjak@gmail.com>
* doc/invoke.texi (-fsanitize-sections): Split @var to avoid
......
2015-06-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/66510
* gcc.dg/vect/slp-perm-12.c: New testcase.
2015-06-17 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/noplt-1.c (dg-do): Fix target selector.
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_pack_trunc } */
/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
#include "tree-vect.h"
extern void abort (void);
unsigned char a[64];
short b[88];
/* Kernel under test for PR66510: eight fully unrolled byte stores per
   iteration, each the difference of two shorts three elements apart.
   The unrolled shape (group size 8 loaded from a stride-11 group) is
   exactly what exercises SLP permuted-load support, and the dg-final
   dump scan depends on it — do not re-roll this into an inner loop.
   noinline keeps the loop visible to the loop vectorizer.  */
void __attribute__((noinline))
test(unsigned char * __restrict__ dst, short * __restrict__ tptr)
{
int i;
/* 8 iterations x 8 stores = 64 bytes written through DST; TPTR
   advances by 11, so the highest element read is tptr[7*11 + 10],
   i.e. index 87 of the caller's 88-element array.  With b[i] == i
   every difference is -3, stored as (unsigned char) 253.  */
for (i = 0; i < 8; i++)
{
dst[0] = (tptr[0] - tptr[0 + 3]);
dst[1] = (tptr[1] - tptr[1 + 3]);
dst[2] = (tptr[2] - tptr[2 + 3]);
dst[3] = (tptr[3] - tptr[3 + 3]);
dst[4] = (tptr[4] - tptr[4 + 3]);
dst[5] = (tptr[5] - tptr[5 + 3]);
dst[6] = (tptr[6] - tptr[6 + 3]);
dst[7] = (tptr[7] - tptr[7 + 3]);
dst += 8;
tptr += 11;
}
}
/* Driver: fill the input with the identity sequence, run the kernel,
   and verify every output byte.  Since b[i] == i, each stored value is
   (unsigned char)(i - (i + 3)) == 253.  Aborts on any mismatch.  */
int main()
{
  int idx;

  check_vect ();

  /* Initialize the 88 input shorts.  The empty volatile asm is an
     optimization barrier: it keeps the compiler from constant-folding
     the initialization (and the subsequent check) at compile time.  */
  for (idx = 0; idx < 88; ++idx)
    {
      b[idx] = idx;
      __asm__ volatile ("");
    }

  test (a, b);

  /* Every one of the 64 result bytes must be 253.  */
  idx = 0;
  while (idx < 64)
    {
      if (a[idx] != 253)
	abort ();
      ++idx;
    }

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
......@@ -691,21 +691,22 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
}
}
/* Similarly, if we're doing basic-block vectorization, we can only use
base and misalignment information relative to an innermost loop if the
misalignment stays the same throughout the execution of the loop.
As above, this is the case if the stride of the dataref evenly divides
by the vector size. */
if (!loop)
/* Similarly we can only use base and misalignment information relative to
an innermost loop if the misalignment stays the same throughout the
execution of the loop. As above, this is the case if the stride of
the dataref evenly divides by the vector size. */
else
{
tree step = DR_STEP (dr);
unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
if (tree_fits_shwi_p (step)
&& tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
&& ((tree_to_shwi (step) * vf)
% GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP: step doesn't divide the vector-size.\n");
"step doesn't divide the vector-size.\n");
misalign = NULL_TREE;
}
}
......@@ -1440,7 +1441,13 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
We do this automtically for cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
possible_npeel_number = vf /nelements;
{
if (STMT_SLP_TYPE (stmt_info))
possible_npeel_number
= (vf * GROUP_SIZE (stmt_info)) / nelements;
else
possible_npeel_number = vf / nelements;
}
/* Handle the aligned case. We may decide to align some other
access, making DR unaligned. */
......@@ -1453,7 +1460,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
for (j = 0; j < possible_npeel_number; j++)
{
gcc_assert (npeel_tmp <= vf);
vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
npeel_tmp += nelements;
}
......@@ -2230,8 +2236,13 @@ vect_analyze_group_access (struct data_reference *dr)
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
/* There is a gap in the end of the group. */
if (groupsize - last_accessed_element > 0 && loop_vinfo)
/* If there is a gap in the end of the group or the group size cannot
be made a multiple of the vector element count then we access excess
elements in the last iteration and thus need to peel that off. */
if (loop_vinfo
&& (groupsize - last_accessed_element > 0
|| exact_log2 (groupsize) == -1))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
......
......@@ -1791,6 +1791,22 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
return false;
}
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
if (!ok)
return false;
/* If there are any SLP instances mark them as pure_slp. */
bool slp = vect_make_slp_decision (loop_vinfo);
if (slp)
{
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
/* Update the vectorization factor based on the SLP decision. */
vect_update_vf_for_slp (loop_vinfo);
}
/* Analyze the alignment of the data-refs in the loop.
Fail if a data reference is found that cannot be vectorized. */
......@@ -1830,19 +1846,8 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
return false;
}
/* Check the SLP opportunities in the loop, analyze and build SLP trees. */
ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
if (ok)
{
/* If there are any SLP instances mark them as pure_slp. */
if (vect_make_slp_decision (loop_vinfo))
if (slp)
{
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
/* Update the vectorization factor based on the SLP decision. */
vect_update_vf_for_slp (loop_vinfo);
/* Analyze operations in the SLP instances. Note this may
remove unsupported SLP instances which makes the above
SLP kind detection invalid. */
......@@ -1852,9 +1857,6 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
return false;
}
}
else
return false;
/* Scan all the remaining operations in the loop that are not subject
to SLP and make sure they are vectorizable. */
......
......@@ -485,9 +485,8 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
int icode;
machine_mode optab_op2_mode;
machine_mode vec_mode;
struct data_reference *first_dr;
HOST_WIDE_INT dummy;
gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
gimple first_load = NULL, prev_first_load = NULL;
tree cond;
/* For every stmt in NODE find its def stmt/s. */
......@@ -785,7 +784,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
return false;
}
old_first_load = first_load;
first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
if (prev_first_load)
{
......@@ -809,30 +807,6 @@ vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
}
else
prev_first_load = first_load;
/* In some cases a group of loads is just the same load
repeated N times. Only analyze its cost once. */
if (first_load == stmt && old_first_load != first_load)
{
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
if (vect_supportable_dr_alignment (first_dr, false)
== dr_unaligned_unsupported)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
"Build SLP failed: unsupported "
"unaligned load ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
/* Fatal mismatch. */
matches[0] = false;
return false;
}
}
}
} /* Grouped access. */
else
......@@ -3201,6 +3175,11 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
bool needs_first_vector = false;
machine_mode mode;
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false;
stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
mode = TYPE_MODE (vectype);
if (!can_vec_perm_p (mode, false, NULL))
......@@ -3226,8 +3205,10 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
unrolling factor. */
orig_vec_stmts_num = group_size *
SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
orig_vec_stmts_num
= (STMT_VINFO_GROUP_SIZE (stmt_info)
* SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
+ nunits - 1) / nunits;
if (orig_vec_stmts_num == 1)
only_one_vec = true;
......@@ -3235,11 +3216,6 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
relatively to SLP_NODE_INSTANCE unrolling factor. */
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
return false;
stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
/* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE.
E.g., we have a group of three nodes with three loads from the same
......
......@@ -6422,6 +6422,12 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (slp)
{
grouped_load = false;
/* For SLP permutation support we need to load the whole group,
not only the number of vector stmts the permutation result
fits in. */
if (slp_perm)
vec_num = (group_size * vf + nunits - 1) / nunits;
else
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap_adj = vf * group_size - nunits * vec_num;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment