Commit fde9c428 by Richard Guenther Committed by Richard Biener

re PR tree-optimization/44284 (vectorization does not work for short variable)

2010-05-27  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/44284
	* tree-vect-stmts.c (vectorizable_assignment): Handle
	sign-changing conversions as simple copy.

	* gcc.dg/vect/vect-118.c: New testcase.
	* gcc.dg/vect/bb-slp-20.c: Adjust.
	* gcc.dg/vect/no-section-anchors-vect-36.c: Likewise.
	* gcc.dg/vect/slp-9.c: Likewise.
	* gcc.dg/vect/slp-reduc-4.c: Likewise.
	* gcc.dg/vect/vect-10.c: Likewise.
	* gcc.dg/vect/vect-109.c: Likewise.
	* gcc.dg/vect/vect-12.c: Likewise.
	* gcc.dg/vect/vect-36.c: Likewise.
	* gcc.dg/vect/vect-7.c: Likewise.
	* gcc.dg/vect/vect-iv-8.c: Likewise.
	* gcc.dg/vect/vect-multitypes-10.c: Likewise.
	* gcc.dg/vect/vect-multitypes-13.c: Likewise.
	* gcc.dg/vect/vect-multitypes-14.c: Likewise.
	* gcc.dg/vect/vect-multitypes-15.c: Likewise.
	* gcc.dg/vect/vect-multitypes-7.c: Likewise.
	* gcc.dg/vect/vect-multitypes-8.c: Likewise.
	* gcc.dg/vect/vect-multitypes-9.c: Likewise.
	* gcc.dg/vect/vect-reduc-dot-s16b.c: Likewise.
	* gcc.dg/vect/vect-reduc-dot-s8a.c: Likewise.
	* gcc.dg/vect/vect-reduc-dot-s8b.c: Likewise.
	* gcc.dg/vect/vect-reduc-dot-u16b.c: Likewise.
	* gcc.dg/vect/vect-strided-a-u32-mult.c: Likewise.
	* gcc.dg/vect/vect-strided-u32-mult.c: Likewise.
	* gcc.dg/vect/vect-widen-mult-s16.c: Likewise.
	* gcc.dg/vect/vect-widen-mult-s8.c: Likewise.
	* gcc.dg/vect/vect-widen-mult-sum.c: Likewise.
	* gcc.dg/vect/vect-widen-mult-u16.c: Likewise.

From-SVN: r159920
parent 3714764d
2010-05-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/44284
* tree-vect-stmts.c (vectorizable_assignment): Handle
sign-changing conversions as simple copy.
2010-05-27 Maxim Kuvyrkov <maxim@codesourcery.com>
* gthr-posix.h (pthread_cancel): Don't declare if compiling against
......
2010-05-27 Richard Guenther <rguenther@suse.de>
PR tree-optimization/44284
* gcc.dg/vect/vect-118.c: New testcase.
* gcc.dg/vect/bb-slp-20.c: Adjust.
* gcc.dg/vect/no-section-anchors-vect-36.c: Likewise.
* gcc.dg/vect/slp-9.c: Likewise.
* gcc.dg/vect/slp-reduc-4.c: Likewise.
* gcc.dg/vect/vect-10.c: Likewise.
* gcc.dg/vect/vect-109.c: Likewise.
* gcc.dg/vect/vect-12.c: Likewise.
* gcc.dg/vect/vect-36.c: Likewise.
* gcc.dg/vect/vect-7.c: Likewise.
* gcc.dg/vect/vect-iv-8.c: Likewise.
* gcc.dg/vect/vect-multitypes-10.c: Likewise.
* gcc.dg/vect/vect-multitypes-13.c: Likewise.
* gcc.dg/vect/vect-multitypes-14.c: Likewise.
* gcc.dg/vect/vect-multitypes-15.c: Likewise.
* gcc.dg/vect/vect-multitypes-7.c: Likewise.
* gcc.dg/vect/vect-multitypes-8.c: Likewise.
* gcc.dg/vect/vect-multitypes-9.c: Likewise.
* gcc.dg/vect/vect-reduc-dot-s16b.c: Likewise.
* gcc.dg/vect/vect-reduc-dot-s8a.c: Likewise.
* gcc.dg/vect/vect-reduc-dot-s8b.c: Likewise.
* gcc.dg/vect/vect-reduc-dot-u16b.c: Likewise.
* gcc.dg/vect/vect-strided-a-u32-mult.c: Likewise.
* gcc.dg/vect/vect-strided-u32-mult.c: Likewise.
* gcc.dg/vect/vect-widen-mult-s16.c: Likewise.
* gcc.dg/vect/vect-widen-mult-s8.c: Likewise.
* gcc.dg/vect/vect-widen-mult-sum.c: Likewise.
* gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
2010-05-27 Maxim Kuvyrkov <maxim@codesourcery.com>
* gcc.dg/glibc-uclibc-1.c, gcc.dg/glibc-uclibc-2.c: Remove, no longer
......
......@@ -21,8 +21,7 @@ main1 (unsigned int x, unsigned int y)
else
y = x;
/* Two SLP instances in the basic block, only one is supported for now,
the second one contains type conversion. */
/* Two SLP instances in the basic block. */
a0 = in[0] + 23;
a1 = in[1] + 142;
a2 = in[2] + 2;
......@@ -63,6 +62,6 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp" { target vect_int_mult } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */
......@@ -18,6 +18,7 @@ int main1 ()
for (i = 0; i < N; i++)
{
s.cb[i] = 3*i;
__asm__ volatile ("");
}
for (i = 0; i < N; i++)
......
......@@ -29,6 +29,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -48,7 +48,10 @@ int main (void)
check_vect ();
for (i = 0; i < N; i++)
uc[i] = i+3;
{
uc[i] = i+3;
__asm__ volatile ("");
}
main1 (N/8, 123, 124, 125, 126, 127, 128, 129, 313);
return 0;
......
......@@ -12,8 +12,7 @@ int foo ()
short b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
short c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/* Not vectorizable yet (strided access pattern). */
/* Strided access pattern. */
for (i = 0; i < N/2; i++)
{
a[i] = b[2*i+1] * c[2*i+1] - b[2*i] * c[2*i];
......@@ -23,5 +22,5 @@ int foo ()
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { ! vect_extract_even_odd } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -72,7 +72,7 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 2 "vect" { xfail vect_hw_misalign } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 10 "vect" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* Element-wise addition: store a[i] + b[i] into x[i] for i in [0, 1024).
   All three pointers are __restrict__-qualified.  The operands are short,
   so under C's integer promotions the addition is carried out in int and
   the sum is converted back to short when it is stored into x[i].
   NOTE(review): presumably this is the new vect-118.c testcase for
   PR tree-optimization/44284 named in the commit log -- confirm.  */
void f(short * __restrict__ a, short * __restrict__ b, short * __restrict__ x)
{
int i;
/* Constant trip count of 1024; the body is a single load/add/store.  */
for (i=0;i<1024;i++)
x[i] = a[i] + b[i];
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -16,7 +16,7 @@ int main1 ()
short sc[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
/* Not vectorizable yet (multiple types with different nunits in vector). */
/* Multiple types with different nunits in vector. */
for (i = 0; i < N; i++)
{
ia[i] = ib[i] + ic[i];
......@@ -40,5 +40,5 @@ int main (void)
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -18,6 +18,7 @@ int main1 ()
for (i = 0; i < N; i++)
{
s.cb[i] = 3*i;
__asm__ volatile ("");
}
for (i = 0; i < N; i++)
......
......@@ -47,7 +47,6 @@ int main (void)
return main1 ();
}
/* Fails for 32-bit targets that don't vectorize PLUS. */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -39,9 +39,5 @@ int main (void)
return main1 (3);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* Fails to get vectorized due to a redundant cast. Once this is fixed,
should be vectorized as follows:
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_pack_trunc && vect_unpack } } }
*/
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_pack_trunc && vect_unpack } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -43,6 +43,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = 16-i;
uX[i] = 16-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -39,6 +39,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = 16-i;
uX[i] = 16-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -31,6 +31,7 @@ int main (void)
for (i=0; i<N; i++) {
uX[i] = 16-i;
uY[i] = 16-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -38,6 +38,7 @@ int main (void)
X[i] = i;
Y[i] = 64-i;
CX[i] = i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -39,6 +39,7 @@ int main (void)
X[i] = i;
Y[i] = 64-i;
CX[i] = i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -31,6 +31,7 @@ int main (void)
for (i=0; i<N; i++) {
uX[i] = 16-i;
uY[i] = 16-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -39,6 +39,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = 16-i;
uX[i] = 16-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -39,6 +39,7 @@ main (void)
{
X[i] = i;
Y[i] = N - i;
__asm__ volatile ("");
}
dot = foo (N);
......
......@@ -39,6 +39,7 @@ main (void)
{
X[i] = i;
Y[i] = 64 - i;
__asm__ volatile ("");
}
dot = foo (N);
......
......@@ -40,6 +40,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
dot1 = foo1 (N);
......
......@@ -47,6 +47,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
dot2 = foo2 (N);
......
......@@ -32,6 +32,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
dot3 = foo3 (N);
......
......@@ -37,6 +37,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
dot2 = foo2 (N);
......
......@@ -26,8 +26,7 @@ main1 ()
arr[i] = i;
iarr[i].a = i;
iarr[i].b = i * 3;
if (arr[i] == 178)
abort();
__asm__ volatile ("");
}
for (i = 0; i < N; i++)
......
......@@ -53,8 +53,7 @@ int main (void)
arr[i] = i;
iarr[i].a = i;
iarr[i].b = i * 3;
if (arr[i] == 178)
abort();
__asm__ volatile ("");
}
main1 (arr, iarr);
......
......@@ -28,6 +28,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -28,6 +28,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -30,6 +30,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = 16-i;
__asm__ volatile ("");
}
sum = main1 (X, 1, 16, N-1);
......
......@@ -30,6 +30,7 @@ int main (void)
for (i=0; i<N; i++) {
X[i] = i;
Y[i] = 64-i;
__asm__ volatile ("");
}
foo1 (N);
......
......@@ -1829,7 +1829,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
tree def;
gimple def_stmt;
enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
int nunits = TYPE_VECTOR_SUBPARTS (vectype);
unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies;
int i, j;
VEC(tree,heap) *vec_oprnds = NULL;
......@@ -1837,6 +1837,8 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
gimple new_stmt = NULL;
stmt_vec_info prev_stmt_info = NULL;
enum tree_code code;
tree vectype_in;
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
......@@ -1862,19 +1864,31 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
if (TREE_CODE (scalar_dest) != SSA_NAME)
return false;
code = gimple_assign_rhs_code (stmt);
if (gimple_assign_single_p (stmt)
|| gimple_assign_rhs_code (stmt) == PAREN_EXPR)
|| code == PAREN_EXPR
|| CONVERT_EXPR_CODE_P (code))
op = gimple_assign_rhs1 (stmt);
else
return false;
if (!vect_is_simple_use (op, loop_vinfo, bb_vinfo, &def_stmt, &def, &dt[0]))
if (!vect_is_simple_use_1 (op, loop_vinfo, bb_vinfo,
&def_stmt, &def, &dt[0], &vectype_in))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
return false;
}
/* We can handle NOP_EXPR conversions that do not change the number
of elements or the vector size. */
if (CONVERT_EXPR_CODE_P (code)
&& (!vectype_in
|| TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
|| (GET_MODE_SIZE (TYPE_MODE (vectype))
!= GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
return false;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
......@@ -1903,6 +1917,8 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
/* Arguments are ready. create the new vector stmt. */
for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
{
if (CONVERT_EXPR_CODE_P (code))
vop = build1_stat (VIEW_CONVERT_EXPR, vectype, vop);
new_stmt = gimple_build_assign (vec_dest, vop);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment