Commit ab0ef706, authored and committed by Jakub Jelinek

re PR tree-optimization/50596 (Problems in vectorization of condition expression)

	PR tree-optimization/50596
	* tree-vect-stmts.c (vect_mark_relevant): Only use
	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
	(vectorizable_store): If is_pattern_stmt_p look through
	VIEW_CONVERT_EXPR on lhs.
	* tree-vect-patterns.c (check_bool_pattern, adjust_bool_pattern):
	Use unsigned type instead of signed.
	(vect_recog_bool_pattern): Optimize also stores into bool memory in
	addition to casts from bool to integral types.
	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
	created, don't create it again.

	* gcc.dg/vect/vect-cond-10.c: New test.

From-SVN: r180424
parent db854378
2011-10-25 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/50596
* tree-vect-stmts.c (vect_mark_relevant): Only use
FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
(vectorizable_store): If is_pattern_stmt_p look through
VIEW_CONVERT_EXPR on lhs.
* tree-vect-patterns.c (check_bool_pattern, adjust_bool_pattern):
Use unsigned type instead of signed.
(vect_recog_bool_pattern): Optimize also stores into bool memory in
addition to casts from bool to integral types.
(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
created, don't create it again.
2011-10-25 Kai Tietz <ktietz@redhat.com> 2011-10-25 Kai Tietz <ktietz@redhat.com>
* config/i386/i386.c (ix86_frame_pointer_required): Require * config/i386/i386.c (ix86_frame_pointer_required): Require
2011-10-25 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/50596
* gcc.dg/vect/vect-cond-10.c: New test.
2011-10-24 Joey Ye <joey.ye@arm.com> 2011-10-24 Joey Ye <joey.ye@arm.com>
* gcc.target/arm/pr42575.c: Remove architecture option. * gcc.target/arm/pr42575.c: Remove architecture option.
......
/* { dg-require-effective-target vect_cond_mixed } */
#include "tree-vect.h"
/* Element count of every test array and trip count of every loop below.  */
#define N 1024
/* Comparison inputs: the kernels evaluate a[i] < b[i] and c[i] < d[i].  */
float a[N], b[N], c[N], d[N];
/* Result array: written directly by f1-f4, and passed by main as the
   pointer argument of f5-f8.  */
_Bool k[N];
/* For each i in [0, N), store into the global k[i] the bitwise AND of
   (a[i] < b[i]) and (c[i] < d[i]).  Each comparison result is first held
   in a named int temporary; f2 computes the same value as a single
   expression.  Marked noinline/noclone so the loop stays in its own
   function.  */
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; ++i)
{
/* x and y are each 0 or 1, so x & y is 1 only when both comparisons hold.  */
int x = a[i] < b[i];
int y = c[i] < d[i];
k[i] = x & y;
}
}
/* Same computation as f1 -- k[i] = (a[i] < b[i]) & (c[i] < d[i]) for each
   i in [0, N) -- but written as one expression with no int temporaries.
   Stores into the global k array.  */
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; ++i)
k[i] = (a[i] < b[i]) & (c[i] < d[i]);
}
/* For each i in [0, N), store into the global k[i] the bitwise OR of
   (a[i] < b[i]) and (c[i] < d[i]).  Each comparison result is first held
   in a named int temporary; f4 computes the same value as a single
   expression.  */
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; ++i)
{
/* x and y are each 0 or 1, so x | y is 1 when at least one comparison holds.  */
int x = a[i] < b[i];
int y = c[i] < d[i];
k[i] = x | y;
}
}
/* Same computation as f3 -- k[i] = (a[i] < b[i]) | (c[i] < d[i]) for each
   i in [0, N) -- but written as one expression with no int temporaries.
   Stores into the global k array.  */
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; ++i)
k[i] = (a[i] < b[i]) | (c[i] < d[i]);
}
/* Pointer-destination variant of f1: for each i in [0, N), store into p[i]
   the bitwise AND of (a[i] < b[i]) and (c[i] < d[i]), using int
   temporaries for the two comparison results.
   P must point to at least N writable _Bool elements.  */
__attribute__((noinline, noclone)) void
f5 (_Bool *p)
{
int i;
for (i = 0; i < N; ++i)
{
/* x and y are each 0 or 1, so x & y is 1 only when both comparisons hold.  */
int x = a[i] < b[i];
int y = c[i] < d[i];
p[i] = x & y;
}
}
/* Pointer-destination variant of f2: for each i in [0, N), store
   (a[i] < b[i]) & (c[i] < d[i]) into p[i] as a single expression.
   P must point to at least N writable _Bool elements.  */
__attribute__((noinline, noclone)) void
f6 (_Bool *p)
{
int i;
for (i = 0; i < N; ++i)
p[i] = (a[i] < b[i]) & (c[i] < d[i]);
}
/* Pointer-destination variant of f3: for each i in [0, N), store into p[i]
   the bitwise OR of (a[i] < b[i]) and (c[i] < d[i]), using int
   temporaries for the two comparison results.
   P must point to at least N writable _Bool elements.  */
__attribute__((noinline, noclone)) void
f7 (_Bool *p)
{
int i;
for (i = 0; i < N; ++i)
{
/* x and y are each 0 or 1, so x | y is 1 when at least one comparison holds.  */
int x = a[i] < b[i];
int y = c[i] < d[i];
p[i] = x | y;
}
}
/* Pointer-destination variant of f4: for each i in [0, N), store
   (a[i] < b[i]) | (c[i] < d[i]) into p[i] as a single expression.
   P must point to at least N writable _Bool elements.  */
__attribute__((noinline, noclone)) void
f8 (_Bool *p)
{
int i;
for (i = 0; i < N; ++i)
p[i] = (a[i] < b[i]) | (c[i] < d[i]);
}
/* Test driver: fill the input arrays with a known pattern, run each of
   f1..f8, and abort if any element of k differs from the value predicted
   from the index alone.  Returns 0 when every check passes.  */
int
main ()
{
int i;
/* check_vect is not defined in this file (presumably provided by
   tree-vect.h -- confirm).  */
check_vect ();
/* Fill a and b with a pattern of period 9.  a[i] < b[i] holds only in
   cases 0, 3 and 6, i.e. exactly when i % 3 == 0; the other cases make
   a[i] equal to or greater than b[i].  */
for (i = 0; i < N; i++)
{
switch (i % 9)
{
/* NOTE(review): the empty asm is presumably an optimization barrier that
   keeps this initialization loop from being vectorized -- confirm.  */
case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
case 1: a[i] = 0; b[i] = 0; break;
case 2: a[i] = i + 1; b[i] = - i - 1; break;
case 3: a[i] = i; b[i] = i + 7; break;
case 4: a[i] = i; b[i] = i; break;
case 5: a[i] = i + 16; b[i] = i + 3; break;
case 6: a[i] = - i - 5; b[i] = - i; break;
case 7: a[i] = - i; b[i] = - i; break;
case 8: a[i] = - i; b[i] = - i - 7; break;
}
}
/* Fill c and d by copying a/b pairs from index i / 9 plus an offset of
   0, 3 or 6.  The offset is a multiple of 3, so the source index has the
   same remainder mod 3 as i / 9; hence c[i] < d[i] holds exactly when
   (i / 9) % 3 == 0.  */
for (i = 0; i < N; i++)
{
switch ((i / 9) % 3)
{
case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
}
}
/* AND kernels writing the global k: both comparisons must hold.  k is
   cleared after every check so a kernel that stores nothing cannot pass
   on stale results.  */
f1 ();
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
f2 ();
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
/* OR kernels writing the global k: either comparison may hold.  */
f3 ();
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
f4 ();
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
/* AND kernels storing through a pointer parameter (k is passed in).  */
f5 (k);
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
f6 (k);
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
/* OR kernels storing through a pointer parameter (k is passed in).  */
f7 (k);
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
f8 (k);
for (i = 0; i < N; i++)
if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
abort ();
__builtin_memset (k, 0, sizeof (k));
return 0;
}
/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -1617,7 +1617,7 @@ check_bool_pattern (tree var, loop_vec_info loop_vinfo) ...@@ -1617,7 +1617,7 @@ check_bool_pattern (tree var, loop_vec_info loop_vinfo)
{ {
enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
tree itype tree itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
vecitype = get_vectype_for_scalar_type (itype); vecitype = get_vectype_for_scalar_type (itype);
if (vecitype == NULL_TREE) if (vecitype == NULL_TREE)
return false; return false;
...@@ -1813,11 +1813,11 @@ adjust_bool_pattern (tree var, tree out_type, tree trueval, ...@@ -1813,11 +1813,11 @@ adjust_bool_pattern (tree var, tree out_type, tree trueval,
default: default:
gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
|| TYPE_UNSIGNED (TREE_TYPE (rhs1))) || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
{ {
enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
itype itype
= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0); = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
} }
else else
itype = TREE_TYPE (rhs1); itype = TREE_TYPE (rhs1);
...@@ -1933,6 +1933,44 @@ vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in, ...@@ -1933,6 +1933,44 @@ vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
VEC_safe_push (gimple, heap, *stmts, last_stmt); VEC_safe_push (gimple, heap, *stmts, last_stmt);
return pattern_stmt; return pattern_stmt;
} }
else if (rhs_code == SSA_NAME
&& STMT_VINFO_DATA_REF (stmt_vinfo))
{
stmt_vec_info pattern_stmt_info;
vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
gcc_assert (vectype != NULL_TREE);
if (!check_bool_pattern (var, loop_vinfo))
return NULL;
rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
{
tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
gimple cast_stmt
= gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
rhs = rhs2;
}
pattern_stmt
= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
STMT_VINFO_DATA_REF (pattern_stmt_info)
= STMT_VINFO_DATA_REF (stmt_vinfo);
STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
STMT_VINFO_DR_OFFSET (pattern_stmt_info)
= STMT_VINFO_DR_OFFSET (stmt_vinfo);
STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
*type_out = vectype;
*type_in = vectype;
VEC_safe_push (gimple, heap, *stmts, last_stmt);
return pattern_stmt;
}
else else
return NULL; return NULL;
} }
...@@ -1949,19 +1987,22 @@ vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, ...@@ -1949,19 +1987,22 @@ vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
gimple def_stmt; gimple def_stmt;
set_vinfo_for_stmt (pattern_stmt,
new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt); pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
if (pattern_stmt_info == NULL)
{
pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
}
gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
STMT_VINFO_DEF_TYPE (pattern_stmt_info) STMT_VINFO_DEF_TYPE (pattern_stmt_info)
= STMT_VINFO_DEF_TYPE (orig_stmt_info); = STMT_VINFO_DEF_TYPE (orig_stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info) STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info); = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)) if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
{ {
def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info); def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
......
...@@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt, ...@@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
/* This use is out of pattern use, if LHS has other uses that are /* This use is out of pattern use, if LHS has other uses that are
pattern uses, we should mark the stmt itself, and not the pattern pattern uses, we should mark the stmt itself, and not the pattern
stmt. */ stmt. */
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) if (TREE_CODE (lhs) == SSA_NAME)
{ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
if (is_gimple_debug (USE_STMT (use_p))) {
continue; if (is_gimple_debug (USE_STMT (use_p)))
use_stmt = USE_STMT (use_p); continue;
use_stmt = USE_STMT (use_p);
if (vinfo_for_stmt (use_stmt) if (vinfo_for_stmt (use_stmt)
&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
{ {
found = true; found = true;
break; break;
} }
} }
} }
if (!found) if (!found)
...@@ -3722,6 +3723,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3722,6 +3723,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
return false; return false;
scalar_dest = gimple_assign_lhs (stmt); scalar_dest = gimple_assign_lhs (stmt);
if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
&& is_pattern_stmt_p (stmt_info))
scalar_dest = TREE_OPERAND (scalar_dest, 0);
if (TREE_CODE (scalar_dest) != ARRAY_REF if (TREE_CODE (scalar_dest) != ARRAY_REF
&& TREE_CODE (scalar_dest) != INDIRECT_REF && TREE_CODE (scalar_dest) != INDIRECT_REF
&& TREE_CODE (scalar_dest) != COMPONENT_REF && TREE_CODE (scalar_dest) != COMPONENT_REF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment