Commit af29617a by Alan Hayward Committed by Alan Hayward

Support for vectorizing conditional expressions

2015-10-23  Alan Hayward <alan.hayward@arm.com>

gcc/
	PR tree-optimization/65947
	* tree-vect-loop.c
	(vect_is_simple_reduction_1): Find condition reductions.
	(vect_model_reduction_cost): Add condition reduction costs.
	(get_initial_def_for_reduction): Add condition reduction initial var.
	(vect_create_epilog_for_reduction): Add condition reduction epilog.
	(vectorizable_reduction): Condition reduction support.
	* tree-vect-stmts.c (vectorizable_condition): Add vect reduction arg.
	* doc/sourcebuild.texi (Vector-specific attributes): Document
	vect_max_reduc.

gcc/testsuite
	PR tree-optimization/65947
	* lib/target-supports.exp
	(check_effective_target_vect_max_reduc): Add.
	* gcc.dg/vect/pr65947-1.c: New test.
	* gcc.dg/vect/pr65947-2.c: New test.
	* gcc.dg/vect/pr65947-3.c: New test.
	* gcc.dg/vect/pr65947-4.c: New test.
	* gcc.dg/vect/pr65947-5.c: New test.
	* gcc.dg/vect/pr65947-6.c: New test.
	* gcc.dg/vect/pr65947-7.c: New test.
	* gcc.dg/vect/pr65947-8.c: New test.
	* gcc.dg/vect/pr65947-9.c: New test.
	* gcc.dg/vect/pr65947-10.c: New test.
	* gcc.dg/vect/pr65947-11.c: New test.

From-SVN: r229245
parent f3bf0d9a
2015-10-23 Alan Hayward <alan.hayward@arm.com>
PR tree-optimization/65947
* tree-vect-loop.c
(vect_is_simple_reduction_1): Find condition reductions.
(vect_model_reduction_cost): Add condition reduction costs.
(get_initial_def_for_reduction): Add condition reduction initial var.
(vect_create_epilog_for_reduction): Add condition reduction epilog.
(vectorizable_reduction): Condition reduction support.
* tree-vect-stmts.c (vectorizable_condition): Add vect reduction arg.
* doc/sourcebuild.texi (Vector-specific attributes): Document
vect_max_reduc.
2015-10-23 Richard Biener <rguenther@suse.de> 2015-10-23 Richard Biener <rguenther@suse.de>
* Makefile.in (build/genmatch.o): Properly depend on is-a.h, tree.def * Makefile.in (build/genmatch.o): Properly depend on is-a.h, tree.def
...@@ -1466,6 +1466,9 @@ Target supports conversion from @code{float} to @code{signed int}. ...@@ -1466,6 +1466,9 @@ Target supports conversion from @code{float} to @code{signed int}.
@item vect_floatuint_cvt @item vect_floatuint_cvt
Target supports conversion from @code{float} to @code{unsigned int}. Target supports conversion from @code{float} to @code{unsigned int}.
@item vect_max_reduc
Target supports max reduction for vectors.
@end table @end table
@subsubsection Thread Local Storage attributes @subsubsection Thread Local Storage attributes
......
2015-10-23 Alan Hayward <alan.hayward@arm.com>
PR tree-optimization/65947
* lib/target-supports.exp
(check_effective_target_vect_max_reduc): Add.
* gcc.dg/vect/pr65947-1.c: New test.
* gcc.dg/vect/pr65947-2.c: New test.
* gcc.dg/vect/pr65947-3.c: New test.
* gcc.dg/vect/pr65947-4.c: New test.
* gcc.dg/vect/pr65947-5.c: New test.
* gcc.dg/vect/pr65947-6.c: New test.
* gcc.dg/vect/pr65947-7.c: New test.
* gcc.dg/vect/pr65947-8.c: New test.
* gcc.dg/vect/pr65947-9.c: New test.
* gcc.dg/vect/pr65947-10.c: New test.
* gcc.dg/vect/pr65947-11.c: New test.
2015-10-23 Richard Sandiford <richard.sandiford@arm.com> 2015-10-23 Richard Sandiford <richard.sandiford@arm.com>
* gcc.c-torture/execute/20030125-1.c (floor, floorf, sin, sinf): * gcc.c-torture/execute/20030125-1.c (floor, floorf, sin, sinf):
......
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 32

/* Simple condition reduction.  Scan the first N elements of A and return
   the index of the last one that is below MIN_V, or -1 when none is.  The
   loop is deliberately a plain for/if so that it presents a conditional
   reduction to the vectorizer.  */

int
condition_reduction (int *a, int min_v)
{
  int match_idx = -1;

  for (int pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_idx = pos;
    }

  return match_idx;
}
/* Driver: run the reduction over a fixed N-element array with a threshold
   of 16 and abort unless index 19 (the last element below 16) comes
   back.  */

int
main (void)
{
  int data[N] = {
    11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
    1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32
  };

  int got = condition_reduction (data, 16);

  if (got != 19)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 32

/* Non-integer data types.  Return the value of the last of the first N
   elements of A that is below MIN_V, or 0 when no element qualifies.  */

float
condition_reduction (float *a, float min_v)
{
  float match_val = 0;

  for (int pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: feed a fixed float array through the reduction with a threshold
   of 16.7 and abort unless the result equals 10.6 rounded to float.  */

int
main (void)
{
  float data[N] = {
    11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
    1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
    21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
    31.111, 32.322
  };

  float got = condition_reduction (data, 16.7);

  if (got != (float)10.6)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 37

/* Re-use the result of the condition inside the loop.  Will fail to
   vectorize.

   For each of the first N positions, remember the latest index whose B
   element is below MIN_V (starting from N + 65) and store that running
   value into A at the same position.  Returns the final running value.  */

unsigned int
condition_reduction (unsigned int *a, unsigned int min_v, unsigned int *b)
{
  unsigned int match_idx = N + 65;

  for (unsigned int pos = 0; pos < N; pos++)
    {
      if (b[pos] < min_v)
        match_idx = pos;

      /* The running result is consumed inside the loop body.  */
      a[pos] = match_idx;
    }

  return match_idx;
}
/* Driver: run the reduction with threshold 16 over fixed A/B arrays and
   abort unless the returned index is 29.  */

int
main (void)
{
  unsigned int out[N] = {
    31, 32, 33, 34, 35, 36, 37,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20
  };
  unsigned int keys[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    31, 32, 33, 34, 35, 36, 37
  };

  unsigned int got = condition_reduction (out, 16, keys);

  if (got != 29)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 254

/* Non-simple condition reduction.  Return the value of the last of the
   first N elements of A that is below MIN_V, or 65 when there is none.
   The induction variable is an unsigned char on purpose.  */

unsigned char
condition_reduction (unsigned char *a, unsigned char min_v)
{
  unsigned char match_val = 65;

  for (unsigned char pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: the first 32 elements are given explicitly, the remaining
   N - 32 are filled with 43.  Abort unless the reduction with threshold 16
   yields 10.  */

int
main (void)
{
  unsigned char data[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32
  };

  __builtin_memset (data + 32, 43, N - 32);

  unsigned char got = condition_reduction (data, 16);

  if (got != 10)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 37

/* Non-simple condition reduction with additional variable and unsigned
   types.  Return the A element at the last position whose B element is
   below MIN_V, or N + 65 when no position qualifies.  The A element is
   loaded into a separate variable before the test on purpose.  */

unsigned int
condition_reduction (unsigned int *a, unsigned int min_v, unsigned int *b)
{
  unsigned int picked = N + 65;
  unsigned int cur;

  for (unsigned int pos = 0; pos < N; pos++)
    {
      cur = a[pos];
      if (b[pos] < min_v)
        picked = cur;
    }

  return picked;
}
/* Driver: run the reduction with threshold 16 over fixed A/B arrays and
   abort unless the selected A element is 13.  */

int
main (void)
{
  unsigned int values[N] = {
    31, 32, 33, 34, 35, 36, 37,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20
  };
  unsigned int keys[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    31, 32, 33, 34, 35, 36, 37
  };

  unsigned int got = condition_reduction (values, 16, keys);

  if (got != 13)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 27

/* Condition reduction with no valid matches at runtime.  Return the index
   of the last of the first N elements of A that is above MIN_V, or
   N + 96 when no element qualifies.  */

int
condition_reduction (int *a, int min_v)
{
  int match_idx = N + 96;

  for (int pos = 0; pos < N; pos++)
    {
      if (a[pos] > min_v)
        match_idx = pos;
    }

  return match_idx;
}
/* Driver: no element of the array exceeds the threshold 46, so the
   reduction must hand back its initial value N + 96.  */

int
main (void)
{
  int data[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27
  };

  int got = condition_reduction (data, 46);

  /* The loop must not have found any match.  */
  if (got != N + 96)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 32

/* Condition reduction where loop size is not known at compile time.  Will
   fail to vectorize.  Version inlined into main loop will vectorize.

   Return the value of the last of the first COUNT elements of A that is
   below MIN_V, or 65 when there is none.  */

unsigned char
condition_reduction (unsigned char *a, unsigned char min_v, int count)
{
  unsigned char match_val = 65;

  for (int pos = 0; pos < count; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: call the reduction over all N elements with threshold 16 and
   abort unless the result is 10.  */

int
main (void)
{
  unsigned char data[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32
  };

  unsigned char got = condition_reduction (data, 16, N);

  if (got != 10)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 30

/* Condition reduction where loop type is different than the data type.
   Return the value of the last of the first N elements of A that is below
   MIN_V, or N + 65 when there is none.  The induction variable is a char
   while the data is int.  */

int
condition_reduction (int *a, int min_v)
{
  int match_val = N + 65;

  for (char pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: run the reduction with threshold 16 over a fixed int array and
   abort unless the result is -3.  */

int
main (void)
{
  int data[N] = {
    67, 32, 45, 43, 21, -11, 12, 3, 4, 5,
    6, 76, -32, 56, -32, -1, 4, 5, 6, 99,
    43, 22, -3, 22, 16, 34, 55, 31, 87, 324
  };

  int got = condition_reduction (data, 16);

  if (got != -3)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 43

/* Condition reduction where the comparison has a different type from the
   data.  Will fail to vectorize.

   Return (widened to int) the short A element at the last position whose
   int B element is below MIN_V, or N + 65 when no position qualifies.  */

int
condition_reduction (short *a, int min_v, int *b)
{
  int picked = N + 65;
  short cur;

  for (int pos = 0; pos < N; pos++)
    {
      cur = a[pos];
      if (b[pos] < min_v)
        picked = cur;
    }

  return picked;
}
/* Driver: run the reduction with threshold 16 over a fixed short data
   array and int key array; abort unless the result is 27.  */

int
main (void)
{
  short values[N] = {
    31, -32, 133, 324, 335, 36, 37, 45, 11, 65,
    1, -28, 3, 48, 5, -68, 7, 88, 89, 180,
    121, -122, 123, 124, -125, 126, 127, 128, 129, 130,
    11, 12, 13, 14, -15, -16, 17, 18, 19, 20,
    33, 27, 99
  };
  int keys[N] = {
    11, -12, -13, 14, 15, 16, 17, 18, 19, 20,
    21, -22, 23, 24, -25, 26, 27, 28, 29, 30,
    1, 62, 3, 14, -15, 6, 37, 48, 99, 10,
    31, -32, 33, 34, -35, 36, 37, 56, 54, 22,
    73, 2, 87
  };

  int got = condition_reduction (values, 16, keys);

  if (got != 27)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 27

/* Condition reduction with multiple types in the comparison.  Will fail to
   vectorize.

   Return (as int) the last of the first N char elements of A that
   compares below the int MIN_V, or N + 65 when there is none.  */

int
condition_reduction (char *a, int min_v)
{
  int match_val = N + 65;

  for (int pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: run the reduction with threshold 16 over a fixed char array and
   abort unless the result is 12.  */

int
main (void)
{
  char data[N] = {
    1, 28, 3, 48, 5, 68, 7, -88, 89, 180,
    121, 122, -123, 124, 12, -12, 12, 67, 84, 122,
    67, 55, 112, 22, 45, 23, 111
  };

  int got = condition_reduction (data, 16);

  if (got != 12)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" { xfail { ! vect_max_reduc } } } } */
/* { dg-require-effective-target vect_condition } */
extern void abort (void) __attribute__ ((noreturn));
#define N 255

/* Condition reduction with maximum possible loop size.  Will fail to
   vectorize because the vectorisation requires a slot for default values.

   Return the value of the last of the first N elements of A that is below
   MIN_V, or -72 (converted to char) when there is none.  */

char
condition_reduction (char *a, char min_v)
{
  char match_val = -72;

  for (int pos = 0; pos < N; pos++)
    {
      if (a[pos] < min_v)
        match_val = a[pos];
    }

  return match_val;
}
/* Driver: the first 32 elements are given explicitly, the remaining
   N - 32 are filled with 43.  Abort unless the reduction with threshold 16
   yields 10.

   main is declared to return int, as required for a hosted C program; the
   previous "char main (void)" declaration was not a valid definition of
   the program entry point.  */

int
main (void)
{
  char a[N] = {
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32
  };

  __builtin_memset (a+32, 43, N-32);

  char ret = condition_reduction (a, 16);

  if (ret != 10)
    abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
...@@ -6485,3 +6485,12 @@ proc check_effective_target_builtin_eh_return { } { ...@@ -6485,3 +6485,12 @@ proc check_effective_target_builtin_eh_return { } {
} }
} "" ] } "" ]
} }
# Return 1 if the target supports max reduction for vectors, 0 otherwise.
# The answer is decided purely from the target triplet: any aarch64*-*-*
# or arm*-*-* target is accepted.
# NOTE(review): arm*-*-* presumably also matches ARM targets without a
# vector unit -- confirm whether that is intended.

proc check_effective_target_vect_max_reduc { } {
    return [expr { [istarget aarch64*-*-*] || [istarget arm*-*-*] }]
}
...@@ -2244,6 +2244,11 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi, ...@@ -2244,6 +2244,11 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
inner loop (def of a3) inner loop (def of a3)
a2 = phi < a3 > a2 = phi < a3 >
(4) Detect condition expressions, ie:
for (int i = 0; i < N; i++)
if (a[i] < val)
ret_val = a[i];
If MODIFY is true it tries also to rework the code in-place to enable If MODIFY is true it tries also to rework the code in-place to enable
detection of more reduction patterns. For the time being we rewrite detection of more reduction patterns. For the time being we rewrite
"res -= RHS" into "rhs += -RHS" when it seems worthwhile. "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
...@@ -2252,7 +2257,8 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi, ...@@ -2252,7 +2257,8 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
static gimple * static gimple *
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
bool check_reduction, bool *double_reduc, bool check_reduction, bool *double_reduc,
bool modify, bool need_wrapping_integral_overflow) bool modify, bool need_wrapping_integral_overflow,
enum vect_reduction_type *v_reduc_type)
{ {
struct loop *loop = (gimple_bb (phi))->loop_father; struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
...@@ -2269,6 +2275,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2269,6 +2275,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
bool phi_def; bool phi_def;
*double_reduc = false; *double_reduc = false;
*v_reduc_type = TREE_CODE_REDUCTION;
/* If CHECK_REDUCTION is true, we assume inner-most loop vectorization, /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
otherwise, we assume outer loop vectorization. */ otherwise, we assume outer loop vectorization. */
...@@ -2414,13 +2421,17 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2414,13 +2421,17 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
&& SSA_NAME_DEF_STMT (op1) == phi) && SSA_NAME_DEF_STMT (op1) == phi)
code = PLUS_EXPR; code = PLUS_EXPR;
if (check_reduction if (check_reduction)
&& (!commutative_tree_code (code) || !associative_tree_code (code)))
{ {
if (dump_enabled_p ()) if (code == COND_EXPR)
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, *v_reduc_type = COND_REDUCTION;
"reduction: not commutative/associative: "); else if (!commutative_tree_code (code) || !associative_tree_code (code))
return NULL; {
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: not commutative/associative: ");
return NULL;
}
} }
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS) if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
...@@ -2516,47 +2527,50 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2516,47 +2527,50 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
and therefore vectorizing reductions in the inner-loop during and therefore vectorizing reductions in the inner-loop during
outer-loop vectorization is safe. */ outer-loop vectorization is safe. */
/* CHECKME: check for !flag_finite_math_only too? */ if (*v_reduc_type != COND_REDUCTION)
if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
&& check_reduction)
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe fp math optimization: ");
return NULL;
}
else if (INTEGRAL_TYPE_P (type) && check_reduction)
{ {
if (!operation_no_trapping_overflow (type, code)) /* CHECKME: check for !flag_finite_math_only too? */
if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
&& check_reduction)
{ {
/* Changing the order of operations changes the semantics. */ /* Changing the order of operations changes the semantics. */
if (dump_enabled_p ()) if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization" "reduction: unsafe fp math optimization: ");
" (overflow traps): ");
return NULL; return NULL;
} }
if (need_wrapping_integral_overflow else if (INTEGRAL_TYPE_P (type) && check_reduction)
&& !TYPE_OVERFLOW_WRAPS (type) {
&& operation_can_overflow (code)) if (!operation_no_trapping_overflow (type, code))
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization"
" (overflow traps): ");
return NULL;
}
if (need_wrapping_integral_overflow
&& !TYPE_OVERFLOW_WRAPS (type)
&& operation_can_overflow (code))
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization"
" (overflow doesn't wrap): ");
return NULL;
}
}
else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
{ {
/* Changing the order of operations changes the semantics. */ /* Changing the order of operations changes the semantics. */
if (dump_enabled_p ()) if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization" "reduction: unsafe fixed-point math optimization: ");
" (overflow doesn't wrap): ");
return NULL; return NULL;
} }
} }
else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe fixed-point math optimization: ");
return NULL;
}
/* If we detected "res -= x[i]" earlier, rewrite it into /* If we detected "res -= x[i]" earlier, rewrite it into
"res += -x[i]" now. If this turns out to be useless reassoc "res += -x[i]" now. If this turns out to be useless reassoc
...@@ -2632,6 +2646,16 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2632,6 +2646,16 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
{ {
if (check_reduction) if (check_reduction)
{ {
if (code == COND_EXPR)
{
/* No current known use where this case would be useful. */
if (dump_enabled_p ())
report_vect_op (MSG_NOTE, def_stmt,
"detected reduction: cannot currently swap "
"operands for cond_expr");
return NULL;
}
/* Swap operands (just for simplicity - so that the rest of the code /* Swap operands (just for simplicity - so that the rest of the code
can assume that the reduction variable is always the last (second) can assume that the reduction variable is always the last (second)
argument). */ argument). */
...@@ -2655,7 +2679,8 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2655,7 +2679,8 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
} }
/* Try to find SLP reduction chain. */ /* Try to find SLP reduction chain. */
if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt)) if (check_reduction && code != COND_EXPR
&& vect_is_slp_reduction (loop_info, phi, def_stmt))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
report_vect_op (MSG_NOTE, def_stmt, report_vect_op (MSG_NOTE, def_stmt,
...@@ -2677,11 +2702,13 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi, ...@@ -2677,11 +2702,13 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple *phi,
static gimple * static gimple *
vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi, vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
bool check_reduction, bool *double_reduc, bool check_reduction, bool *double_reduc,
bool need_wrapping_integral_overflow) bool need_wrapping_integral_overflow,
enum vect_reduction_type *v_reduc_type)
{ {
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction, return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
double_reduc, false, double_reduc, false,
need_wrapping_integral_overflow); need_wrapping_integral_overflow,
v_reduc_type);
} }
/* Wrapper around vect_is_simple_reduction_1, which will modify code /* Wrapper around vect_is_simple_reduction_1, which will modify code
...@@ -2693,9 +2720,11 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi, ...@@ -2693,9 +2720,11 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi,
bool check_reduction, bool *double_reduc, bool check_reduction, bool *double_reduc,
bool need_wrapping_integral_overflow) bool need_wrapping_integral_overflow)
{ {
enum vect_reduction_type v_reduc_type;
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction, return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
double_reduc, true, double_reduc, true,
need_wrapping_integral_overflow); need_wrapping_integral_overflow,
&v_reduc_type);
} }
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
...@@ -3200,6 +3229,10 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -3200,6 +3229,10 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
else else
target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info)); target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
/* Condition reductions generate two reductions in the loop. */
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
ncopies *= 2;
/* Cost of reduction op inside loop. */ /* Cost of reduction op inside loop. */
unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
stmt_info, 0, vect_body); stmt_info, 0, vect_body);
...@@ -3229,9 +3262,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -3229,9 +3262,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
code = gimple_assign_rhs_code (orig_stmt); code = gimple_assign_rhs_code (orig_stmt);
/* Add in cost for initial definition. */ /* Add in cost for initial definition.
prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec, For cond reduction we have four vectors: initial index, step, initial
stmt_info, 0, vect_prologue); result of the data reduction, initial value of the index reduction. */
int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
== COND_REDUCTION ? 4 : 1;
prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
scalar_to_vec, stmt_info, 0,
vect_prologue);
/* Determine cost of epilogue code. /* Determine cost of epilogue code.
...@@ -3242,10 +3280,30 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -3242,10 +3280,30 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
{ {
if (reduc_code != ERROR_MARK) if (reduc_code != ERROR_MARK)
{ {
epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt, if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
stmt_info, 0, vect_epilogue); {
epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar, /* An EQ stmt and an COND_EXPR stmt. */
stmt_info, 0, vect_epilogue); epilogue_cost += add_stmt_cost (target_cost_data, 2,
vector_stmt, stmt_info, 0,
vect_epilogue);
/* Reduction of the max index and a reduction of the found
values. */
epilogue_cost += add_stmt_cost (target_cost_data, 2,
vec_to_scalar, stmt_info, 0,
vect_epilogue);
/* A broadcast of the max value. */
epilogue_cost += add_stmt_cost (target_cost_data, 1,
scalar_to_vec, stmt_info, 0,
vect_epilogue);
}
else
{
epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
stmt_info, 0, vect_epilogue);
epilogue_cost += add_stmt_cost (target_cost_data, 1,
vec_to_scalar, stmt_info, 0,
vect_epilogue);
}
} }
else else
{ {
...@@ -3831,15 +3889,17 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, ...@@ -3831,15 +3889,17 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
case MIN_EXPR: case MIN_EXPR:
case MAX_EXPR: case MAX_EXPR:
case COND_EXPR: case COND_EXPR:
if (adjustment_def) if (adjustment_def)
{ {
*adjustment_def = NULL_TREE; *adjustment_def = NULL_TREE;
init_def = vect_get_vec_def_for_operand (init_val, stmt); if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION)
break; {
} init_def = vect_get_vec_def_for_operand (init_val, stmt);
break;
}
}
init_def = build_vector_from_val (vectype, init_value); init_def = build_vector_from_val (vectype, init_value);
break; break;
default: default:
gcc_unreachable (); gcc_unreachable ();
...@@ -3869,6 +3929,8 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val, ...@@ -3869,6 +3929,8 @@ get_initial_def_for_reduction (gimple *stmt, tree init_val,
DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
SLP_NODE is an SLP node containing a group of reduction statements. The SLP_NODE is an SLP node containing a group of reduction statements. The
first one in this group is STMT. first one in this group is STMT.
INDUCTION_INDEX is the index of the loop for condition reductions.
Otherwise it is undefined.
This function: This function:
1. Creates the reduction def-use cycles: sets the arguments for 1. Creates the reduction def-use cycles: sets the arguments for
...@@ -3914,7 +3976,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, ...@@ -3914,7 +3976,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
int ncopies, enum tree_code reduc_code, int ncopies, enum tree_code reduc_code,
vec<gimple *> reduction_phis, vec<gimple *> reduction_phis,
int reduc_index, bool double_reduc, int reduc_index, bool double_reduc,
slp_tree slp_node) slp_tree slp_node, tree induction_index)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_phi_info; stmt_vec_info prev_phi_info;
...@@ -4214,11 +4276,123 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, ...@@ -4214,11 +4276,123 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
} }
else else
new_phi_result = PHI_RESULT (new_phis[0]); new_phi_result = PHI_RESULT (new_phis[0]);
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
/* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
various data values where the condition matched and another vector
(INDUCTION_INDEX) containing all the indexes of those matches. We
need to extract the last matching index (which will be the index with
highest value) and use this to index into the data vector.
For the case where there were no matches, the data vector will contain
all default values and the index vector will be all zeros. */
/* Get various versions of the type of the vector of indexes. */
tree index_vec_type = TREE_TYPE (induction_index);
gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
tree index_vec_type_signed = signed_type_for (index_vec_type);
tree index_scalar_type = TREE_TYPE (index_vec_type);
/* Get an unsigned integer version of the type of the data vector. */
int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
tree scalar_type_unsigned = make_unsigned_type (scalar_precision);
tree vectype_unsigned = build_vector_type
(scalar_type_unsigned, TYPE_VECTOR_SUBPARTS (vectype));
/* First we need to create a vector (ZERO_VEC) of zeros and another
vector (MAX_INDEX_VEC) filled with the last matching index, which we
can create using a MAX reduction and then expanding.
In the case where the loop never made any matches, the max index will
be zero. */
/* Vector of {0, 0, 0,...}. */
tree zero_vec = make_ssa_name (vectype);
tree zero_vec_rhs = build_zero_cst (vectype);
gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
/* Find maximum value from the vector of found indexes. */
tree max_index = make_ssa_name (index_scalar_type);
gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
induction_index);
gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
/* Vector of {max_index, max_index, max_index,...}. */
tree max_index_vec = make_ssa_name (index_vec_type);
tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
max_index);
gimple *max_index_vec_stmt = gimple_build_assign (max_index_vec,
max_index_vec_rhs);
gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
/* Next we compare the new vector (MAX_INDEX_VEC) full of max indexes
with the vector (INDUCTION_INDEX) of found indexes, choosing values
from the data vector (NEW_PHI_RESULT) for matches, 0 (ZERO_VEC)
otherwise. Only one value should match, resulting in a vector
(VEC_COND) with one data value and the rest zeros.
In the case where the loop never made any matches, every index will
match, resulting in a vector with all data values (which will all be
the default value). */
/* Compare the max index vector to the vector of found indexes to find
the position of the max value. */
tree vec_compare = make_ssa_name (index_vec_type_signed);
gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
induction_index,
max_index_vec);
gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
/* Use the compare to choose either values from the data vector or
zero. */
tree vec_cond = make_ssa_name (vectype);
gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
vec_compare, new_phi_result,
zero_vec);
gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
/* Finally we need to extract the data value from the vector (VEC_COND)
into a scalar (MATCHED_DATA_REDUC). Logically we want to do a OR
reduction, but because this doesn't exist, we can use a MAX reduction
instead. The data value might be signed or a float so we need to cast
it first.
In the case where the loop never made any matches, the data values are
all identical, and so will reduce down correctly. */
/* Make the matched data values unsigned. */
tree vec_cond_cast = make_ssa_name (vectype_unsigned);
tree vec_cond_cast_rhs = build1 (VIEW_CONVERT_EXPR, vectype_unsigned,
vec_cond);
gimple *vec_cond_cast_stmt = gimple_build_assign (vec_cond_cast,
VIEW_CONVERT_EXPR,
vec_cond_cast_rhs);
gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
/* Reduce down to a scalar value. */
tree data_reduc = make_ssa_name (scalar_type_unsigned);
optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
optab_default);
gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
!= CODE_FOR_nothing);
gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
REDUC_MAX_EXPR,
vec_cond_cast);
gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
/* Convert the reduced value back to the result type and set as the
result. */
tree data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
data_reduc);
epilog_stmt = gimple_build_assign (new_scalar_dest, data_reduc_cast);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
scalar_results.safe_push (new_temp);
}
/* 2.3 Create the reduction code, using one of the three schemes described /* 2.3 Create the reduction code, using one of the three schemes described
above. In SLP we simply need to extract all the elements from the above. In SLP we simply need to extract all the elements from the
vector (without reducing them), so we use scalar shifts. */ vector (without reducing them), so we use scalar shifts. */
if (reduc_code != ERROR_MARK && !slp_reduc) else if (reduc_code != ERROR_MARK && !slp_reduc)
{ {
tree tmp; tree tmp;
tree vec_elem_type; tree vec_elem_type;
...@@ -4739,6 +4913,15 @@ vect_finalize_reduction: ...@@ -4739,6 +4913,15 @@ vect_finalize_reduction:
and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
sequence that had been detected and replaced by the pattern-stmt (STMT). sequence that had been detected and replaced by the pattern-stmt (STMT).
This function also handles reduction of condition expressions, for example:
for (int i = 0; i < N; i++)
if (a[i] < value)
last = a[i];
This is handled by vectorizing the loop and creating an additional vector
containing the loop indexes for which "a[i] < value" was true. In the
loop epilogue this is reduced to a single max value and then used to
index into the vector of results.
In some cases of reduction patterns, the type of the reduction variable X is In some cases of reduction patterns, the type of the reduction variable X is
different than the type of the other arguments of STMT. different than the type of the other arguments of STMT.
In such cases, the vectype that is used when transforming STMT into a vector In such cases, the vectype that is used when transforming STMT into a vector
...@@ -4813,6 +4996,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -4813,6 +4996,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
int vec_num; int vec_num;
tree def0, def1, tem, op0, op1 = NULL_TREE; tree def0, def1, tem, op0, op1 = NULL_TREE;
bool first_p = true; bool first_p = true;
tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
/* In case of reduction chain we switch to the first stmt in the chain, but /* In case of reduction chain we switch to the first stmt in the chain, but
we don't update STMT_INFO, since only the last stmt is marked as reduction we don't update STMT_INFO, since only the last stmt is marked as reduction
...@@ -4981,8 +5165,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -4981,8 +5165,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false; return false;
} }
gimple *tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt, gimple *tmp = vect_is_simple_reduction
!nested_cycle, &dummy, false); (loop_vinfo, reduc_def_stmt,
!nested_cycle, &dummy, false,
&STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info));
if (orig_stmt) if (orig_stmt)
gcc_assert (tmp == orig_stmt gcc_assert (tmp == orig_stmt
|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt); || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
...@@ -5007,12 +5193,14 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -5007,12 +5193,14 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
if (code == COND_EXPR) if (code == COND_EXPR)
{ {
if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL)) /* Only call during the analysis stage, otherwise we'll lose
STMT_VINFO_TYPE. */
if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL,
ops[reduc_index], 0, NULL))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported condition in reduction\n"); "unsupported condition in reduction\n");
return false; return false;
} }
} }
...@@ -5136,49 +5324,74 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -5136,49 +5324,74 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
} }
epilog_reduc_code = ERROR_MARK; epilog_reduc_code = ERROR_MARK;
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
{ {
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out, if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
{
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
optab_default); optab_default);
if (!reduc_optab) if (!reduc_optab)
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"no optab for reduction.\n"); "no optab for reduction.\n");
epilog_reduc_code = ERROR_MARK;
}
else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
{
optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduc op not supported by target.\n");
epilog_reduc_code = ERROR_MARK; epilog_reduc_code = ERROR_MARK;
} }
} else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
{
optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduc op not supported by target.\n");
epilog_reduc_code = ERROR_MARK;
}
}
}
else
{
if (!nested_cycle || double_reduc)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"no reduc code for scalar code.\n");
return false;
}
}
} }
else else
{ {
if (!nested_cycle || double_reduc) int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
{ cr_index_scalar_type = make_unsigned_type (scalar_precision);
if (dump_enabled_p ()) cr_index_vector_type = build_vector_type
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
"no reduc code for scalar code.\n");
return false; epilog_reduc_code = REDUC_MAX_EXPR;
} optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
optab_default);
if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
== CODE_FOR_nothing)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"reduc max op not supported by target.\n");
return false;
}
} }
if (double_reduc && ncopies > 1) if ((double_reduc
|| STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
&& ncopies > 1)
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"multiple types in double reduction\n"); "multiple types in double reduction or condition "
"reduction.\n");
return false; return false;
} }
...@@ -5202,6 +5415,34 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -5202,6 +5415,34 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
} }
} }
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
widest_int ni;
if (! max_loop_iterations (loop, &ni))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"loop count not known, cannot create cond "
"reduction.\n");
return false;
}
/* Convert backedges to iterations. */
ni += 1;
/* The additional index will be the same type as the condition. Check
that the number of loop iterations fits into the range of this type
less one (because index zero is reserved to mean that there were no
matches). */
tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
if (wi::geu_p (ni, wi::to_widest (max_index)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"loop size is greater than data size.\n");
return false;
}
}
if (!vec_stmt) /* transformation not required. */ if (!vec_stmt) /* transformation not required. */
{ {
if (first_p if (first_p
...@@ -5414,17 +5655,107 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -5414,17 +5655,107 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
prev_phi_info = vinfo_for_stmt (new_phi); prev_phi_info = vinfo_for_stmt (new_phi);
} }
tree indx_before_incr, indx_after_incr, cond_name = NULL;
/* Finalize the reduction-phi (set its arguments) and create the /* Finalize the reduction-phi (set its arguments) and create the
epilog reduction code. */ epilog reduction code. */
if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node) if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
{ {
new_temp = gimple_assign_lhs (*vec_stmt); new_temp = gimple_assign_lhs (*vec_stmt);
vect_defs[0] = new_temp; vect_defs[0] = new_temp;
/* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
which is updated with the current index of the loop for every match of
the original loop's cond_expr (VEC_STMT). This results in a vector
containing the last time the condition passed for that vector lane.
The first match will be a 1 to allow 0 to be used for non-matching
indexes. If there are no matches at all then the vector will be all
zeroes. */
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
{
int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
int k;
gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
/* First we create a simple vector induction variable which starts
with the values {1,2,3,...} (SERIES_VECT) and increments by the
vector size (STEP). */
/* Create a {1,2,3,...} vector. */
tree *vtemp = XALLOCAVEC (tree, nunits_out);
for (k = 0; k < nunits_out; ++k)
vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
tree series_vect = build_vector (cr_index_vector_type, vtemp);
/* Create a vector of the step value. */
tree step = build_int_cst (cr_index_scalar_type, nunits_out);
tree vec_step = build_vector_from_val (cr_index_vector_type, step);
/* Create an induction variable. */
gimple_stmt_iterator incr_gsi;
bool insert_after;
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
insert_after, &indx_before_incr, &indx_after_incr);
/* Next create a new phi node vector (NEW_PHI_TREE) which starts
filled with zeros (VEC_ZERO). */
/* Create a vector of 0s. */
tree zero = build_zero_cst (cr_index_scalar_type);
tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
/* Create a vector phi node. */
tree new_phi_tree = make_ssa_name (cr_index_vector_type);
new_phi = create_phi_node (new_phi_tree, loop->header);
set_vinfo_for_stmt (new_phi,
new_stmt_vec_info (new_phi, loop_vinfo));
add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
UNKNOWN_LOCATION);
/* Now take the condition from the loop's original cond_expr
(VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
every match uses values from the induction variable
(INDEX_BEFORE_INCR) otherwise uses values from the phi node
(NEW_PHI_TREE).
Finally, we update the phi (NEW_PHI_TREE) to take the value of
the new cond_expr (INDEX_COND_EXPR). */
/* Turn the condition from vec_stmt into an ssa name. */
gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
tree ccompare = gimple_assign_rhs1 (*vec_stmt);
tree ccompare_name = make_ssa_name (TREE_TYPE (ccompare));
gimple *ccompare_stmt = gimple_build_assign (ccompare_name,
ccompare);
gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
update_stmt (*vec_stmt);
/* Create a conditional, where the condition is taken from vec_stmt
(CCOMPARE_NAME), then is the induction index (INDEX_BEFORE_INCR)
and else is the phi (NEW_PHI_TREE). */
tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
ccompare_name, indx_before_incr,
new_phi_tree);
cond_name = make_ssa_name (cr_index_vector_type);
gimple *index_condition = gimple_build_assign (cond_name,
index_cond_expr);
gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
loop_vinfo);
STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
set_vinfo_for_stmt (index_condition, index_vec_info);
/* Update the phi with the vec cond. */
add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
UNKNOWN_LOCATION);
}
} }
vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies, vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
epilog_reduc_code, phis, reduc_index, epilog_reduc_code, phis, reduc_index,
double_reduc, slp_node); double_reduc, slp_node, cond_name);
return true; return true;
} }
......
...@@ -7202,21 +7202,24 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -7202,21 +7202,24 @@ vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
if (reduc_index && STMT_SLP_TYPE (stmt_info)) if (reduc_index && STMT_SLP_TYPE (stmt_info))
return false; return false;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
return false; {
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
&& !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
&& reduc_def)) && reduc_def))
return false; return false;
/* FORNOW: not yet supported. */ /* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info)) if (STMT_VINFO_LIVE_P (stmt_info))
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"value used after loop.\n"); "value used after loop.\n");
return false; return false;
}
} }
/* Is vectorizable conditional operation? */ /* Is vectorizable conditional operation? */
...@@ -7865,6 +7868,7 @@ new_stmt_vec_info (gimple *stmt, vec_info *vinfo) ...@@ -7865,6 +7868,7 @@ new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL; STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL;
STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
STMT_VINFO_DR_OFFSET (res) = NULL; STMT_VINFO_DR_OFFSET (res) = NULL;
...@@ -8118,8 +8122,8 @@ vect_is_simple_use (tree operand, vec_info *vinfo, ...@@ -8118,8 +8122,8 @@ vect_is_simple_use (tree operand, vec_info *vinfo,
if (TREE_CODE (operand) != SSA_NAME) if (TREE_CODE (operand) != SSA_NAME)
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not ssa-name.\n"); "not ssa-name.\n");
return false; return false;
} }
......
...@@ -60,6 +60,12 @@ enum vect_def_type { ...@@ -60,6 +60,12 @@ enum vect_def_type {
vect_unknown_def_type vect_unknown_def_type
}; };
/* Define type of reduction. */
enum vect_reduction_type {
/* Reduction whose epilogue operation is looked up from the scalar
operation's tree code. new_stmt_vec_info assigns this value to every
newly created stmt_vec_info. */
TREE_CODE_REDUCTION,
/* Reduction of a condition expression, e.g.
if (a[i] < value) last = a[i];
vectorizable_reduction handles it with an additional vector of loop
indexes that is reduced using REDUC_MAX_EXPR. */
COND_REDUCTION
};
#define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
|| ((D) == vect_double_reduction_def) \ || ((D) == vect_double_reduction_def) \
|| ((D) == vect_nested_cycle)) || ((D) == vect_nested_cycle))
...@@ -581,6 +587,10 @@ typedef struct _stmt_vec_info { ...@@ -581,6 +587,10 @@ typedef struct _stmt_vec_info {
/* For both loads and stores. */ /* For both loads and stores. */
bool simd_lane_access_p; bool simd_lane_access_p;
/* For reduction loops, this is the type of reduction. */
enum vect_reduction_type v_reduc_type;
} *stmt_vec_info; } *stmt_vec_info;
/* Access Functions. */ /* Access Functions. */
...@@ -609,6 +619,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) ...@@ -609,6 +619,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
#define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S) (S)->strided_p #define STMT_VINFO_STRIDED_P(S) (S)->strided_p
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
#define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
#define STMT_VINFO_DR_INIT(S) (S)->dr_init #define STMT_VINFO_DR_INIT(S) (S)->dr_init
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment