Commit 5a951baa by Richard Biener (committed by Richard Biener)

re PR tree-optimization/87914 (gcc fails to vectorize bitreverse code)

2018-11-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87914
	* tree-vect-loop.c (vect_is_simple_reduction): Improve detection
	of nested cycles.
	(vectorizable_reduction): Handle shifts and rotates by dispatching
	to vectorizable_shift.
	* tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Handle
	in-loop uses of vect_nested_cycle defs.  Merge cycle and internal
	def cases.
	(vectorizable_shift): Export and handle being called as
	vect_nested_cycle.
	(vect_analyze_stmt): Call vectorizable_shift after
	vectorizable_reduction.
	* tree-vectorizer.h (vectorizable_shift): Declare.

	* lib/target-supports.exp (check_effective_target_vect_var_shift): New.
	(check_avx2_available): Likewise.
	* g++.dg/vect/pr87914.cc: New testcase.

From-SVN: r265876
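For context, the PR's pattern is an outer loop whose body contains an inner
cycle updating a value with shifts, where the shift amount is itself defined
inside the loop.  A minimal sketch of that shape (distilled from the testcase
below; the function name and loop bounds are illustrative, not from the
commit):

    // Vectorizing the outer i-loop turns the inner cycle's x, mask and s
    // into values carried around the inner back edge (vect_nested_cycle
    // defs).  Because the shift amount s is defined inside the loop, the
    // vectorizer has to use the vector/vector shift optabs.
    void bit_reverse_all (unsigned *a, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          unsigned x = a[i];
          unsigned mask = ~0u;
          for (unsigned s = 16; s > 0; s >>= 1)
            {
              mask ^= (mask << s);
              x = ((x >> s) & mask) | ((x << s) & ~mask);
            }
          a[i] = x;
        }
    }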
gcc/testsuite/g++.dg/vect/pr87914.cc (new file):
// { dg-do run }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx2" { target { avx2_runtime } } }

extern "C" int memcmp(const void *s1, const void *s2, __SIZE_TYPE__ n);
extern "C" void abort(void);

template <typename T>
T reverseBits(T x)
{
  unsigned int s = sizeof(x) * 8;
  T mask = ~T(0);
  while ((s >>= 1) > 0)
    {
      mask ^= (mask << s);
      x = ((x >> s) & mask) | ((x << s) & ~mask); // unsupported use in stmt
    }
  return x;
}

void __attribute__((noinline,noipa))
test_reverseBits(unsigned* x)
{
#pragma omp simd aligned(x:32)
  for (int i = 0; i < 16; ++i)
    x[i] = reverseBits(x[i]); // couldn't vectorize loop
}

int main()
{
  unsigned arr[16] __attribute__((aligned(32)))
    = { 0x01020304, 0x05060708, 0x0a0b0c0d, 0x0e0f1011,
        0x11121314, 0x45065708, 0xfa0b3c0du, 0x0e0f1211,
        0x21222324, 0x55066708, 0xfa0b2c0du, 0x1e0f1011,
        0x31323334, 0x65067708, 0xfa0b5c0du, 0x0e3f1011 };
  unsigned arr2[16]
    = { 0x20c04080, 0x10e060a0, 0xb030d050, 0x8808f070u,
        0x28c84888, 0x10ea60a2, 0xb03cd05f, 0x8848f070u,
        0x24c44484, 0x10e660aa, 0xb034d05f, 0x8808f078u,
        0x2ccc4c8c, 0x10ee60a6, 0xb03ad05f, 0x8808fc70u };
  test_reverseBits (arr);
  if (memcmp (arr, arr2, sizeof (arr)) != 0)
    abort ();
  return 0;
}

// { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_var_shift && vect_int } } } }
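Note how the test ties the pieces together: with -fopenmp-simd the omp simd
loop becomes the outer loop of a nest whose inner loop is reverseBits' while
loop, so success is reported as "OUTER LOOP VECTORIZED" in the vect dump; the
scan is gated on vect_int plus the new vect_var_shift keyword added below,
since the transform needs variable vector shifts.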
gcc/testsuite/lib/target-supports.exp:

@@ -5329,6 +5329,15 @@ proc check_effective_target_vect_shift { } {
             && [check_effective_target_s390_vx]) }}]
}

+# Return 1 if the target supports hardware vector shift by register operation.
+
+proc check_effective_target_vect_var_shift { } {
+    return [check_cached_effective_target_indexed vect_var_shift {
+        expr {(([istarget i?86-*-*] || [istarget x86_64-*-*])
+               && [check_avx2_available])
+        }}]
+}
+
proc check_effective_target_whole_vector_shift { } {
    if { [istarget i?86-*-*] || [istarget x86_64-*-*]
         || [istarget ia64-*-*]

@@ -7163,6 +7172,19 @@ proc check_avx_available { } {
    return 0;
}

+# Return true if we are compiling for AVX2 target.
+
+proc check_avx2_available { } {
+    if { [check_no_compiler_messages avx_available assembly {
+        #ifndef __AVX2__
+        #error unsupported
+        #endif
+    } ""] } {
+        return 1;
+    }
+    return 0;
+}
+
# Return true if we are compiling for SSSE3 target.

proc check_ssse3_available { } {
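As an aside (an illustrative sketch, not part of the commit): the per-lane
variable shifts that vect_var_shift probes for arrived on x86 with AVX2,
which is why the keyword is gated on check_avx2_available; earlier SSE/AVX
integer shifts take a single count applied to all lanes. For example:

    #include <immintrin.h>

    // AVX2 vpsrlvd: shift each 32-bit lane of a right by the
    // corresponding lane of count (compile with -mavx2).
    __m256i shift_right_per_lane (__m256i a, __m256i count)
    {
      return _mm256_srlv_epi32 (a, count);
    }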
gcc/tree-vect-loop.c:
@@ -2843,6 +2843,11 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
              return NULL;
            }

+         /* For inner loop reductions in nested vectorization there are no
+            constraints on the number of uses in the inner loop.  */
+         if (loop == vect_loop->inner)
+           continue;
+
          nloop_uses++;
          if (nloop_uses > 1)
            {

@@ -2901,13 +2906,19 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
      else
        /* We can have more than one loop-closed PHI.  */
        lcphis.safe_push (as_a <gphi *> (use_stmt));
-      if (nloop_uses > 1)
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "reduction used in loop.\n");
-         return NULL;
-       }
    }
+
+  /* If this isn't a nested cycle or if the nested cycle reduction value
+     is used outside of the inner loop we cannot handle uses of the reduction
+     value.  */
+  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
+  if ((!nested_in_vect_loop || !lcphis.is_empty ())
+      && nloop_uses > 1)
+    {
+      if (dump_enabled_p ())
+       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                        "reduction used in loop.\n");
+      return NULL;
+    }
  /* If DEF_STMT is a phi node itself, we expect it to have a single argument

@@ -2968,9 +2979,15 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
    }

  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
-  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
  code = orig_code = gimple_assign_rhs_code (def_stmt);

+  if (nested_in_vect_loop && !check_reduction)
+    {
+      if (dump_enabled_p ())
+       report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
+      return def_stmt_info;
+    }
+
  /* We can handle "res -= x[i]", which is non-associative by
     simply rewriting this into "res += -x[i]".  Avoid changing
     gimple instruction for the first simple tests and only do this
@@ -6448,6 +6465,19 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
  vec_mode = TYPE_MODE (vectype_in);
  poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);

+  if (nested_cycle)
+    {
+      def_bb = gimple_bb (reduc_def_phi);
+      def_stmt_loop = def_bb->loop_father;
+      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+                                      loop_preheader_edge (def_stmt_loop));
+      stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
+      if (def_arg_stmt_info
+         && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
+             == vect_double_reduction_def))
+       double_reduc = true;
+    }
+
  if (code == COND_EXPR)
    {
      /* Only call during the analysis stage, otherwise we'll lose

@@ -6462,20 +6492,26 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
          return false;
        }
    }
-  else
+  else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
+          || code == LROTATE_EXPR || code == RROTATE_EXPR)
    {
-      /* 4. Supportable by target?  */
-
-      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
-         || code == LROTATE_EXPR || code == RROTATE_EXPR)
+      /* Only call during the analysis stage, otherwise we'll lose
+        STMT_VINFO_TYPE.  We only support this for nested cycles
+        without double reductions at the moment.  */
+      if (!nested_cycle
+         || double_reduc
+         || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL,
+                                               NULL, cost_vec)))
        {
-         /* Shifts and rotates are only supported by vectorizable_shifts,
-            not vectorizable_reduction.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "unsupported shift or rotation.\n");
+                            "unsupported shift or rotation in reduction\n");
          return false;
        }
+    }
+  else
+    {
+      /* 4. Supportable by target?  */
      /* 4.1. check support for the operation in the loop  */
      optab = optab_for_tree_code (code, vectype_in, optab_default);

@@ -6580,19 +6616,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
        orig_code = cond_reduc_op_code;
    }

-  if (nested_cycle)
-    {
-      def_bb = gimple_bb (reduc_def_phi);
-      def_stmt_loop = def_bb->loop_father;
-      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
-                                      loop_preheader_edge (def_stmt_loop));
-      stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
-      if (def_arg_stmt_info
-         && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
-             == vect_double_reduction_def))
-       double_reduc = true;
-    }
-
  reduc_fn = IFN_LAST;

  if (reduction_type == TREE_CODE_REDUCTION

@@ -6963,6 +6986,12 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
            /* Multiple types are not supported for condition.  */
            break;
          }

+      if (code == LSHIFT_EXPR
+         || code == RSHIFT_EXPR)
+       {
+         vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL);
+         break;
+       }

      /* Handle uses.  */
      if (j == 0)
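Two things happen in this file: vect_is_simple_reduction learns to classify
nested cycles even when their value is only used inside the inner loop, and
vectorizable_reduction now dispatches shift/rotate codes to
vectorizable_shift for nested cycles.  The nested_cycle/double_reduc
computation moves ahead of the operation checks (and is deleted from its old
spot further down), plausibly because the new shift path needs to know
double_reduc before deciding whether dispatching is safe.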
gcc/tree-vect-stmts.c:
@@ -1461,6 +1461,16 @@ vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
      /* Code should use vect_get_vec_def_for_operand.  */
      gcc_unreachable ();

+    /* Operand is defined by a loop header phi.  In case of nested
+       cycles we also may have uses of the backedge def.  */
+    case vect_reduction_def:
+    case vect_double_reduction_def:
+    case vect_nested_cycle:
+    case vect_induction_def:
+      gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
+                 || dt == vect_nested_cycle);
+      /* Fallthru.  */
+
    /* operand is defined inside the loop.  */
    case vect_internal_def:
      {

@@ -1480,23 +1490,6 @@ vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
        return vec_oprnd;
      }

-    /* operand is defined by a loop header phi.  */
-    case vect_reduction_def:
-    case vect_double_reduction_def:
-    case vect_nested_cycle:
-    case vect_induction_def:
-      {
-       gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI);
-
-       /* Get the def from the vectorized stmt.  */
-       vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
-       if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
-         vec_oprnd = PHI_RESULT (phi);
-       else
-         vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
-       return vec_oprnd;
-      }
-
    default:
      gcc_unreachable ();
    }
@@ -5363,7 +5356,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

-static bool
+bool
vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    stmt_vec_info *vec_stmt, slp_tree slp_node,
                    stmt_vector_for_cost *cost_vec)

@@ -5401,6 +5394,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
      && ! vec_stmt)
    return false;

@@ -5480,7 +5474,8 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
-       || dt[1] == vect_induction_def)
+       || dt[1] == vect_induction_def
+       || dt[1] == vect_nested_cycle)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
@@ -9540,7 +9535,6 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
          || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
                                           cost_vec)
          || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
-         || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,

@@ -9549,6 +9543,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
          || vectorizable_reduction (stmt_info, NULL, NULL, node,
                                     node_instance, cost_vec)
          || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
+         || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
          || vectorizable_condition (stmt_info, NULL, NULL, NULL, 0, node,
                                     cost_vec)
          || vectorizable_comparison (stmt_info, NULL, NULL, NULL, node,
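The reordering in vect_analyze_stmt matches the ChangeLog ("Call
vectorizable_shift after vectorizable_reduction"): reduction analysis now
gets the first look at shift statements participating in a nested cycle, and
can itself dispatch back to vectorizable_shift, while ordinary shifts still
reach the relocated call.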
gcc/tree-vectorizer.h:
@@ -1483,6 +1483,9 @@ extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
                                    stmt_vec_info *, tree, int, slp_tree,
                                    stmt_vector_for_cost *);
+extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
+                               stmt_vec_info *, slp_tree,
+                               stmt_vector_for_cost *);
extern void vect_get_load_cost (stmt_vec_info, int, bool,
                                unsigned int *, unsigned int *,
                                stmt_vector_for_cost *,