Commit 5deb57cb by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/51581 (Integer division by constant is not vectorized)

	PR tree-optimization/51581
	* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
	(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
	also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
	VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
	* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
	MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.

	* gcc.dg/vect/pr51581-4.c: New test.

From-SVN: r189053
parent c9ba3307
2012-06-28  Jakub Jelinek  <jakub@redhat.com>
PR tree-optimization/51581
* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.
PR tree-optimization/53645
* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
......
2012-06-28  Jakub Jelinek  <jakub@redhat.com>
PR tree-optimization/51581
* gcc.dg/vect/pr51581-4.c: New test.
PR tree-optimization/53645
* gcc.c-torture/execute/pr53645-2.c: New test.
......
/* PR tree-optimization/51581 */
#include "tree-vect.h"
short int a[16], b[16];
unsigned short int c[16], d[16];
/* Signed 16-bit case, uniform divisor: stores b[i] / 8 into a[i] for every
   i in 0..15.  Each element is a separate statement rather than a loop
   iteration; presumably this is so the divisions reach the vectorizer as
   straight-line code -- TODO confirm against the testsuite intent.  */
void
f1 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 8;
a[2] = b[2] / 8;
a[3] = b[3] / 8;
a[4] = b[4] / 8;
a[5] = b[5] / 8;
a[6] = b[6] / 8;
a[7] = b[7] / 8;
a[8] = b[8] / 8;
a[9] = b[9] / 8;
a[10] = b[10] / 8;
a[11] = b[11] / 8;
a[12] = b[12] / 8;
a[13] = b[13] / 8;
a[14] = b[14] / 8;
a[15] = b[15] / 8;
}
/* Unsigned 16-bit case, uniform divisor: stores d[i] / 3 into c[i] for
   every i in 0..15, one statement per element.  */
void
f2 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 3;
c[2] = d[2] / 3;
c[3] = d[3] / 3;
c[4] = d[4] / 3;
c[5] = d[5] / 3;
c[6] = d[6] / 3;
c[7] = d[7] / 3;
c[8] = d[8] / 3;
c[9] = d[9] / 3;
c[10] = d[10] / 3;
c[11] = d[11] / 3;
c[12] = d[12] / 3;
c[13] = d[13] / 3;
c[14] = d[14] / 3;
c[15] = d[15] / 3;
}
/* Signed 16-bit case, alternating power-of-two divisors: even indices are
   divided by 8 and odd indices by 4, so the divisor differs between
   neighbouring elements.  */
void
f3 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 4;
a[2] = b[2] / 8;
a[3] = b[3] / 4;
a[4] = b[4] / 8;
a[5] = b[5] / 4;
a[6] = b[6] / 8;
a[7] = b[7] / 4;
a[8] = b[8] / 8;
a[9] = b[9] / 4;
a[10] = b[10] / 8;
a[11] = b[11] / 4;
a[12] = b[12] / 8;
a[13] = b[13] / 4;
a[14] = b[14] / 8;
a[15] = b[15] / 4;
}
/* Unsigned 16-bit case, alternating non-power-of-two divisors: even
   indices are divided by 3 and odd indices by 5.  */
void
f4 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 5;
c[2] = d[2] / 3;
c[3] = d[3] / 5;
c[4] = d[4] / 3;
c[5] = d[5] / 5;
c[6] = d[6] / 3;
c[7] = d[7] / 5;
c[8] = d[8] / 3;
c[9] = d[9] / 5;
c[10] = d[10] / 3;
c[11] = d[11] / 5;
c[12] = d[12] / 3;
c[13] = d[13] / 5;
c[14] = d[14] / 3;
c[15] = d[15] / 5;
}
/* Signed 16-bit case, alternating non-power-of-two divisors: even indices
   are divided by 14 and odd indices by 15.  */
void
f5 (void)
{
a[0] = b[0] / 14;
a[1] = b[1] / 15;
a[2] = b[2] / 14;
a[3] = b[3] / 15;
a[4] = b[4] / 14;
a[5] = b[5] / 15;
a[6] = b[6] / 14;
a[7] = b[7] / 15;
a[8] = b[8] / 14;
a[9] = b[9] / 15;
a[10] = b[10] / 14;
a[11] = b[11] / 15;
a[12] = b[12] / 14;
a[13] = b[13] / 15;
a[14] = b[14] / 14;
a[15] = b[15] / 15;
}
/* Unsigned 16-bit case with a divisor pattern of period 8: odd indices are
   divided by 5; even indices are divided by 6, except indices 6 and 14,
   which are divided by 13.  main checks the even elements with
   (i & 7) == 6 ? 13 : 6, which matches this layout.  */
void
f6 (void)
{
c[0] = d[0] / 6;
c[1] = d[1] / 5;
c[2] = d[2] / 6;
c[3] = d[3] / 5;
c[4] = d[4] / 6;
c[5] = d[5] / 5;
c[6] = d[6] / 13;
c[7] = d[7] / 5;
c[8] = d[8] / 6;
c[9] = d[9] / 5;
c[10] = d[10] / 6;
c[11] = d[11] / 5;
c[12] = d[12] / 6;
c[13] = d[13] / 5;
c[14] = d[14] / 13;
c[15] = d[15] / 5;
}
/* Test driver: fills the inputs b and d, runs f1..f6 in pairs, and after
   each pair compares every stored result with the same division evaluated
   directly.  Calls abort () on the first mismatch; returns 0 otherwise.  */
int
main ()
{
int i;
/* Declared outside this file (tree-vect.h is included above); presumably
   verifies that the host can execute vector code -- TODO confirm.  */
check_vect ();
asm ("");
for (i = 0; i < 16; i++)
{
/* Empty asm inside the loop body; presumably an optimization barrier so the
   initialization is not vectorized or folded away -- TODO confirm.  */
asm ("");
/* b receives the values -8 .. 7.  */
b[i] = i - 8;
/* d is unsigned short, so for i < 8 the negative i - 8 wraps on conversion
   to a large unsigned value.  */
d[i] = i - 8;
}
/* Uniform divisors: a = b / 8 and c = d / 3.  */
f1 ();
f2 ();
for (i = 0; i < 16; i++)
if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
abort ();
/* Divisors alternating between even and odd indices.  */
f3 ();
f4 ();
for (i = 0; i < 16; i+= 2)
if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
|| c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
abort ();
/* f6 uses 13 instead of 6 at indices 6 and 14, hence the (i & 7) == 6
   selection in the expected value below.  */
f5 ();
f6 ();
for (i = 0; i < 16; i+= 2)
if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
|| c[i] != d[i] / ((i & 7) == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
} }
static tree permute_vec_elements (tree, tree, tree, gimple,
gimple_stmt_iterator *);
/* Function vectorizable_operation. /* Function vectorizable_operation.
Check if STMT performs a binary, unary or ternary operation that can Check if STMT performs a binary, unary or ternary operation that can
...@@ -3300,17 +3304,18 @@ static bool ...@@ -3300,17 +3304,18 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node) gimple *vec_stmt, slp_tree slp_node)
{ {
tree vec_dest; tree vec_dest, vec_dest2 = NULL_TREE;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree scalar_dest; tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE; tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype; tree vectype, wide_vectype = NULL_TREE;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
int op_type; int op_type;
optab optab; optab optab, optab2 = NULL;
int icode; int icode;
tree def; tree def;
gimple def_stmt; gimple def_stmt;
...@@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2; tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf; int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false; return false;
...@@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
optab = optab_for_tree_code (code, vectype, optab_default); optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */ /* Supportable by target? */
if (!optab) if (!optab && code != MULT_HIGHPART_EXPR)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab."); fprintf (vect_dump, "no optab.");
return false; return false;
} }
vec_mode = TYPE_MODE (vectype); vec_mode = TYPE_MODE (vectype);
icode = (int) optab_handler (optab, vec_mode); icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
if (icode == CODE_FOR_nothing
    && code == MULT_HIGHPART_EXPR
    && VECTOR_MODE_P (vec_mode)
    && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
  {
    /* If MULT_HIGHPART_EXPR isn't supported by the backend, see
       if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
       or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR.

       On success ICODE is set to something other than CODE_FOR_nothing,
       WIDE_VECTYPE stays non-NULL and SEL holds the permutation that picks
       the high halves out of the two widened products; DECL1/DECL2 are
       non-NULL only when the even/odd builtins were chosen.  On failure
       WIDE_VECTYPE and OPTAB2 are reset so the code below behaves as if
       this block had not run.  */
    unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
    unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
    tree wide_type
      = build_nonstandard_integer_type (prec * 2, unsignedp);
    wide_vectype
      = get_same_sized_vectype (wide_type, vectype);

    sel = XALLOCAVEC (unsigned char, nunits_in);
    /* Guard against a NULL WIDE_VECTYPE before looking at its mode; the
       lookup above may fail to find a vector type of the requested size.  */
    if (wide_vectype != NULL_TREE
        && VECTOR_MODE_P (TYPE_MODE (wide_vectype))
        && GET_MODE_SIZE (TYPE_MODE (wide_vectype))
           == GET_MODE_SIZE (vec_mode))
      {
        /* First choice: target builtins producing the even and the odd
           widened products.  */
        if (targetm.vectorize.builtin_mul_widen_even
            && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
            && targetm.vectorize.builtin_mul_widen_odd
            && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
            && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
               == TYPE_MODE (wide_vectype))
          {
            for (i = 0; i < nunits_in; i++)
              sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
                       + ((i & 1) ? nunits_in : 0);
            if (can_vec_perm_p (vec_mode, false, sel))
              icode = 0;
          }
        /* Second choice: VEC_WIDEN_MULT_LO_EXPR plus VEC_WIDEN_MULT_HI_EXPR.
           The transform emits both halves, so both optabs must be checked;
           OPTAB is the LO half and OPTAB2 the HI half.  */
        if (icode == CODE_FOR_nothing)
          {
            decl1 = NULL_TREE;
            decl2 = NULL_TREE;
            optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
                                         vectype, optab_default);
            optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
                                          vectype, optab_default);
            if (optab != NULL
                && optab2 != NULL
                && optab_handler (optab, vec_mode) != CODE_FOR_nothing
                && optab_handler (optab2, vec_mode) != CODE_FOR_nothing)
              {
                for (i = 0; i < nunits_in; i++)
                  sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
                if (can_vec_perm_p (vec_mode, false, sel))
                  icode = optab_handler (optab, vec_mode);
              }
          }
      }
    if (icode == CODE_FOR_nothing)
      {
        /* Neither fallback is usable.  Report the missing optab exactly as
           the check before this block would have done for other codes.  */
        if (optab_for_tree_code (code, vectype, optab_default) == NULL)
          {
            if (vect_print_dump_info (REPORT_DETAILS))
              fprintf (vect_dump, "no optab.");
            return false;
          }
        wide_vectype = NULL_TREE;
        optab2 = NULL;
      }
  }
if (icode == CODE_FOR_nothing) if (icode == CODE_FOR_nothing)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "op not supported by target."); fprintf (vect_dump, "op not supported by target.");
/* Check only during analysis. */ /* Check only during analysis. */
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|| (vf < vect_min_worthwhile_factor (code) || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
&& !vec_stmt))
return false; return false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "proceeding using word mode."); fprintf (vect_dump, "proceeding using word mode.");
} }
/* Worthwhile without SIMD support? Check only during analysis. */ /* Worthwhile without SIMD support? Check only during analysis. */
if (!VECTOR_MODE_P (TYPE_MODE (vectype)) if (!VECTOR_MODE_P (vec_mode)
&& vf < vect_min_worthwhile_factor (code) && !vec_stmt
&& !vec_stmt) && vf < vect_min_worthwhile_factor (code))
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "not worthwhile without SIMD support."); fprintf (vect_dump, "not worthwhile without SIMD support.");
...@@ -3497,7 +3570,16 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3497,7 +3570,16 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation."); fprintf (vect_dump, "transform binary/unary operation.");
/* Handle def. */ /* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype); if (wide_vectype)
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are /* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is created in the previous stages of the recursion, so no allocation is
...@@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op) vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
/* WIDE_VECTYPE is non-NULL only when the analysis above chose to open-code
   MULT_HIGHPART_EXPR: emit two widening multiplies, reinterpret both wide
   results as VECTYPE and permute them with PERM_MASK.  */
if (wide_vectype)
{
tree new_temp2, vce;
gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
/* Target builtin variant: DECL1 and DECL2 are the decls obtained from
   builtin_mul_widen_even and builtin_mul_widen_odd respectively.  */
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
/* Tree code variant: which of the HI/LO codes goes first depends on
   BYTES_BIG_ENDIAN, so that NEW_TEMP and NEW_TEMP2 are in the order the
   SEL permutation expects.  */
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
/* View both wide products as vectors of the narrow element type.  */
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Combine the two reinterpreted vectors using PERM_MASK, which was built
   from SEL.  */
new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
/* Skip the generic single-statement emission that follows.  */
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2); vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt); new_temp = make_ssa_name (vec_dest, new_stmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment