Commit 5deb57cb by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/51581 (Integer division by constant is not vectorized)

	PR tree-optimization/51581
	* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
	(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
	also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
	VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
	* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
	MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.

	* gcc.dg/vect/pr51581-4.c: New test.

From-SVN: r189053
parent c9ba3307
2012-06-28 Jakub Jelinek <jakub@redhat.com> 2012-06-28 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* tree-vect-stmts.c (permute_vec_elements): Add forward decl.
(vectorizable_operation): Handle vectorization of MULT_HIGHPART_EXPR
also using VEC_WIDEN_MULT_*_EXPR or builtin_mul_widen_* plus
VEC_PERM_EXPR if vector MULT_HIGHPART_EXPR isn't supported.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Use
MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_*_EXPR and shifts.
PR tree-optimization/53645 PR tree-optimization/53645
* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR * tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
......
2012-06-28 Jakub Jelinek <jakub@redhat.com> 2012-06-28 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* gcc.dg/vect/pr51581-4.c: New test.
PR tree-optimization/53645 PR tree-optimization/53645
* gcc.c-torture/execute/pr53645-2.c: New test. * gcc.c-torture/execute/pr53645-2.c: New test.
......
/* PR tree-optimization/51581 */
#include "tree-vect.h"
/* Signed 16-element arrays: the f1/f3/f5 routines store quotients of the
   elements of b into a.  */
short int a[16], b[16];
/* Unsigned 16-element arrays: the f2/f4/f6 routines store quotients of the
   elements of d into c.  */
unsigned short int c[16], d[16];
/* Store b[k] / 8 into a[k] for every k in 0..15 (signed short division by
   one constant for all elements).  The sixteen assignments are written out
   individually rather than as a loop; NOTE(review): presumably so that the
   straight-line form itself is what the vectorizer sees -- keep it as is.  */
void
f1 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 8;
a[2] = b[2] / 8;
a[3] = b[3] / 8;
a[4] = b[4] / 8;
a[5] = b[5] / 8;
a[6] = b[6] / 8;
a[7] = b[7] / 8;
a[8] = b[8] / 8;
a[9] = b[9] / 8;
a[10] = b[10] / 8;
a[11] = b[11] / 8;
a[12] = b[12] / 8;
a[13] = b[13] / 8;
a[14] = b[14] / 8;
a[15] = b[15] / 8;
}
/* Store d[k] / 3 into c[k] for every k in 0..15 (unsigned short division
   by one non-power-of-two constant for all elements).  Written as sixteen
   separate statements, not a loop.  */
void
f2 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 3;
c[2] = d[2] / 3;
c[3] = d[3] / 3;
c[4] = d[4] / 3;
c[5] = d[5] / 3;
c[6] = d[6] / 3;
c[7] = d[7] / 3;
c[8] = d[8] / 3;
c[9] = d[9] / 3;
c[10] = d[10] / 3;
c[11] = d[11] / 3;
c[12] = d[12] / 3;
c[13] = d[13] / 3;
c[14] = d[14] / 3;
c[15] = d[15] / 3;
}
/* Signed short division where the divisor alternates by element: even
   indices of b are divided by 8, odd indices by 4; results go to a.
   Both divisors are powers of two.  */
void
f3 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 4;
a[2] = b[2] / 8;
a[3] = b[3] / 4;
a[4] = b[4] / 8;
a[5] = b[5] / 4;
a[6] = b[6] / 8;
a[7] = b[7] / 4;
a[8] = b[8] / 8;
a[9] = b[9] / 4;
a[10] = b[10] / 8;
a[11] = b[11] / 4;
a[12] = b[12] / 8;
a[13] = b[13] / 4;
a[14] = b[14] / 8;
a[15] = b[15] / 4;
}
/* Unsigned short division where the divisor alternates by element: even
   indices of d are divided by 3, odd indices by 5; results go to c.  */
void
f4 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 5;
c[2] = d[2] / 3;
c[3] = d[3] / 5;
c[4] = d[4] / 3;
c[5] = d[5] / 5;
c[6] = d[6] / 3;
c[7] = d[7] / 5;
c[8] = d[8] / 3;
c[9] = d[9] / 5;
c[10] = d[10] / 3;
c[11] = d[11] / 5;
c[12] = d[12] / 3;
c[13] = d[13] / 5;
c[14] = d[14] / 3;
c[15] = d[15] / 5;
}
/* Signed short division with alternating non-power-of-two divisors: even
   indices of b are divided by 14, odd indices by 15; results go to a.  */
void
f5 (void)
{
a[0] = b[0] / 14;
a[1] = b[1] / 15;
a[2] = b[2] / 14;
a[3] = b[3] / 15;
a[4] = b[4] / 14;
a[5] = b[5] / 15;
a[6] = b[6] / 14;
a[7] = b[7] / 15;
a[8] = b[8] / 14;
a[9] = b[9] / 15;
a[10] = b[10] / 14;
a[11] = b[11] / 15;
a[12] = b[12] / 14;
a[13] = b[13] / 15;
a[14] = b[14] / 14;
a[15] = b[15] / 15;
}
/* Unsigned short division with a divisor pattern that repeats every eight
   elements: odd indices of d are divided by 5, even indices by 6, except
   indices 6 and 14 which are divided by 13.  Results go to c.  The check
   in main relies on exactly this pattern.  */
void
f6 (void)
{
c[0] = d[0] / 6;
c[1] = d[1] / 5;
c[2] = d[2] / 6;
c[3] = d[3] / 5;
c[4] = d[4] / 6;
c[5] = d[5] / 5;
/* Index 6 (and 14 below) deliberately uses 13 rather than 6.  */
c[6] = d[6] / 13;
c[7] = d[7] / 5;
c[8] = d[8] / 6;
c[9] = d[9] / 5;
c[10] = d[10] / 6;
c[11] = d[11] / 5;
c[12] = d[12] / 6;
c[13] = d[13] / 5;
c[14] = d[14] / 13;
c[15] = d[15] / 5;
}
/* Test driver: fill the input arrays, run each pair of division routines,
   and call abort if any stored quotient differs from the same division
   recomputed here one element at a time.  Returns 0 when every check
   passes.  */
int
main ()
{
int i;
/* NOTE(review): check_vect is not defined in this file; presumably it
   comes from tree-vect.h -- its behavior is not visible here.  */
check_vect ();
/* NOTE(review): the empty asm statements look like optimization barriers
   around the initialization loop -- confirm before removing.  */
asm ("");
for (i = 0; i < 16; i++)
{
asm ("");
/* b receives -8 .. 7.  d is unsigned short, so for i < 8 the negative
   value i - 8 wraps around to a large unsigned value when stored.  */
b[i] = i - 8;
d[i] = i - 8;
}
/* Uniform divisors: 8 for the signed arrays, 3 for the unsigned ones.  */
f1 ();
f2 ();
for (i = 0; i < 16; i++)
if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
abort ();
/* Alternating divisors; each iteration checks one even/odd index pair.  */
f3 ();
f4 ();
for (i = 0; i < 16; i+= 2)
if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
|| c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
abort ();
/* (i & 7) == 6 is true for i == 6 and i == 14, the two even indices where
   f6 divides by 13 instead of 6.  */
f5 ();
f6 ();
for (i = 0; i < 16; i+= 2)
if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
|| c[i] != d[i] / ((i & 7) == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3288,6 +3288,10 @@ vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
} }
static tree permute_vec_elements (tree, tree, tree, gimple,
gimple_stmt_iterator *);
/* Function vectorizable_operation. /* Function vectorizable_operation.
Check if STMT performs a binary, unary or ternary operation that can Check if STMT performs a binary, unary or ternary operation that can
...@@ -3300,17 +3304,18 @@ static bool ...@@ -3300,17 +3304,18 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node) gimple *vec_stmt, slp_tree slp_node)
{ {
tree vec_dest; tree vec_dest, vec_dest2 = NULL_TREE;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree scalar_dest; tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE; tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype; tree vectype, wide_vectype = NULL_TREE;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
int op_type; int op_type;
optab optab; optab optab, optab2 = NULL;
int icode; int icode;
tree def; tree def;
gimple def_stmt; gimple def_stmt;
...@@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3327,6 +3332,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2; tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf; int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false; return false;
...@@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3451,31 +3458,97 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
optab = optab_for_tree_code (code, vectype, optab_default); optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */ /* Supportable by target? */
if (!optab) if (!optab && code != MULT_HIGHPART_EXPR)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab."); fprintf (vect_dump, "no optab.");
return false; return false;
} }
vec_mode = TYPE_MODE (vectype); vec_mode = TYPE_MODE (vectype);
icode = (int) optab_handler (optab, vec_mode); icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
if (icode == CODE_FOR_nothing
&& code == MULT_HIGHPART_EXPR
&& VECTOR_MODE_P (vec_mode)
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
/* If MULT_HIGHPART_EXPR isn't supported by the backend, see
if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
tree wide_type
= build_nonstandard_integer_type (prec * 2, unsignedp);
wide_vectype
= get_same_sized_vectype (wide_type, vectype);
sel = XALLOCAVEC (unsigned char, nunits_in);
if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
&& GET_MODE_SIZE (TYPE_MODE (wide_vectype))
== GET_MODE_SIZE (vec_mode))
{
if (targetm.vectorize.builtin_mul_widen_even
&& (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
&& TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits_in : 0);
if (can_vec_perm_p (vec_mode, false, sel))
icode = 0;
}
if (icode == CODE_FOR_nothing)
{
decl1 = NULL_TREE;
decl2 = NULL_TREE;
optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
vectype, optab_default);
optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
vectype, optab_default);
if (optab != NULL
&& optab2 != NULL
&& optab_handler (optab, vec_mode) != CODE_FOR_nothing
&& optab_handler (optab2, vec_mode) != CODE_FOR_nothing)
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
if (can_vec_perm_p (vec_mode, false, sel))
icode = optab_handler (optab, vec_mode);
}
}
}
if (icode == CODE_FOR_nothing)
{
if (optab_for_tree_code (code, vectype, optab_default) == NULL)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
wide_vectype = NULL_TREE;
optab2 = NULL;
}
}
if (icode == CODE_FOR_nothing) if (icode == CODE_FOR_nothing)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "op not supported by target."); fprintf (vect_dump, "op not supported by target.");
/* Check only during analysis. */ /* Check only during analysis. */
if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
|| (vf < vect_min_worthwhile_factor (code) || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
&& !vec_stmt))
return false; return false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "proceeding using word mode."); fprintf (vect_dump, "proceeding using word mode.");
} }
/* Worthwhile without SIMD support? Check only during analysis. */ /* Worthwhile without SIMD support? Check only during analysis. */
if (!VECTOR_MODE_P (TYPE_MODE (vectype)) if (!VECTOR_MODE_P (vec_mode)
&& vf < vect_min_worthwhile_factor (code) && !vec_stmt
&& !vec_stmt) && vf < vect_min_worthwhile_factor (code))
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "not worthwhile without SIMD support."); fprintf (vect_dump, "not worthwhile without SIMD support.");
...@@ -3497,6 +3570,15 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3497,6 +3570,15 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation."); fprintf (vect_dump, "transform binary/unary operation.");
/* Handle def. */ /* Handle def. */
if (wide_vectype)
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are /* Allocate VECs for vector operands. In case of SLP, vector operands are
...@@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3606,6 +3688,66 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op) vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
if (wide_vectype)
{
tree new_temp2, vce;
gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2); vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt); new_temp = make_ssa_name (vec_dest, new_stmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment