Commit c9ba3307 by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/53645 (Missed optimization for vector integer division lowering)

	PR tree-optimization/53645
	* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
	instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
	if possible.

	* gcc.c-torture/execute/pr53645-2.c: New test.

From-SVN: r189052
parent b04ffa56
2012-06-28 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/53645
* tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR
instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR
if possible.
2012-06-28 Georg-Johann Lay <avr@gjlay.de>
PR 53595
......
2012-06-28 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/53645
* gcc.c-torture/execute/pr53645-2.c: New test.
2012-06-28 Richard Guenther <rguenther@suse.de>
PR middle-end/53790
......
/* PR tree-optimization/53645 */

/* Eight-lane, 16-byte vectors of 16-bit elements: UV is unsigned, SV is
   signed.  */
typedef unsigned short int UV __attribute__((vector_size (16)));
typedef short int SV __attribute__((vector_size (16)));
extern void abort (void);

/* For one vector of constant divisors { a, b, c, d, e, f, g, h }, define
   four noinline functions whose names are built by pasting the divisors
   after a two-letter prefix:
     uq<divisors> (x, y)  stores *y / divisors into *x for UV operands,
     ur<divisors> (x, y)  stores *y % divisors into *x for UV operands,
     sq<divisors> (x, y)  stores *y / divisors into *x for SV operands,
     sr<divisors> (x, y)  stores *y % divisors into *x for SV operands.  */
#define TEST(a, b, c, d, e, f, g, h) \
__attribute__((noinline)) void \
uq##a##b##c##d##e##f##g##h (UV *x, UV *y) \
{ \
  *x = *y / ((UV) { a, b, c, d, e, f, g, h }); \
} \
\
__attribute__((noinline)) void \
ur##a##b##c##d##e##f##g##h (UV *x, UV *y) \
{ \
  *x = *y % ((UV) { a, b, c, d, e, f, g, h }); \
} \
\
__attribute__((noinline)) void \
sq##a##b##c##d##e##f##g##h (SV *x, SV *y) \
{ \
  *x = *y / ((SV) { a, b, c, d, e, f, g, h }); \
} \
\
__attribute__((noinline)) void \
sr##a##b##c##d##e##f##g##h (SV *x, SV *y) \
{ \
  *x = *y % ((SV) { a, b, c, d, e, f, g, h }); \
}

/* The list of divisor vectors under test.  The last entry must NOT end in
   a line continuation: otherwise the instantiation below is spliced into
   the macro body, TESTS becomes self-referential, and none of the
   functions above are ever defined.  */
#define TESTS \
TEST (4, 4, 4, 4, 4, 4, 4, 4) \
TEST (1, 4, 2, 8, 16, 64, 32, 128) \
TEST (3, 3, 3, 3, 3, 3, 3, 3) \
TEST (6, 5, 6, 5, 6, 5, 6, 5) \
TEST (14, 14, 14, 6, 14, 6, 14, 14) \
TEST (7, 7, 7, 7, 7, 7, 7, 7)

/* Instantiate the division and remainder functions for every entry.  */
TESTS
/* Unsigned dividends: two UV vectors that main passes, one at a time, to
   the uq* and ur* functions.  The values include 0 and 65535.  */
UV u[] =
{ ((UV) { 73U, 65531U, 0U, 174U, 921U, 65535U, 17U, 178U }),
((UV) { 1U, 8173U, 65535U, 65472U, 12U, 29612U, 128U, 8912U }) };
/* Signed dividends: two SV vectors that main passes, one at a time, to
   the sq* and sr* functions.  The values include zero and negative
   numbers.  */
SV s[] =
{ ((SV) { 73, -9123, 32761, 8191, 16371, 1201, 12701, 9999 }),
((SV) { 9903, -1, -7323, 0, -7, -323, 9124, -9199 }) };
/* Test driver.  For every dividend vector in u[] and s[] and every divisor
   vector listed in TESTS, call the matching vector division and remainder
   functions and compare each of the eight result lanes with the scalar
   '/' or '%' of the same dividend element and constant divisor.  Calls
   abort () on the first mismatch and returns 0 if every check passes.  */
int
main ()
{
/* ur and sr receive the vector results; ur2 and sr2 are not referenced
   anywhere in this function.  */
UV ur, ur2;
SV sr, sr2;
int i;
/* Redefine TEST so that each TESTS entry now expands to statements rather
   than function definitions: call the unsigned quotient function on u[i],
   check all eight lanes (two per 'if'), then do the same for the unsigned
   remainder function.  An empty asm with a "memory" clobber follows each
   check -- presumably to stop the compiler from merging or folding the
   lane comparisons; NOTE(review): confirm intent.  */
#undef TEST
#define TEST(a, b, c, d, e, f, g, h) \
uq##a##b##c##d##e##f##g##h (&ur, u + i); \
if (ur[0] != u[i][0] / a || ur[3] != u[i][3] / d) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[2] != u[i][2] / c || ur[1] != u[i][1] / b) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[4] != u[i][4] / e || ur[7] != u[i][7] / h) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[6] != u[i][6] / g || ur[5] != u[i][5] / f) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
ur##a##b##c##d##e##f##g##h (&ur, u + i); \
if (ur[0] != u[i][0] % a || ur[3] != u[i][3] % d) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[2] != u[i][2] % c || ur[1] != u[i][1] % b) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[4] != u[i][4] % e || ur[7] != u[i][7] % h) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory"); \
if (ur[6] != u[i][6] % g || ur[5] != u[i][5] % f) \
abort (); \
asm volatile ("" : : "r" (&ur) : "memory");
/* Run the unsigned checks once for every dividend vector in u[].  */
for (i = 0; i < sizeof (u) / sizeof (u[0]); i++)
{
TESTS
}
/* Same scheme for the signed variants: sq* (quotient) and sr* (remainder)
   results in sr are compared lane by lane against scalar division of the
   elements of s[i].  */
#undef TEST
#define TEST(a, b, c, d, e, f, g, h) \
sq##a##b##c##d##e##f##g##h (&sr, s + i); \
if (sr[0] != s[i][0] / a || sr[3] != s[i][3] / d) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[2] != s[i][2] / c || sr[1] != s[i][1] / b) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[4] != s[i][4] / e || sr[7] != s[i][7] / h) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[6] != s[i][6] / g || sr[5] != s[i][5] / f) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
sr##a##b##c##d##e##f##g##h (&sr, s + i); \
if (sr[0] != s[i][0] % a || sr[3] != s[i][3] % d) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[2] != s[i][2] % c || sr[1] != s[i][1] % b) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[4] != s[i][4] % e || sr[7] != s[i][7] % h) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory"); \
if (sr[6] != s[i][6] % g || sr[5] != s[i][5] % f) \
abort (); \
asm volatile ("" : : "r" (&sr) : "memory");
/* Run the signed checks once for every dividend vector in s[].  */
for (i = 0; i < sizeof (s) / sizeof (s[0]); i++)
{
TESTS
}
return 0;
}
......@@ -455,7 +455,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
optab op;
tree *vec;
unsigned char *sel;
unsigned char *sel = NULL;
tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem;
if (prec > HOST_BITS_PER_WIDE_INT)
......@@ -744,26 +744,34 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
sel = XALLOCAVEC (unsigned char, nunits);
for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
wider_type
= build_vector_type (build_nonstandard_integer_type (prec * 2, unsignedp),
nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;
op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
if (op != NULL
&& optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
wider_type = NULL_TREE;
else
{
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
sel = XALLOCAVEC (unsigned char, nunits);
for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
wider_type
= build_vector_type (build_nonstandard_integer_type (prec * 2,
unsignedp),
nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;
}
cur_op = op0;
......@@ -772,7 +780,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
case 0:
gcc_assert (unsignedp);
/* t1 = oprnd0 >> pre_shift;
t2 = (type) (t1 w* ml >> prec);
t2 = t1 h* ml;
q = t2 >> post_shift; */
cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
if (cur_op == NULL_TREE)
......@@ -801,30 +809,37 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
mulcst = build_vector (type, vec);
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);
mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
cur_op, mulcst);
mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
cur_op, mulcst);
mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
if (BYTES_BIG_ENDIAN)
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo, perm_mask);
if (wider_type == NULL_TREE)
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
else
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi, perm_mask);
{
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);
mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type,
cur_op, mulcst);
mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi);
mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type,
cur_op, mulcst);
mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo);
if (BYTES_BIG_ENDIAN)
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo,
perm_mask);
else
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi,
perm_mask);
}
switch (mode)
{
case 0:
/* t1 = oprnd0 >> pre_shift;
t2 = (type) (t1 w* ml >> prec);
t2 = t1 h* ml;
q = t2 >> post_shift; */
cur_op = add_rshift (gsi, type, cur_op, post_shifts);
break;
case 1:
/* t1 = (type) (oprnd0 w* ml >> prec);
/* t1 = oprnd0 h* ml;
t2 = oprnd0 - t1;
t3 = t2 >> 1;
t4 = t1 + t3;
......@@ -848,7 +863,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
case 3:
case 4:
case 5:
/* t1 = (type) (oprnd0 w* ml >> prec);
/* t1 = oprnd0 h* ml;
t2 = t1; [ iff (mode & 2) != 0 ]
t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
t3 = t2 >> post_shift;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment