Commit f33fa066 by Jakub Jelinek Committed by Jakub Jelinek

i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use d->op1 instead of d->op0…

i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use d->op1 instead of d->op0 for the second vpshufb.

	* config/i386/i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use
	d->op1 instead of d->op0 for the second vpshufb.
	(expand_vec_perm_even_odd_1): For V8SImode fix vpshufd immediates.
	(ix86_expand_vec_perm_const): If mask indicates two operands are
	needed, but both are the same and expanding them as d.op0 == d.op1
	failed, retry with d.op0 != d.op1.
	(ix86_expand_vec_perm_builtin): Likewise.  Handle sorry printing
	also for d.nelt == 32.

	* gcc.dg/torture/vshuf-32.inc: Add interleave permutations.
	* gcc.dg/torture/vshuf-16.inc: Likewise.
	* gcc.dg/torture/vshuf-8.inc: Likewise.
	* gcc.dg/torture/vshuf-4.inc: Likewise.

From-SVN: r180212
parent 24ef84f6
2011-10-19 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (expand_vec_perm_vpshufb2_vpermq_even_odd): Use
d->op1 instead of d->op0 for the second vpshufb.
(expand_vec_perm_even_odd_1): For V8SImode fix vpshufd immediates.
(ix86_expand_vec_perm_const): If mask indicates two operands are
needed, but both are the same and expanding them as d.op0 == d.op1
failed, retry with d.op0 != d.op1.
(ix86_expand_vec_perm_builtin): Likewise. Handle sorry printing
also for d.nelt == 32.
PR middle-end/50754
* cfgexpand.c (expand_debug_expr): Handle WIDEN_LSHIFT_EXPR, ignore
VEC_PERM_EXPR.
......@@ -35992,7 +35992,7 @@ expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
vperm = force_reg (V32QImode, vperm);
h = gen_reg_rtx (V32QImode);
op = gen_lowpart (V32QImode, d->op0);
op = gen_lowpart (V32QImode, d->op1);
emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
ior = gen_reg_rtx (V32QImode);
......@@ -36154,9 +36154,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
/* Swap the 2nd and 3rd position in each lane into
{ 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
emit_insn (gen_avx2_pshufdv3 (t1, t1,
GEN_INT (2 * 2 + 1 * 16 + 3 * 64)));
GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
emit_insn (gen_avx2_pshufdv3 (t2, t2,
GEN_INT (2 * 2 + 1 * 16 + 3 * 64)));
GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
/* Now a vpunpck[lh]qdq will produce
{ 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
......@@ -36498,6 +36498,7 @@ ix86_expand_vec_perm_builtin (tree exp)
{
struct expand_vec_perm_d d;
tree arg0, arg1, arg2;
bool maybe_retry = false;
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
......@@ -36543,6 +36544,7 @@ ix86_expand_vec_perm_builtin (tree exp)
for (i = 0; i < nelt; ++i)
if (d.perm[i] >= nelt)
d.perm[i] -= nelt;
maybe_retry = true;
}
/* FALLTHRU */
......@@ -36563,6 +36565,28 @@ ix86_expand_vec_perm_builtin (tree exp)
if (ix86_expand_vec_perm_builtin_1 (&d))
return d.target;
/* If the mask says both arguments are needed, but they are the same,
the above tried to expand with d.op0 == d.op1. If that didn't work,
retry with d.op0 != d.op1 as that is what testing has been done with. */
if (maybe_retry)
{
rtx seq;
bool ok;
extract_vec_perm_cst (&d, arg2);
d.op1 = gen_reg_rtx (d.vmode);
start_sequence ();
ok = ix86_expand_vec_perm_builtin_1 (&d);
seq = get_insns ();
end_sequence ();
if (ok)
{
emit_move_insn (d.op1, d.op0);
emit_insn (seq);
return d.target;
}
}
/* For compiler generated permutations, we should never get here, because
the compiler should also be checking the ok hook. But since this is a
builtin the user has access to, don't abort. */
......@@ -36588,6 +36612,19 @@ ix86_expand_vec_perm_builtin (tree exp)
d.perm[8], d.perm[9], d.perm[10], d.perm[11],
d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
break;
case 32:
sorry ("vector permutation "
"(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d "
"%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
d.perm[0], d.perm[1], d.perm[2], d.perm[3],
d.perm[4], d.perm[5], d.perm[6], d.perm[7],
d.perm[8], d.perm[9], d.perm[10], d.perm[11],
d.perm[12], d.perm[13], d.perm[14], d.perm[15],
d.perm[16], d.perm[17], d.perm[18], d.perm[19],
d.perm[20], d.perm[21], d.perm[22], d.perm[23],
d.perm[24], d.perm[25], d.perm[26], d.perm[27],
d.perm[28], d.perm[29], d.perm[30], d.perm[31]);
break;
default:
gcc_unreachable ();
}
......@@ -36599,6 +36636,7 @@ bool
ix86_expand_vec_perm_const (rtx operands[4])
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
int i, nelt, which;
rtx sel;
......@@ -36614,6 +36652,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
gcc_assert (GET_CODE (sel) == CONST_VECTOR);
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
for (i = which = 0; i < nelt; ++i)
{
......@@ -36622,6 +36661,7 @@ ix86_expand_vec_perm_const (rtx operands[4])
which |= (ei < nelt ? 1 : 2);
d.perm[i] = ei;
perm[i] = ei;
}
switch (which)
......@@ -36653,7 +36693,32 @@ ix86_expand_vec_perm_const (rtx operands[4])
break;
}
return ix86_expand_vec_perm_builtin_1 (&d);
if (ix86_expand_vec_perm_builtin_1 (&d))
return true;
/* If the mask says both arguments are needed, but they are the same,
the above tried to expand with d.op0 == d.op1. If that didn't work,
retry with d.op0 != d.op1 as that is what testing has been done with. */
if (which == 3 && d.op0 == d.op1)
{
rtx seq;
bool ok;
memcpy (d.perm, perm, sizeof (perm));
d.op1 = gen_reg_rtx (d.vmode);
start_sequence ();
ok = ix86_expand_vec_perm_builtin_1 (&d);
seq = get_insns ();
end_sequence ();
if (ok)
{
emit_move_insn (d.op1, d.op0);
emit_insn (seq);
return true;
}
}
return false;
}
/* Implement targetm.vectorize.builtin_vec_perm_ok. */
2011-10-19 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/torture/vshuf-32.inc: Add interleave permutations.
* gcc.dg/torture/vshuf-16.inc: Likewise.
* gcc.dg/torture/vshuf-8.inc: Likewise.
* gcc.dg/torture/vshuf-4.inc: Likewise.
2011-10-19 Janus Weil <janus@gcc.gnu.org>
PR fortran/47023
......
......@@ -15,7 +15,9 @@ T (11, 15, 9, 14, 10, 8, 12, 13, 11, 7, 1, 2, 0, 4, 5, 6, 3) \
T (12, 2, 5, 24, 23, 17, 22, 20, 21, 12, 14, 13, 8, 6, 20, 10, 18) \
T (13, 23, 11, 15, 9, 0, 14, 8, 12, 10, 13, 19, 11, 2, 26, 24, 30) \
T (14, 25, 5, 17, 1, 9, 15, 21, 7, 28, 2, 18, 13, 30, 14, 10, 4) \
T (15, 1, 30, 27, 31, 9, 18, 25, 12, 7, 4, 2, 16, 25, 20, 10, 3)
T (15, 1, 30, 27, 31, 9, 18, 25, 12, 7, 4, 2, 16, 25, 20, 10, 3) \
T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30) \
T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31)
#define EXPTESTS \
T (116, 28, 13, 27, 11, 21, 1, 5, 22, 29, 14, 15, 6, 3, 10, 16, 30) \
T (117, 22, 26, 1, 13, 29, 3, 18, 18, 11, 21, 12, 28, 19, 5, 7, 4) \
......
......@@ -15,7 +15,9 @@ T (11, 13, 40, 7, 33, 51, 21, 59, 46, 47, 32, 9, 34, 63, 35, 41, 17, 24, 1, 8, 2
T (12, 39, 43, 54, 27, 53, 39, 27, 30, 2, 17, 13, 33, 7, 52, 40, 15, 36, 57, 10, 28, 22, 23, 25, 24, 41, 47, 8, 20, 5, 3, 4, 0) \
T (13, 7, 51, 13, 61, 25, 4, 19, 58, 35, 33, 29, 15, 40, 2, 39, 16, 38, 3, 54, 63, 15, 6, 48, 21, 14, 52, 17, 50, 34, 55, 57, 50) \
T (14, 22, 53, 28, 42, 45, 38, 49, 13, 54, 61, 21, 52, 7, 16, 34, 9, 1, 43, 62, 43, 35, 50, 47, 58, 20, 3, 30, 15, 37, 53, 43, 36) \
T (15, 2, 43, 49, 34, 28, 35, 29, 36, 51, 9, 17, 48, 10, 37, 45, 21, 52, 19, 25, 33, 60, 31, 30, 42, 12, 26, 27, 46, 5, 40, 14, 36)
T (15, 2, 43, 49, 34, 28, 35, 29, 36, 51, 9, 17, 48, 10, 37, 45, 21, 52, 19, 25, 33, 60, 31, 30, 42, 12, 26, 27, 46, 5, 40, 14, 36) \
T (16, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62) \
T (17, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63)
#define EXPTESTS \
T (116, 13, 38, 47, 3, 17, 8, 38, 20, 59, 61, 39, 26, 7, 49, 63, 43, 57, 16, 40, 19, 4, 32, 27, 7, 52, 19, 46, 55, 36, 41, 48, 6) \
T (117, 39, 35, 59, 20, 56, 18, 58, 63, 57, 14, 2, 16, 5, 61, 35, 4, 53, 9, 52, 51, 27, 33, 61, 12, 3, 35, 36, 40, 37, 7, 45, 42) \
......
......@@ -15,7 +15,9 @@ T (11, 1, 4, 0, 7) \
T (12, 1, 5, 7, 2) \
T (13, 2, 3, 0, 4) \
T (14, 7, 6, 4, 2) \
T (15, 6, 1, 3, 4)
T (15, 6, 1, 3, 4) \
T (16, 0, 2, 4, 6) \
T (17, 1, 3, 5, 7)
#define EXPTESTS \
T (116, 1, 2, 4, 3) \
T (117, 7, 3, 3, 0) \
......
......@@ -15,7 +15,9 @@ T (11, 5, 11, 12, 6, 3, 2, 4, 15) \
T (12, 5, 13, 14, 8, 4, 10, 4, 12) \
T (13, 14, 8, 12, 3, 13, 9, 5, 4) \
T (14, 15, 3, 13, 6, 14, 12, 10, 0) \
T (15, 0, 5, 11, 7, 4, 6, 14, 1)
T (15, 0, 5, 11, 7, 4, 6, 14, 1) \
T (16, 0, 2, 4, 6, 8, 10, 12, 14) \
T (17, 1, 3, 5, 7, 9, 11, 13, 15)
#define EXPTESTS \
T (116, 9, 3, 9, 4, 7, 0, 0, 6) \
T (117, 4, 14, 12, 8, 9, 6, 0, 10) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment