Commit 77a3dbf6, authored by Richard Henderson and committed by Richard Henderson

i386: Extract the guts of mulv16qi3 to ix86_expand_vecop_qihi

        * config/i386/sse.md (mul<VI1_AVX2>3): Change from insn_and_split to
        pure expander; move expansion code ...
        * config/i386/i386.c (ix86_expand_vecop_qihi): ... here.  New function.
        * config/i386/i386-protos.h: Update.

From-SVN: r188907
parent 6b39c806
2012-06-23 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (mul<VI1_AVX2>3): Change from insn_and_split to
pure expander; move expansion code ...
* config/i386/i386.c (ix86_expand_vecop_qihi): ... here. New function.
* config/i386/i386-protos.h: Update.
2012-06-22 Edmar Wienskoski <edmar@freescale.com>
* config/rs6000/rs6000.md (define_attr "type"): New type popcnt.
......
......@@ -192,6 +192,8 @@ extern void ix86_expand_rounddf_32 (rtx, rtx);
extern void ix86_expand_trunc (rtx, rtx);
extern void ix86_expand_truncdf_32 (rtx, rtx);
extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* TREE_CODE */
......
......@@ -38438,6 +38438,91 @@ ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
expand_vec_perm_even_odd_1 (&d, odd);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
same operation on V*HImode.  DEST is the V16QImode or V32QImode
destination; OP1 and OP2 are the QImode vector source operands.
Any other destination mode aborts via gcc_unreachable.  */
void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
enum machine_mode qimode = GET_MODE (dest);
enum machine_mode himode;
/* Generators for the low/high interleave patterns of the chosen mode.  */
rtx (*gen_il) (rtx, rtx, rtx);
rtx (*gen_ih) (rtx, rtx, rtx);
rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
struct expand_vec_perm_d d;
bool ok;
int i;
/* Pick the HImode twin of the QImode vector and the matching
interleave expanders: SSE2 patterns for 128-bit vectors, AVX2
patterns for 256-bit vectors.  */
if (qimode == V16QImode)
{
himode = V8HImode;
gen_il = gen_vec_interleave_lowv16qi;
gen_ih = gen_vec_interleave_highv16qi;
}
else if (qimode == V32QImode)
{
himode = V16HImode;
gen_il = gen_avx2_interleave_lowv32qi;
gen_ih = gen_avx2_interleave_highv32qi;
}
else
gcc_unreachable ();
/* Unpack data such that we've got a source byte in each low byte of
each word.  We don't care what goes into the high byte of each word.
Rather than trying to get zero in there, most convenient is to let
it be a copy of the low byte.  */
op1_l = gen_reg_rtx (qimode);
op1_h = gen_reg_rtx (qimode);
emit_insn (gen_il (op1_l, op1, op1));
emit_insn (gen_ih (op1_h, op1, op1));
op2_l = gen_reg_rtx (qimode);
op2_h = gen_reg_rtx (qimode);
emit_insn (gen_il (op2_l, op2, op2));
emit_insn (gen_ih (op2_h, op2, op2));
/* Perform the operation on each interleaved half, viewing the QImode
registers as HImode vectors.  OPTAB_DIRECT: the HImode operation must
exist as a single pattern, or expand_simple_binop returns NULL.  */
res_l = expand_simple_binop (himode, code, gen_lowpart (himode, op1_l),
gen_lowpart (himode, op2_l), NULL_RTX,
1, OPTAB_DIRECT);
res_h = expand_simple_binop (himode, code, gen_lowpart (himode, op1_h),
gen_lowpart (himode, op2_h), NULL_RTX,
1, OPTAB_DIRECT);
gcc_assert (res_l && res_h);
/* Merge the data back into the right place: select the low byte of each
HImode result element via a constant permutation.  */
d.target = dest;
d.op0 = gen_lowpart (qimode, res_l);
d.op1 = gen_lowpart (qimode, res_h);
d.vmode = qimode;
d.nelt = GET_MODE_NUNITS (qimode);
d.one_operand_p = false;
d.testing_p = false;
if (qimode == V16QImode)
{
/* For SSE2, we used a full interleave, so the desired
results are in the even elements.  */
for (i = 0; i < 16; ++i)
d.perm[i] = i * 2;
}
else
{
/* For AVX, the interleave used above was not cross-lane.  So the
extraction is evens but with the second and third quarter swapped.
Happily, that is even one insn shorter than even extraction.
The (i & 24) tests identify those two eight-element quarters and
shift their source indices by +/-16 to undo the lane split.  */
for (i = 0; i < 32; ++i)
d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
}
ok = ix86_expand_vec_perm_const_1 (&d);
gcc_assert (ok);
/* Record the whole sequence as a single CODE operation on the QImode
operands, so later RTL passes (e.g. CSE) can see through it.  */
set_unique_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_fmt_ee (code, qimode, op1, op2));
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
......@@ -5213,70 +5213,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
;; NOTE(review): this span is a diff overlay without +/- markers.  Per the
;; commit message, the old define_insn_and_split (next line and the large
;; C body below) is REMOVED, and the define_expand version is ADDED; the
;; expansion code moved to ix86_expand_vecop_qihi in i386.c.
(define_insn_and_split "mul<mode>3"
(define_expand "mul<mode>3"
[(set (match_operand:VI1_AVX2 0 "register_operand")
(mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
(match_operand:VI1_AVX2 2 "register_operand")))]
;; Old insn_and_split condition/template/split lines (removed):
"TARGET_SSE2
&& can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
;; New expander condition (added):
"TARGET_SSE2"
{
;; Old preparation body (removed) -- superseded by ix86_expand_vecop_qihi.
rtx t[6];
int i;
enum machine_mode mulmode = <sseunpackmode>mode;
for (i = 0; i < 6; ++i)
t[i] = gen_reg_rtx (<MODE>mode);
/* Unpack data such that we've got a source byte in each low byte of
each word.  We don't care what goes into the high byte of each word.
Rather than trying to get zero in there, most convenient is to let
it be a copy of the low byte.  */
emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[0], operands[1],
operands[1]));
emit_insn (gen_<vec_avx2>_interleave_high<mode> (t[1], operands[2],
operands[2]));
emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[2], operands[1],
operands[1]));
emit_insn (gen_<vec_avx2>_interleave_low<mode> (t[3], operands[2],
operands[2]));
/* Multiply words.  The end-of-line annotations here give a picture of what
the output of that instruction looks like.  Dot means don't care; the
letters are the bytes of the result with A being the most significant.  */
emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[4]),
gen_rtx_MULT (mulmode, /* .A.B.C.D.E.F.G.H */
gen_lowpart (mulmode, t[0]),
gen_lowpart (mulmode, t[1]))));
emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (mulmode, t[5]),
gen_rtx_MULT (mulmode, /* .I.J.K.L.M.N.O.P */
gen_lowpart (mulmode, t[2]),
gen_lowpart (mulmode, t[3]))));
/* Extract the even bytes and merge them back together.  */
if (<MODE>mode == V16QImode)
ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
else
{
/* Since avx2_interleave_{low,high}v32qi used above aren't cross-lane,
this can't be normal even extraction, but one where additionally
the second and third quarter are swapped.  That is even one insn
shorter than even extraction.  */
rtvec v = rtvec_alloc (32);
for (i = 0; i < 32; ++i)
RTVEC_ELT (v, i)
= GEN_INT (i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0));
t[0] = operands[0];
t[1] = t[5];
t[2] = t[4];
t[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
ix86_expand_vec_perm_const (t);
}
set_unique_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_MULT (<MODE>mode, operands[1], operands[2]));
;; New expander body (added): delegate everything to the i386.c helper.
ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
DONE;
})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment