Commit 2b5bf0e2 by Richard Henderson Committed by Richard Henderson

sse.md (mulv4si3): New.

        * config/i386/sse.md (mulv4si3): New.

        * lib/target-supports.exp (check_effective_target_vect_int_mult): Add
        i?86 and x86_64.

From-SVN: r99811
parent 9d9bd40f
2005-05-16 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (mulv4si3): New.
2005-05-17 Hans-Peter Nilsson <hp@axis.com>
* config/cris/cris.h (EXTRA_CONSTRAINT_T): Remove FIXME and
......
...@@ -2490,6 +2490,52 @@ ...@@ -2490,6 +2490,52 @@
[(set_attr "type" "sseiadd") [(set_attr "type" "sseiadd")
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
;; Expand a V4SI multiply for SSE2.  The expansion performs two widening
;; unsigned multiplies, one on elements 0 and 2 and one on elements 1 and 3
;; (after shifting them down), then gathers the low 32 bits of each 64-bit
;; product back into element order.  The low 32 bits of the full product
;; are the same for signed and unsigned inputs, so this implements a plain
;; modular multiply.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
                   (match_operand:V4SI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];

  /* The predicates above admit memory operands, but below each input is
     re-viewed as TImode and fed to the whole-register shift, and both
     inputs are handed to the widening multiply at the same time.  The
     generator functions do not validate their arguments, so load any
     memory input into a pseudo first.  Register inputs are left alone.
     NOTE(review): this assumes sse2_lshrti3 wants a register source and
     that sse2_umulv2siv2di3 cannot take two memory inputs; neither
     pattern is visible from here -- confirm against sse.md.  */
  if (MEM_P (op1))
    op1 = force_reg (V4SImode, op1);
  if (MEM_P (op2))
    op2 = force_reg (V4SImode, op2);

  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);

  /* Shift count in bits: one 32-bit element.  */
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  The result is written through a V2DI
     view of t1, i.e. two 64-bit products.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));

  /* Shift both input vectors down one element, so that elements 3 and 1
     are now in the slots for elements 2 and 0.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, op1), thirtytwo));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, op2), thirtytwo));

  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));

  /* Move the results in element 2 down to element 1; we don't care what
     goes in elements 2 and 3.  After this, elements 0 and 1 of t5 and t6
     hold the low halves of the two products from each multiply.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together: interleaving the low elements of t5
     (products 0 and 2) with those of t6 (products 1 and 3) restores the
     original element order.  */
  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
  DONE;
})
(define_insn "ashr<mode>3" (define_insn "ashr<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "=x") [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24 (ashiftrt:SSEMODE24
......
2005-05-16 Richard Henderson <rth@redhat.com>
* lib/target-supports.exp (check_effective_target_vect_int_mult): Add
i?86 and x86_64.
2005-05-16 Mark Mitchell <mark@codesourcery.com>
* gcc.dg/compat/generate-random.c (config.h): Do not include.
......
...@@ -955,7 +955,9 @@ proc check_effective_target_vect_int_mult { } { ...@@ -955,7 +955,9 @@ proc check_effective_target_vect_int_mult { } {
verbose "check_effective_target_vect_int_mult: using cached result" 2 verbose "check_effective_target_vect_int_mult: using cached result" 2
} else { } else {
set et_vect_int_mult_saved 0 set et_vect_int_mult_saved 0
if { [istarget powerpc*-*-*] } { if { [istarget powerpc*-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_int_mult_saved 1 set et_vect_int_mult_saved 1
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment