Commit 08f3084b by Richard Henderson Committed by Richard Henderson

sse.md (mulv16qi3): Change back from an expander to an insn-and-split.

	* config/i386/sse.md (mulv16qi3): Change back from an expander
	to an insn-and-split.
	(mulv4si3): Mention AVX not XOP for AVX exception.
	(*sse2_mulv4si3): Likewise.
	(mulv2di3): Use vpmulld not vpmacsdd for XOP expansion.  Tidy.
testsuite/
        * gcc.target/i386/xop-imul64-vector.c: Look for vpmulld not vpmacsdd.

From-SVN: r154693
parent 5e1a89f0
2009-11-26 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (mulv16qi3): Change back from an expander
to an insn-and-split.
(mulv4si3): Mention AVX not XOP for AVX exception.
(*sse2_mulv4si3): Likewise.
(mulv2di3): Use vpmulld not vpmacsdd for XOP expansion. Tidy.
2009-11-26 Bernd Schmidt <bernd.schmidt@analog.com> 2009-11-26 Bernd Schmidt <bernd.schmidt@analog.com>
* cfgexpand (n_stack_vars_conflict): New static variable. * cfgexpand (n_stack_vars_conflict): New static variable.
...@@ -5227,11 +5227,15 @@ ...@@ -5227,11 +5227,15 @@
(set_attr "prefix_data16" "1") (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
(define_expand "mulv16qi3" (define_insn_and_split "mulv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "") [(set (match_operand:V16QI 0 "register_operand" "")
(mult:V16QI (match_operand:V16QI 1 "register_operand" "") (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))] (match_operand:V16QI 2 "register_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2
&& can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{ {
rtx t[6]; rtx t[6];
int i; int i;
...@@ -5592,7 +5596,7 @@ ...@@ -5592,7 +5596,7 @@
(match_operand:V4SI 2 "register_operand" "")))] (match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
if (TARGET_SSE4_1 || TARGET_XOP) if (TARGET_SSE4_1 || TARGET_AVX)
ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
}) })
...@@ -5621,7 +5625,7 @@ ...@@ -5621,7 +5625,7 @@
[(set (match_operand:V4SI 0 "register_operand" "") [(set (match_operand:V4SI 0 "register_operand" "")
(mult:V4SI (match_operand:V4SI 1 "register_operand" "") (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))] (match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_XOP "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
&& can_create_pseudo_p ()" && can_create_pseudo_p ()"
"#" "#"
"&& 1" "&& 1"
...@@ -5683,17 +5687,20 @@ ...@@ -5683,17 +5687,20 @@
rtx t1, t2, t3, t4, t5, t6, thirtytwo; rtx t1, t2, t3, t4, t5, t6, thirtytwo;
rtx op0, op1, op2; rtx op0, op1, op2;
op0 = operands[0];
op1 = operands[1];
op2 = operands[2];
if (TARGET_XOP) if (TARGET_XOP)
{ {
/* op1: A,B,C,D, op2: E,F,G,H */ /* op1: A,B,C,D, op2: E,F,G,H */
op0 = operands[0]; op1 = gen_lowpart (V4SImode, op1);
op1 = gen_lowpart (V4SImode, operands[1]); op2 = gen_lowpart (V4SImode, op2);
op2 = gen_lowpart (V4SImode, operands[2]);
t1 = gen_reg_rtx (V4SImode); t1 = gen_reg_rtx (V4SImode);
t2 = gen_reg_rtx (V4SImode); t2 = gen_reg_rtx (V4SImode);
t3 = gen_reg_rtx (V4SImode); t3 = gen_reg_rtx (V2DImode);
t4 = gen_reg_rtx (V2DImode); t4 = gen_reg_rtx (V2DImode);
t5 = gen_reg_rtx (V2DImode);
/* t1: B,A,D,C */ /* t1: B,A,D,C */
emit_insn (gen_sse2_pshufd_1 (t1, op1, emit_insn (gen_sse2_pshufd_1 (t1, op1,
...@@ -5702,55 +5709,50 @@ ...@@ -5702,55 +5709,50 @@
GEN_INT (3), GEN_INT (3),
GEN_INT (2))); GEN_INT (2)));
/* t2: 0 */ /* t2: (B*E),(A*F),(D*G),(C*H) */
emit_move_insn (t2, CONST0_RTX (V4SImode)); emit_insn (gen_mulv4si3 (t2, t1, op2));
/* t3: (B*E),(A*F),(D*G),(C*H) */
emit_insn (gen_xop_pmacsdd (t3, t1, op2, t2));
/* t4: (B*E)+(A*F), (D*G)+(C*H) */ /* t4: (B*E)+(A*F), (D*G)+(C*H) */
emit_insn (gen_xop_phadddq (t4, t3)); emit_insn (gen_xop_phadddq (t3, t2));
/* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
/* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
emit_insn (gen_xop_pmacsdql (op0, op1, op2, t5)); emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
DONE; }
else
{
t1 = gen_reg_rtx (V2DImode);
t2 = gen_reg_rtx (V2DImode);
t3 = gen_reg_rtx (V2DImode);
t4 = gen_reg_rtx (V2DImode);
t5 = gen_reg_rtx (V2DImode);
t6 = gen_reg_rtx (V2DImode);
thirtytwo = GEN_INT (32);
/* Multiply low parts. */
emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
gen_lowpart (V4SImode, op2)));
/* Shift input vectors left 32 bits so we can multiply high parts. */
emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
/* Multiply high parts by low parts. */
emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
gen_lowpart (V4SImode, t3)));
emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
gen_lowpart (V4SImode, t2)));
/* Shift them back. */
emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
/* Add the three parts together. */
emit_insn (gen_addv2di3 (t6, t1, t4));
emit_insn (gen_addv2di3 (op0, t6, t5));
} }
op0 = operands[0];
op1 = operands[1];
op2 = operands[2];
t1 = gen_reg_rtx (V2DImode);
t2 = gen_reg_rtx (V2DImode);
t3 = gen_reg_rtx (V2DImode);
t4 = gen_reg_rtx (V2DImode);
t5 = gen_reg_rtx (V2DImode);
t6 = gen_reg_rtx (V2DImode);
thirtytwo = GEN_INT (32);
/* Multiply low parts. */
emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
gen_lowpart (V4SImode, op2)));
/* Shift input vectors left 32 bits so we can multiply high parts. */
emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
/* Multiply high parts by low parts. */
emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
gen_lowpart (V4SImode, t3)));
emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
gen_lowpart (V4SImode, t2)));
/* Shift them back. */
emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
/* Add the three parts together. */
emit_insn (gen_addv2di3 (t6, t1, t4));
emit_insn (gen_addv2di3 (op0, t6, t5));
DONE; DONE;
}) })
......
2009-11-26 Richard Henderson <rth@redhat.com>
* gcc.target/i386/xop-imul64-vector.c: Look for vpmulld not vpmacsdd.
2009-11-26 Jason Merrill <jason@redhat.com> 2009-11-26 Jason Merrill <jason@redhat.com>
PR c++/42026, DR 239 PR c++/42026, DR 239
......
...@@ -31,6 +31,6 @@ int main () ...@@ -31,6 +31,6 @@ int main ()
exit (0); exit (0);
} }
/* { dg-final { scan-assembler "vpmacsdd" } } */ /* { dg-final { scan-assembler "vpmulld" } } */
/* { dg-final { scan-assembler "vphadddq" } } */ /* { dg-final { scan-assembler "vphadddq" } } */
/* { dg-final { scan-assembler "vpmacsdql" } } */ /* { dg-final { scan-assembler "vpmacsdql" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment