Commit 1312c276 by Richard Henderson Committed by Richard Henderson

ia64: Update vector multiply pattern names; delete dot_prod

With the even/odd v4hi multiply pattern names, we'll automatically
generate the same code for dot_prodv4hi.  The dot_prodv8qi pattern
was actively incorrect wrt output mode.

        * config/ia64/vect.md (smulv4hi3_highpart): New.
        (umulv4hi3_highpart): New.
        (vec_widen_smult_even_v4hi): Rename from pmpy2_even.
        (vec_widen_smult_odd_v4hi): Rename from pmpy2_odd.
        (udot_prodv8qi, sdot_prodv8qi): Remove.
        (sdot_prodv4hi, udot_prodv4hi): Remove.
        * config/ia64/ia64.c (ia64_expand_dot_prod_v8qi): Remove.
        * config/ia64/ia64-protos.h: Update.

From-SVN: r189674
parent e6d4f8f5
2012-07-19 Richard Henderson <rth@redhat.com>
* config/ia64/vect.md (smulv4hi3_highpart): New.
(umulv4hi3_highpart): New.
(vec_widen_smult_even_v4hi): Rename from pmpy2_even.
(vec_widen_smult_odd_v4hi): Rename from pmpy2_odd.
(udot_prodv8qi, sdot_prodv8qi): Remove.
(sdot_prodv4hi, udot_prodv4hi): Remove.
* config/ia64/ia64.c (ia64_expand_dot_prod_v8qi): Remove.
* config/ia64/ia64-protos.h: Update.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Do not require
vectors larger than UNITS_PER_WORD.
......
...@@ -43,7 +43,6 @@ extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); ...@@ -43,7 +43,6 @@ extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
extern void ia64_unpack_assemble (rtx, rtx, rtx, bool); extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
extern void ia64_expand_unpack (rtx [], bool, bool); extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool); extern void ia64_expand_widen_sum (rtx[], bool);
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
extern void ia64_reload_gp (void); extern void ia64_reload_gp (void);
......
...@@ -2096,46 +2096,6 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp) ...@@ -2096,46 +2096,6 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
emit_move_insn (operands[0], t); emit_move_insn (operands[0], t);
} }
/* Emit a signed or unsigned V8QI dot product operation.

   OPERANDS[1] and OPERANDS[2] are the two vectors to multiply,
   OPERANDS[3] is the value added in through gen_addv2si3, and
   OPERANDS[0] receives the final V2SImode sum.  UNSIGNEDP is passed
   to ia64_unpack_sign for both inputs.
   NOTE(review): UNSIGNEDP presumably selects zero- versus
   sign-extension of the byte elements -- confirm against
   ia64_unpack_sign.  */
void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
rtx p1, p2, p3, p4, s1, s2, s3;
op1 = operands[1];
op2 = operands[2];
/* Extension data for each input, consumed by ia64_unpack_assemble.  */
sn1 = ia64_unpack_sign (op1, unsignedp);
sn2 = ia64_unpack_sign (op2, unsignedp);
/* Unpack each input into two V4HImode registers; the last argument of
   ia64_unpack_assemble is false for l1/l2 and true for h1/h2.  */
l1 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
ia64_unpack_assemble (l1, op1, sn1, false);
ia64_unpack_assemble (l2, op2, sn2, false);
ia64_unpack_assemble (h1, op1, sn1, true);
ia64_unpack_assemble (h2, op2, sn2, true);
/* Four V2SImode partial products: the even and odd multiplies of the
   l* pair and of the h* pair.  */
p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode);
p3 = gen_reg_rtx (V2SImode);
p4 = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_even (p1, l1, l2));
emit_insn (gen_pmpy2_even (p2, h1, h2));
emit_insn (gen_pmpy2_odd (p3, l1, l2));
emit_insn (gen_pmpy2_odd (p4, h1, h2));
/* Reduce: s1 = even products, s2 = odd products, s3 = s1 plus the
   incoming addend, and the result is s2 + s3.  */
s1 = gen_reg_rtx (V2SImode);
s2 = gen_reg_rtx (V2SImode);
s3 = gen_reg_rtx (V2SImode);
emit_insn (gen_addv2si3 (s1, p1, p2));
emit_insn (gen_addv2si3 (s2, p3, p4));
emit_insn (gen_addv2si3 (s3, s1, operands[3]));
emit_insn (gen_addv2si3 (operands[0], s2, s3));
}
/* Emit the appropriate sequence for a call. */ /* Emit the appropriate sequence for a call. */
void void
......
...@@ -278,7 +278,29 @@ ...@@ -278,7 +278,29 @@
"pmpyshr2.u %0 = %1, %2, %3" "pmpyshr2.u %0 = %1, %2, %3"
[(set_attr "itanium_class" "mmmul")]) [(set_attr "itanium_class" "mmmul")])
(define_insn "pmpy2_even" (define_expand "smulv4hi3_highpart"
[(match_operand:V4HI 0 "gr_register_operand")
(match_operand:V4HI 1 "gr_register_operand")
(match_operand:V4HI 2 "gr_register_operand")]
""
{
emit_insn (gen_pmpyshr2 (operands[0], operands[1],
operands[2], GEN_INT (16)));
DONE;
})
;; Unsigned V4HI high-part multiply.  Operands 0, 1 and 2 are all V4HI
;; general registers; the expander emits the pmpyshr2_u pattern with a
;; constant shift count of 16.
;; NOTE(review): presumably the shift by 16 keeps the upper half of each
;; widened product -- confirm against the pmpyshr2_u definition.
(define_expand "umulv4hi3_highpart"
[(match_operand:V4HI 0 "gr_register_operand")
(match_operand:V4HI 1 "gr_register_operand")
(match_operand:V4HI 2 "gr_register_operand")]
""
{
emit_insn (gen_pmpyshr2_u (operands[0], operands[1],
operands[2], GEN_INT (16)));
DONE;
})
(define_insn "vec_widen_smult_even_v4hi"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r") [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI (mult:V2SI
(vec_select:V2SI (vec_select:V2SI
...@@ -299,7 +321,7 @@ ...@@ -299,7 +321,7 @@
} }
[(set_attr "itanium_class" "mmshf")]) [(set_attr "itanium_class" "mmshf")])
(define_insn "pmpy2_odd" (define_insn "vec_widen_smult_odd_v4hi"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r") [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI (mult:V2SI
(vec_select:V2SI (vec_select:V2SI
...@@ -602,68 +624,6 @@ ...@@ -602,68 +624,6 @@
DONE; DONE;
}) })
;; Unsigned V8QI dot product.  Operands 1 and 2 are the V8QI inputs,
;; operand 3 is a V2SI addend and operand 0 is the V2SI result.  All of
;; the work is done by ia64_expand_dot_prod_v8qi, called with its
;; second argument set to true.
(define_expand "udot_prodv8qi"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V8QI 1 "gr_register_operand" "")
(match_operand:V8QI 2 "gr_register_operand" "")
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
ia64_expand_dot_prod_v8qi (operands, true);
DONE;
})
;; Signed V8QI dot product.  Operands 1 and 2 are the V8QI inputs,
;; operand 3 is a V2SI addend and operand 0 is the V2SI result.  All of
;; the work is done by ia64_expand_dot_prod_v8qi, called with its
;; second argument set to false.
(define_expand "sdot_prodv8qi"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V8QI 1 "gr_register_operand" "")
(match_operand:V8QI 2 "gr_register_operand" "")
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
ia64_expand_dot_prod_v8qi (operands, false);
DONE;
})
;; Signed V4HI dot product.  Operands 1 and 2 are the V4HI inputs,
;; operand 3 is a V2SI addend and operand 0 is the V2SI result.
(define_expand "sdot_prodv4hi"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V4HI 1 "gr_register_operand" "")
(match_operand:V4HI 2 "gr_register_operand" "")
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
rtx e, o, t;
/* Fresh V2SI temporaries: E and O for the pmpy2_even and pmpy2_odd
   products, T for the intermediate sum.  */
e = gen_reg_rtx (V2SImode);
o = gen_reg_rtx (V2SImode);
t = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
/* Result = (E + addend) + O.  */
emit_insn (gen_addv2si3 (t, e, operands[3]));
emit_insn (gen_addv2si3 (operands[0], t, o));
DONE;
})
;; Unsigned V4HI dot product.  Operands 1 and 2 are the V4HI inputs,
;; operand 3 is a V2SI addend and operand 0 is the V2SI result.
(define_expand "udot_prodv4hi"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V4HI 1 "gr_register_operand" "")
(match_operand:V4HI 2 "gr_register_operand" "")
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
rtx l, h, t;
/* Fresh V2SI temporaries: L and H for the vec_widen_umult_lo_v4hi and
   vec_widen_umult_hi_v4hi products, T for the intermediate sum.  */
l = gen_reg_rtx (V2SImode);
h = gen_reg_rtx (V2SImode);
t = gen_reg_rtx (V2SImode);
emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
/* Result = (L + addend) + H.  */
emit_insn (gen_addv2si3 (t, l, operands[3]));
emit_insn (gen_addv2si3 (operands[0], t, h));
DONE;
})
(define_expand "vcond<mode><mode>" (define_expand "vcond<mode><mode>"
[(set (match_operand:VECINT 0 "gr_register_operand" "") [(set (match_operand:VECINT 0 "gr_register_operand" "")
(if_then_else:VECINT (if_then_else:VECINT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment