Commit 55eaaa5b by Richard Henderson, committed by Steve Ellcey

re PR target/46997 (new ia64 vector instructions are broken on HP-UX (big-endian))

2011-02-04  Richard Henderson  <rth@redhat.com>
	    Steve Ellcey  <sje@cup.hp.com>

	PR target/46997
	* config/ia64/predicates.md (mux1_brcst_element): New.
	* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
	* config/ia64/ia64.c (ia64_unpack_assemble): New.
	(ia64_unpack_sign): New.
	(ia64_expand_unpack): Rewrite using new routines.
	(ia64_expand_widen_sum): Ditto.
	(ia64_expand_dot_prod_v8qi): Ditto.
	* config/ia64/vect.md (mulv8qi3): Rewrite to use new
	routines, add endian check.
	(pmpy2_even): Rename from pmpy2_r, add endian check.
	(pmpy2_odd): Rename from pmpy2_l, add endian check.
	(vec_widen_smult_lo_v4hi): Rewrite using new routines.
	(vec_widen_smult_hi_v4hi): Ditto.
	(vec_widen_umult_lo_v4hi): Ditto.
	(vec_widen_umult_hi_v4hi): Ditto.
	(mulv2si3): Change endian checks.
	(sdot_prodv4hi): Rewrite with new calls.
	(udot_prodv4hi): New.
	(vec_pack_ssat_v4hi): Add endian check.
	(vec_pack_usat_v4hi): Ditto.
	(vec_pack_ssat_v2si): Ditto.
	(max1_even): Rename from max1_r, add endian check.
	(max1_odd): Rename from max1_l, add endian check.
	(*mux1_rev): Format change.
	(*mux1_mix): Ditto.
	(*mux1_shuf): Ditto.
	(*mux1_alt): Ditto.
	(*mux1_brcst_v8qi): Use new predicate.
	(vec_extract_evenv8qi): Remove endian check.
	(vec_extract_oddv8qi): Ditto.
	(vec_interleave_lowv4hi): Format change.
	(vec_interleave_highv4hi): Ditto.
	(mix2_even): Rename from mix2_r, add endian check.
	(mix2_odd): Rename from mix2_l, add endian check.
	(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
	(vec_extract_evenodd_helper): Format change.
	(vec_extract_evenv4hi): Remove endian check.
	(vec_extract_oddv4hi): Remove endian check.
	(vec_interleave_lowv2si): Format change.
	(vec_interleave_highv2si): Format change.
	(vec_initv2si): Remove endian check.
	(vecinit_v2si): Add endian check.
	(reduc_splus_v2sf): Add endian check.
	(reduc_smax_v2sf): Ditto.
	(reduc_smin_v2sf): Ditto.
	(vec_initv2sf): Remove endian check.
	(fpack): Add endian check.
	(fswap): Add endian check.
	(vec_interleave_highv2sf): Add endian check.
	(vec_interleave_lowv2sf): Add endian check.
	(fmix_lr): Add endian check.
	(vec_setv2sf): Format change.
	(*vec_extractv2sf_0_be): Use shift to extract operand.
	(*vec_extractv2sf_1_be): New.
	(vec_pack_trunc_v4hi): Add endian check.
	(vec_pack_trunc_v2si): Format change.

Co-Authored-By: Steve Ellcey <sje@cup.hp.com>

From-SVN: r169840
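The recurring issue behind these changes: ia64 mix/pack/unpack instructions select vector elements by their position in memory, so a big-endian target must swap the two source operands (and the .l/.r instruction forms) relative to little-endian. A rough scalar model of the widening unpack that the new ia64_unpack_sign/ia64_unpack_assemble routines implement follows; the helper names interleave_low and unpack_low are illustrative, not from the patch.

#include <stdint.h>

/* Model of vec_interleave_lowv8qi: pair the first four elements of
   A and B, in memory order.  */
static void
interleave_low (const uint8_t *a, const uint8_t *b, uint8_t out[8])
{
  for (int i = 0; i < 4; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}

/* Widening unpack: pair each data byte with a sign byte (zero when
   unsigned, all-ones for negative elements when signed).  The low
   byte of each 16-bit result lives at opposite ends on little- and
   big-endian targets, hence the operand swap under TARGET_BIG_ENDIAN.  */
static void
unpack_low (const uint8_t *data, const uint8_t *sign, uint8_t out[8],
            int big_endian)
{
  if (big_endian)
    interleave_low (sign, data, out);
  else
    interleave_low (data, sign, out);
}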
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
 extern void ia64_expand_compare (rtx *, rtx *, rtx *);
 extern void ia64_expand_vecint_cmov (rtx[]);
 extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
 extern void ia64_expand_unpack (rtx [], bool, bool);
 extern void ia64_expand_widen_sum (rtx[], bool);
-extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
 extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
 extern void ia64_expand_call (rtx, rtx, rtx, int);
 extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
   return true;
 }
 
-/* Emit an integral vector unpack operation.  */
+/* The vectors LO and HI each contain N halves of a double-wide vector.
+   Reassemble either the first N/2 or the second N/2 elements.  */
 
 void
-ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
 {
-  enum machine_mode mode = GET_MODE (operands[1]);
+  enum machine_mode mode = GET_MODE (lo);
   rtx (*gen) (rtx, rtx, rtx);
   rtx x;
 
@@ -1993,110 +1994,66 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
       gcc_unreachable ();
     }
 
-  /* Fill in x with the sign extension of each element in op1.  */
-  if (unsignedp)
-    x = CONST0_RTX (mode);
-  else
-    {
-      bool neg;
-
-      x = gen_reg_rtx (mode);
-
-      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
-					CONST0_RTX (mode));
-      gcc_assert (!neg);
-    }
-
+  x = gen_lowpart (mode, out);
   if (TARGET_BIG_ENDIAN)
-    emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1]));
+    x = gen (x, hi, lo);
   else
-    emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
+    x = gen (x, lo, hi);
+  emit_insn (x);
 }
 
-/* Emit an integral vector widening sum operations.  */
+/* Return a vector of the sign-extension of VEC.  */
 
-void
-ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
+static rtx
+ia64_unpack_sign (rtx vec, bool unsignedp)
 {
-  rtx l, h, x, s;
-  enum machine_mode wmode, mode;
-  rtx (*unpack_l) (rtx, rtx, rtx);
-  rtx (*unpack_h) (rtx, rtx, rtx);
-  rtx (*plus) (rtx, rtx, rtx);
-
-  wmode = GET_MODE (operands[0]);
-  mode = GET_MODE (operands[1]);
-
-  switch (mode)
-    {
-    case V8QImode:
-      unpack_l = gen_vec_interleave_lowv8qi;
-      unpack_h = gen_vec_interleave_highv8qi;
-      plus = gen_addv4hi3;
-      break;
-    case V4HImode:
-      unpack_l = gen_vec_interleave_lowv4hi;
-      unpack_h = gen_vec_interleave_highv4hi;
-      plus = gen_addv2si3;
-      break;
-    default:
-      gcc_unreachable ();
-    }
+  enum machine_mode mode = GET_MODE (vec);
+  rtx zero = CONST0_RTX (mode);
 
-  /* Fill in x with the sign extension of each element in op1.  */
   if (unsignedp)
-    x = CONST0_RTX (mode);
+    return zero;
   else
     {
+      rtx sign = gen_reg_rtx (mode);
       bool neg;
 
-      x = gen_reg_rtx (mode);
-
-      neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
-					CONST0_RTX (mode));
+      neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
       gcc_assert (!neg);
+
+      return sign;
     }
+}
 
-  l = gen_reg_rtx (wmode);
-  h = gen_reg_rtx (wmode);
-  s = gen_reg_rtx (wmode);
+/* Emit an integral vector unpack operation.  */
 
-  if (TARGET_BIG_ENDIAN)
-    {
-      emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1]));
-      emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1]));
-    }
-  else
-    {
-      emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
-      emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
-    }
-  emit_insn (plus (s, l, operands[2]));
-  emit_insn (plus (operands[0], h, s));
+void
+ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
+{
+  rtx sign = ia64_unpack_sign (operands[1], unsignedp);
+  ia64_unpack_assemble (operands[0], operands[1], sign, highp);
 }
 
+/* Emit an integral vector widening sum operations.  */
+
 void
-ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
+ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
 {
-  rtx l = gen_reg_rtx (V4HImode);
-  rtx h = gen_reg_rtx (V4HImode);
-  rtx (*mulhigh)(rtx, rtx, rtx, rtx);
-  rtx (*interl)(rtx, rtx, rtx);
+  enum machine_mode wmode;
+  rtx l, h, t, sign;
 
-  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  sign = ia64_unpack_sign (operands[1], unsignedp);
 
-  /* For signed, pmpy2.r would appear to more closely match this operation.
-     However, the vectorizer is more likely to use the LO and HI patterns
-     in pairs.  At which point, with this formulation, the first two insns
-     of each can be CSEd.  */
-  mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
-  emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
+  wmode = GET_MODE (operands[0]);
+  l = gen_reg_rtx (wmode);
+  h = gen_reg_rtx (wmode);
 
-  interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l));
-  else
-    emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
+  ia64_unpack_assemble (l, operands[1], sign, false);
+  ia64_unpack_assemble (h, operands[1], sign, true);
+
+  t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
+  t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
+  if (t != operands[0])
+    emit_move_insn (operands[0], t);
 }
 
 /* Emit a signed or unsigned V8QI dot product operation.  */
@@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
 void
 ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
 {
-  rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
+  rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
+  rtx p1, p2, p3, p4, s1, s2, s3;
 
-  /* Fill in x1 and x2 with the sign extension of each element.  */
-  if (unsignedp)
-    x1 = x2 = CONST0_RTX (V8QImode);
-  else
-    {
-      bool neg;
-
-      x1 = gen_reg_rtx (V8QImode);
-      x2 = gen_reg_rtx (V8QImode);
-
-      neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
-					CONST0_RTX (V8QImode));
-      gcc_assert (!neg);
-      neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
-					CONST0_RTX (V8QImode));
-      gcc_assert (!neg);
-    }
+  op1 = operands[1];
+  op2 = operands[2];
+  sn1 = ia64_unpack_sign (op1, unsignedp);
+  sn2 = ia64_unpack_sign (op2, unsignedp);
 
   l1 = gen_reg_rtx (V4HImode);
   l2 = gen_reg_rtx (V4HImode);
   h1 = gen_reg_rtx (V4HImode);
   h2 = gen_reg_rtx (V4HImode);
-
-  if (TARGET_BIG_ENDIAN)
-    {
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l1), x1, operands[1]));
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l2), x2, operands[2]));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h1), x1, operands[1]));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h2), x2, operands[2]));
-    }
-  else
-    {
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l1), operands[1], x1));
-      emit_insn (gen_vec_interleave_lowv8qi
-		 (gen_lowpart (V8QImode, l2), operands[2], x2));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h1), operands[1], x1));
-      emit_insn (gen_vec_interleave_highv8qi
-		 (gen_lowpart (V8QImode, h2), operands[2], x2));
-    }
+  ia64_unpack_assemble (l1, op1, sn1, false);
+  ia64_unpack_assemble (l2, op2, sn2, false);
+  ia64_unpack_assemble (h1, op1, sn1, true);
+  ia64_unpack_assemble (h2, op2, sn2, true);
 
   p1 = gen_reg_rtx (V2SImode);
   p2 = gen_reg_rtx (V2SImode);
   p3 = gen_reg_rtx (V2SImode);
   p4 = gen_reg_rtx (V2SImode);
-  emit_insn (gen_pmpy2_r (p1, l1, l2));
-  emit_insn (gen_pmpy2_l (p2, l1, l2));
-  emit_insn (gen_pmpy2_r (p3, h1, h2));
-  emit_insn (gen_pmpy2_l (p4, h1, h2));
+  emit_insn (gen_pmpy2_even (p1, l1, l2));
+  emit_insn (gen_pmpy2_even (p2, h1, h2));
+  emit_insn (gen_pmpy2_odd (p3, l1, l2));
+  emit_insn (gen_pmpy2_odd (p4, h1, h2));
 
   s1 = gen_reg_rtx (V2SImode);
   s2 = gen_reg_rtx (V2SImode);
--- a/gcc/config/ia64/predicates.md
+++ b/gcc/config/ia64/predicates.md
@@ -624,3 +624,7 @@
   return REG_P (op) && REG_POINTER (op);
 })
+
+;; True if this is the right-most vector element; for mux1 @brcst.
+(define_predicate "mux1_brcst_element"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -172,35 +172,14 @@
 	  (match_operand:V8QI 2 "gr_register_operand" "r")))]
   ""
 {
-  rtx r1, l1, r2, l2, rm, lm;
-
-  r1 = gen_reg_rtx (V4HImode);
-  l1 = gen_reg_rtx (V4HImode);
-  r2 = gen_reg_rtx (V4HImode);
-  l2 = gen_reg_rtx (V4HImode);
-
-  /* Zero-extend the QImode elements into two words of HImode elements
-     by interleaving them with zero bytes.  */
-  emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
-			 operands[1], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
-			 operands[2], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
-			 operands[1], CONST0_RTX (V8QImode)));
-  emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
-			 operands[2], CONST0_RTX (V8QImode)));
-
-  /* Multiply.  */
-  rm = gen_reg_rtx (V4HImode);
-  lm = gen_reg_rtx (V4HImode);
-  emit_insn (gen_mulv4hi3 (rm, r1, r2));
-  emit_insn (gen_mulv4hi3 (lm, l1, l2));
-
-  /* Zap the high order bytes of the HImode elements by overwriting those
-     in one part with the low order bytes of the other.  */
-  emit_insn (gen_mix1_r (operands[0],
-			 gen_lowpart (V8QImode, rm),
-			 gen_lowpart (V8QImode, lm)));
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2]));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l));
+  else
+    emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h));
   DONE;
 })
@@ -296,7 +275,7 @@
   "pmpyshr2.u %0 = %1, %2, %3"
   [(set_attr "itanium_class" "mmmul")])
 
-(define_insn "pmpy2_r"
+(define_insn "pmpy2_even"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -308,10 +287,16 @@
 	      (match_operand:V4HI 2 "gr_register_operand" "r"))
 	    (parallel [(const_int 0) (const_int 2)]))))]
   ""
-  "pmpy2.r %0 = %1, %2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pmpy2.l %0 = %1, %2";
+  else
+    return "%,pmpy2.r %0 = %1, %2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "pmpy2_l"
+(define_insn "pmpy2_odd"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -323,7 +308,13 @@
 	      (match_operand:V4HI 2 "gr_register_operand" "r"))
 	    (parallel [(const_int 1) (const_int 3)]))))]
   ""
-  "pmpy2.l %0 = %1, %2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pmpy2.r %0 = %1, %2";
+  else
+    return "%,pmpy2.l %0 = %1, %2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_expand "vec_widen_smult_lo_v4hi"
@@ -332,7 +323,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, false, false);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, false);
   DONE;
 })
@@ -342,7 +337,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, false, true);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, true);
   DONE;
 })
@@ -352,7 +351,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, true, false);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, false);
   DONE;
 })
@@ -362,7 +365,11 @@
    (match_operand:V4HI 2 "gr_register_operand" "")]
   ""
 {
-  ia64_expand_widen_mul_v4hi (operands, true, true);
+  rtx l = gen_reg_rtx (V4HImode);
+  rtx h = gen_reg_rtx (V4HImode);
+  emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
+  emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
+  ia64_unpack_assemble (operands[0], l, h, true);
   DONE;
 })
@@ -390,12 +397,8 @@
      of the full 32-bit product.  */
 
   /* T0 = CDBA.  */
-  if (TARGET_BIG_ENDIAN)
-    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx,
-					       const1_rtx, const0_rtx));
-  else
-    x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
-					       GEN_INT (3), const2_rtx));
+  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
+					     GEN_INT (3), const2_rtx));
   x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
   emit_insn (gen_rtx_SET (VOIDmode, t0, x));
@@ -409,15 +412,28 @@
   emit_insn (gen_mulv4hi3 (t3, t0, op2h));
 
   /* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW.  */
-  emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2));
+  x = gen_lowpart (V4HImode, t4);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_odd (x, t2, t1);
+  else
+    x = gen_mix2_even (x, t1, t2);
+  emit_insn (x);
 
   /* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16.  */
-  emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5),
-			 CONST0_RTX (V4HImode), t3));
+  x = gen_lowpart (V4HImode, t5);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode));
+  else
+    x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3);
+  emit_insn (x);
 
   /* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16.  */
-  emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6),
-			 CONST0_RTX (V4HImode), t3));
+  x = gen_lowpart (V4HImode, t6);
+  if (TARGET_BIG_ENDIAN)
+    x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode));
+  else
+    x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3);
+  emit_insn (x);
 
   emit_insn (gen_addv2si3 (t7, t4, t5));
   emit_insn (gen_addv2si3 (operands[0], t6, t7));
@@ -612,16 +628,36 @@
    (match_operand:V2SI 3 "gr_register_operand" "")]
   ""
 {
-  rtx l, r, t;
-
-  r = gen_reg_rtx (V2SImode);
-  l = gen_reg_rtx (V2SImode);
-  t = gen_reg_rtx (V2SImode);
-
-  emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
-  emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
-  emit_insn (gen_addv2si3 (t, r, operands[3]));
-  emit_insn (gen_addv2si3 (operands[0], t, l));
+  rtx e, o, t;
+
+  e = gen_reg_rtx (V2SImode);
+  o = gen_reg_rtx (V2SImode);
+  t = gen_reg_rtx (V2SImode);
+
+  emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
+  emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
+  emit_insn (gen_addv2si3 (t, e, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], t, o));
+  DONE;
+})
+
+(define_expand "udot_prodv4hi"
+  [(match_operand:V2SI 0 "gr_register_operand" "")
+   (match_operand:V4HI 1 "gr_register_operand" "")
+   (match_operand:V4HI 2 "gr_register_operand" "")
+   (match_operand:V2SI 3 "gr_register_operand" "")]
+  ""
+{
+  rtx l, h, t;
+
+  l = gen_reg_rtx (V2SImode);
+  h = gen_reg_rtx (V2SImode);
+  t = gen_reg_rtx (V2SImode);
+
+  emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
+  emit_insn (gen_addv2si3 (t, l, operands[3]));
+  emit_insn (gen_addv2si3 (operands[0], t, h));
   DONE;
 })
@@ -677,7 +713,13 @@
 	  (ss_truncate:V4QI
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack2.sss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack2.sss %0 = %r2, %r1";
+  else
+    return "%,pack2.sss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_pack_usat_v4hi"
@@ -688,7 +730,13 @@
 	  (us_truncate:V4QI
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack2.uss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack2.uss %0 = %r2, %r1";
+  else
+    return "%,pack2.uss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_pack_ssat_v2si"
@@ -699,7 +747,13 @@
 	  (ss_truncate:V2HI
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
   ""
-  "pack4.sss %0 = %r1, %r2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,pack4.sss %0 = %r2, %r1";
+  else
+    return "%,pack4.sss %0 = %r1, %r2";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "vec_interleave_lowv8qi"
@@ -742,54 +796,54 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_r"
+(define_insn "mix1_even"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
 	    (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 8)
-		     (const_int 2)
-		     (const_int 10)
-		     (const_int 4)
-		     (const_int 12)
-		     (const_int 6)
-		     (const_int 14)])))]
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
   ""
-  "mix1.r %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix1.l %0 = %r1, %r2";
+  else
+    return "%,mix1.r %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix1_l"
+(define_insn "mix1_odd"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
 	    (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 9)
-		     (const_int 3)
-		     (const_int 11)
-		     (const_int 5)
-		     (const_int 13)
-		     (const_int 7)
-		     (const_int 15)])))]
+	  (parallel [(const_int 1) (const_int 9)
+		     (const_int 3) (const_int 11)
+		     (const_int 5) (const_int 13)
+		     (const_int 7) (const_int 15)])))]
   ""
-  "mix1.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix1.r %0 = %r1, %r2";
+  else
+    return "%,mix1.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "*mux1_rev"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 7)
-		     (const_int 6)
-		     (const_int 5)
-		     (const_int 4)
-		     (const_int 3)
-		     (const_int 2)
-		     (const_int 1)
-		     (const_int 0)])))]
+	  (parallel [(const_int 7) (const_int 6)
+		     (const_int 5) (const_int 4)
+		     (const_int 3) (const_int 2)
+		     (const_int 1) (const_int 0)])))]
   ""
   "mux1 %0 = %1, @rev"
   [(set_attr "itanium_class" "mmshf")])
@@ -798,14 +852,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 2)
-		     (const_int 6)
-		     (const_int 1)
-		     (const_int 5)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 2) (const_int 6)
+		     (const_int 1) (const_int 5)
+		     (const_int 3) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @mix"
   [(set_attr "itanium_class" "mmshf")])
@@ -814,14 +864,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 1)
-		     (const_int 5)
-		     (const_int 2)
-		     (const_int 6)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)
+		     (const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @shuf"
   [(set_attr "itanium_class" "mmshf")])
@@ -830,14 +876,10 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 2)
-		     (const_int 4)
-		     (const_int 6)
-		     (const_int 1)
-		     (const_int 3)
-		     (const_int 5)
-		     (const_int 7)])))]
+	  (parallel [(const_int 0) (const_int 2)
+		     (const_int 4) (const_int 6)
+		     (const_int 1) (const_int 3)
+		     (const_int 5) (const_int 7)])))]
   ""
   "mux1 %0 = %1, @alt"
   [(set_attr "itanium_class" "mmshf")])
@@ -846,14 +888,14 @@
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (match_operand:V8QI 1 "gr_register_operand" "r")
-	  (parallel [(const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)
-		     (const_int 0)])))]
+	  (parallel [(match_operand 2 "mux1_brcst_element" "")
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)
+		     (match_dup 2)])))]
   ""
   "mux1 %0 = %1, @brcst"
   [(set_attr "itanium_class" "mmshf")])
@@ -873,10 +915,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix1_l (temp, operands[2], operands[1]));
-  else
-    emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
+  emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -888,10 +927,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V8QImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix1_r (temp, operands[2], operands[1]));
-  else
-    emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
+  emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
   emit_insn (gen_mux1_alt (operands[0], temp));
   DONE;
 })
@@ -902,10 +938,8 @@
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 1)
-		     (const_int 5)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 1) (const_int 5)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -922,10 +956,8 @@
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 2)
-		     (const_int 6)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 2) (const_int 6)
+		     (const_int 3) (const_int 7)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -936,32 +968,40 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix2_r"
+(define_insn "mix2_even"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 4)
-		     (const_int 2)
-		     (const_int 6)])))]
+	  (parallel [(const_int 0) (const_int 4)
+		     (const_int 2) (const_int 6)])))]
   ""
-  "mix2.r %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix2.l %0 = %r1, %r2";
+  else
+    return "%,mix2.r %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "mix2_l"
+(define_insn "mix2_odd"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
 	    (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 5)
-		     (const_int 3)
-		     (const_int 7)])))]
+	  (parallel [(const_int 1) (const_int 5)
+		     (const_int 3) (const_int 7)])))]
   ""
-  "mix2.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,mix2.r %0 = %r1, %r2";
+  else
+    return "%,mix2.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 (define_insn "*mux2"
(define_insn "*mux2" (define_insn "*mux2"
...@@ -974,17 +1014,17 @@ ...@@ -974,17 +1014,17 @@
(match_operand 5 "const_int_2bit_operand" "")])))] (match_operand 5 "const_int_2bit_operand" "")])))]
"" ""
{ {
int mask; int mask = 0;
if (TARGET_BIG_ENDIAN) if (TARGET_BIG_ENDIAN)
{ {
mask = INTVAL (operands[2]) << 4; mask |= (3 - INTVAL (operands[2])) << 6;
mask |= INTVAL (operands[3]) << 6; mask |= (3 - INTVAL (operands[3])) << 4;
mask |= INTVAL (operands[4]); mask |= (3 - INTVAL (operands[4])) << 2;
mask |= INTVAL (operands[5]) << 2; mask |= 3 - INTVAL (operands[5]);
} }
else else
{ {
mask = INTVAL (operands[2]); mask |= INTVAL (operands[2]);
mask |= INTVAL (operands[3]) << 2; mask |= INTVAL (operands[3]) << 2;
mask |= INTVAL (operands[4]) << 4; mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6; mask |= INTVAL (operands[5]) << 6;
@@ -998,10 +1038,8 @@
   [(set (match_operand:V4HI 0 "gr_register_operand" "")
 	(vec_select:V4HI
 	  (match_operand:V4HI 1 "gr_register_operand" "")
-	  (parallel [(const_int 0)
-		     (const_int 2)
-		     (const_int 1)
-		     (const_int 3)])))]
+	  (parallel [(const_int 0) (const_int 2)
+		     (const_int 1) (const_int 3)])))]
   "")
 
 (define_expand "vec_extract_evenv4hi"
@@ -1011,10 +1049,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
-  else
-    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
+  emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1026,10 +1061,7 @@
   ""
 {
   rtx temp = gen_reg_rtx (V4HImode);
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
-  else
-    emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
+  emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
   emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
   DONE;
 })
@@ -1042,15 +1074,13 @@
   "mux2 %0 = %1, 0"
   [(set_attr "itanium_class" "mmshf")])
 
-;; Note that mix4.r performs the exact same operation.
 (define_insn "vec_interleave_lowv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
 	    (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 0)
-		     (const_int 2)])))]
+	  (parallel [(const_int 0) (const_int 2)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -1061,15 +1091,13 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-;; Note that mix4.l performs the exact same operation.
 (define_insn "vec_interleave_highv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
 	    (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
 	    (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
-	  (parallel [(const_int 1)
-		     (const_int 3)])))]
+	  (parallel [(const_int 1) (const_int 3)])))]
   ""
 {
   /* Recall that vector elements are numbered in memory order.  */
@@ -1088,7 +1116,7 @@
 {
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
-					   operands[2]));
+					    operands[2]));
   else
     emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
 					   operands[2]));
@@ -1103,7 +1131,7 @@
 {
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
-					   operands[2]));
+					    operands[2]));
   else
     emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
 					    operands[2]));
@@ -1131,10 +1159,7 @@
   if (!gr_reg_or_0_operand (op2, SImode))
     op2 = force_reg (SImode, op2);
 
-  if (TARGET_BIG_ENDIAN)
-    x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1);
-  else
-    x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
+  x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
   emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
   DONE;
 })
@@ -1145,7 +1170,13 @@
 	 (match_operand:SI 1 "gr_reg_or_0_operand" "rO")
 	 (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
   ""
-  "unpack4.l %0 = %r2, %r1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,unpack4.l %0 = %r1, %r2";
+  else
+    return "%,unpack4.l %0 = %r2, %r1";
+}
   [(set_attr "itanium_class" "mmshf")])
 
 ;; Missing operations
@@ -1315,7 +1346,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
   emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1326,7 +1360,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
   emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1337,7 +1374,10 @@
   ""
 {
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
+  else
+    emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
  emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp));
   DONE;
 })
@@ -1403,10 +1443,7 @@
   if (!fr_reg_or_fp01_operand (op2, SFmode))
     op2 = force_reg (SFmode, op2);
 
-  if (TARGET_BIG_ENDIAN)
-    emit_insn (gen_fpack (operands[0], op2, op1));
-  else
-    emit_insn (gen_fpack (operands[0], op1, op2));
+  emit_insn (gen_fpack (operands[0], op1, op2));
   DONE;
 })
@@ -1416,7 +1453,13 @@
 	 (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
 	 (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
   ""
-  "fpack %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fpack %0 = %F1, %F2";
+  else
+    return "%,fpack %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "fswap"
@@ -1427,7 +1470,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 1) (const_int 2)])))]
   ""
-  "fswap %0 = %F1, %F2"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fswap %0 = %F2, %F1";
+  else
+    return "%,fswap %0 = %F1, %F2";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "vec_interleave_highv2sf"
@@ -1438,7 +1487,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 1) (const_int 3)])))]
   ""
-  "fmix.l %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.r %0 = %F1, %F2";
+  else
+    return "%,fmix.l %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "vec_interleave_lowv2sf"
@@ -1449,7 +1504,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 0) (const_int 2)])))]
   ""
-  "fmix.r %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.l %0 = %F1, %F2";
+  else
+    return "%,fmix.r %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_insn "fmix_lr"
@@ -1460,7 +1521,13 @@
 	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
 	  (parallel [(const_int 0) (const_int 3)])))]
   ""
-  "fmix.lr %0 = %F2, %F1"
+{
+  /* Recall that vector elements are numbered in memory order.  */
+  if (TARGET_BIG_ENDIAN)
+    return "%,fmix.lr %0 = %F1, %F2";
+  else
+    return "%,fmix.lr %0 = %F2, %F1";
+}
   [(set_attr "itanium_class" "fmisc")])
 
 (define_expand "vec_extract_evenv2sf"
@@ -1485,23 +1552,24 @@
   DONE;
 })
 
 (define_expand "vec_setv2sf"
   [(match_operand:V2SF 0 "fr_register_operand" "")
    (match_operand:SF 1 "fr_register_operand" "")
    (match_operand 2 "const_int_operand" "")]
   ""
 {
+  rtx op0 = operands[0];
   rtx tmp = gen_reg_rtx (V2SFmode);
   emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
 
   switch (INTVAL (operands[2]))
     {
     case 0:
-      emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
+      emit_insn (gen_fmix_lr (op0, tmp, op0));
       break;
     case 1:
-      emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp));
+      emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
       break;
     default:
       gcc_unreachable ();
@@ -1528,8 +1596,8 @@
 })
 
 (define_insn_and_split "*vec_extractv2sf_0_be"
-  [(set (match_operand:SF 0 "register_operand" "=r,f")
-	(unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r")
+  [(set (match_operand:SF 0 "register_operand" "=rf,r")
+	(unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r")
 		    (const_int 0)]
 		   UNSPEC_VECT_EXTR))]
   "TARGET_BIG_ENDIAN"
@@ -1537,31 +1605,44 @@
   "reload_completed"
   [(set (match_dup 0) (match_dup 1))]
 {
-  if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
-    operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+  if (MEM_P (operands[1]))
+    operands[1] = adjust_address (operands[1], SFmode, 0);
   else
-    operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+    {
+      emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+      DONE;
+    }
 })
 
-(define_insn_and_split "*vec_extractv2sf_1"
+(define_insn_and_split "*vec_extractv2sf_1_le"
   [(set (match_operand:SF 0 "register_operand" "=r")
 	(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
 		    (const_int 1)]
 		   UNSPEC_VECT_EXTR))]
-  ""
+  "!TARGET_BIG_ENDIAN"
   "#"
-  "reload_completed"
+  "&& reload_completed"
   [(const_int 0)]
 {
   operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
   operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
-  if (TARGET_BIG_ENDIAN)
-    emit_move_insn (operands[0], operands[1]);
-  else
-    emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+  emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
   DONE;
 })
 
+(define_insn_and_split "*vec_extractv2sf_1_be"
+  [(set (match_operand:SF 0 "register_operand" "=rf")
+	(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+		    (const_int 1)]
+		   UNSPEC_VECT_EXTR))]
+  "TARGET_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
 (define_expand "vec_extractv2sf"
   [(set (match_operand:SF 0 "register_operand" "")
 	(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
@@ -1610,11 +1691,14 @@
   [(match_operand:V8QI 0 "gr_register_operand" "")
    (match_operand:V4HI 1 "gr_register_operand" "")
    (match_operand:V4HI 2 "gr_register_operand" "")]
-  "!TARGET_BIG_ENDIAN"
+  ""
 {
-  rtx op1 = gen_lowpart(V8QImode, operands[1]);
-  rtx op2 = gen_lowpart(V8QImode, operands[2]);
-  emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
+  rtx op1 = gen_lowpart (V8QImode, operands[1]);
+  rtx op2 = gen_lowpart (V8QImode, operands[2]);
+  if (TARGET_BIG_ENDIAN)
+    emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
+  else
+    emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
   DONE;
 })
@@ -1624,8 +1708,8 @@
    (match_operand:V2SI 2 "gr_register_operand" "")]
   ""
 {
-  rtx op1 = gen_lowpart(V4HImode, operands[1]);
-  rtx op2 = gen_lowpart(V4HImode, operands[2]);
+  rtx op1 = gen_lowpart (V4HImode, operands[1]);
+  rtx op2 = gen_lowpart (V4HImode, operands[2]);
   if (TARGET_BIG_ENDIAN)
     emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
   else