Commit 9fb93f89 by Richard Henderson Committed by Richard Henderson

i386.c (ix86_expand_int_vcond): Remove unsignedp argument.

        * config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
        argument.  Simplify canonicalization of condition.  Use unsigned
        saturating subtraction for QI and HImode unsigned compares.  Use
        bit arithmetic tricks for SImode unsigned compares.
        * config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
        * config/i386/sse.md (SSEMODE14): New.
        (umaxv8hi3): Use us_minus+plus to avoid vcond.
        (umaxv4si3): New.
        (smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
        (smin<SSEMODE14>3): Similarly with sminv16qi3.
        (umin<SSEMODE24>3): Similarly with uminv8hi3.

        * lib/target-supports.exp (check_effective_target_vect_no_max):
        Remove i386 and x86_64.

From-SVN: r101429
parent 88be5d43
2005-06-29 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_expand_int_vcond): Remove unsignedp
argument. Simplify canonicalization of condition. Use unsigned
saturating subtraction for QI and HImode unsigned compares. Use
bit arithmetic tricks for SImode unsigned compares.
* config/i386/i386-protos.h (ix86_expand_int_vcond): Update decl.
* config/i386/sse.md (SSEMODE14): New.
(umaxv8hi3): Use us_minus+plus to avoid vcond.
(umaxv4si3): New.
(smax<SSEMODE14>3): Rename from smaxv16qi3 and macroize.
(smin<SSEMODE14>3): Similarly with sminv16qi3.
(umin<SSEMODE24>3): Similarly with uminv8hi3.
2005-06-29 Ian Lance Taylor <ian@airs.com> 2005-06-29 Ian Lance Taylor <ian@airs.com>
* dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change * dwarf2out.c (expand_builtin_init_dwarf_reg_sizes): Change
......
...@@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx); ...@@ -149,7 +149,7 @@ extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]); extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]); extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]); extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[], bool); extern bool ix86_expand_int_vcond (rtx[]);
extern int ix86_expand_int_addcc (rtx[]); extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx); extern void x86_initialize_trampoline (rtx, rtx, rtx);
......
...@@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[]) ...@@ -10501,94 +10501,102 @@ ix86_expand_fp_vcond (rtx operands[])
/* Expand a signed integral vector conditional move. */ /* Expand a signed integral vector conditional move. */
bool bool
ix86_expand_int_vcond (rtx operands[], bool unsignedp) ix86_expand_int_vcond (rtx operands[])
{ {
enum machine_mode mode = GET_MODE (operands[0]); enum machine_mode mode = GET_MODE (operands[0]);
enum rtx_code code = GET_CODE (operands[3]); enum rtx_code code = GET_CODE (operands[3]);
rtx cmp, x; bool negate = false;
rtx x, cop0, cop1;
if (unsignedp) cop0 = operands[4];
code = signed_condition (code); cop1 = operands[5];
if (code == NE || code == LE || code == GE)
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{ {
/* Inverse of a supported code. */ case EQ:
x = operands[1]; case GT:
operands[1] = operands[2]; case GTU:
operands[2] = x; break;
case NE:
case LE:
case LEU:
code = reverse_condition (code); code = reverse_condition (code);
} negate = true;
if (code == LT) break;
{
/* Swap of a supported code. */ case GE:
x = operands[4]; case GEU:
operands[4] = operands[5]; code = reverse_condition (code);
operands[5] = x; negate = true;
/* FALLTHRU */
case LT:
case LTU:
code = swap_condition (code); code = swap_condition (code);
} x = cop0, cop0 = cop1, cop1 = x;
gcc_assert (code == EQ || code == GT); break;
/* Unlike floating-point, we can rely on the optimizers to have already default:
converted to MIN/MAX expressions, so we don't have to handle that. */ gcc_unreachable ();
}
/* Unsigned GT is not directly supported. We can zero-extend QI and /* Unsigned parallel compare is not supported by the hardware. Play some
HImode elements to the next wider element size, use a signed compare, tricks to turn this into a signed comparison against 0. */
then repack. For three extra instructions, this is definitely a win. */ if (code == GTU)
if (code == GT && unsignedp)
{ {
rtx o0l, o0h, o1l, o1h, cl, ch, zero;
enum machine_mode wider;
rtx (*unpackl) (rtx, rtx, rtx);
rtx (*unpackh) (rtx, rtx, rtx);
rtx (*pack) (rtx, rtx, rtx);
switch (mode) switch (mode)
{ {
case V16QImode: case V4SImode:
wider = V8HImode; {
unpackl = gen_sse2_punpcklbw; rtx t1, t2, mask;
unpackh = gen_sse2_punpckhbw;
pack = gen_sse2_packsswb; /* Perform a parallel modulo subtraction. */
t1 = gen_reg_rtx (mode);
emit_insn (gen_subv4si3 (t1, cop0, cop1));
/* Extract the original sign bit of op0. */
mask = GEN_INT (-0x80000000);
mask = gen_rtx_CONST_VECTOR (mode,
gen_rtvec (4, mask, mask, mask, mask));
mask = force_reg (mode, mask);
t2 = gen_reg_rtx (mode);
emit_insn (gen_andv4si3 (t2, cop0, mask));
/* XOR it back into the result of the subtraction. This results
in the sign bit set iff we saw unsigned underflow. */
x = gen_reg_rtx (mode);
emit_insn (gen_xorv4si3 (x, t1, t2));
code = GT;
}
break; break;
case V16QImode:
case V8HImode: case V8HImode:
wider = V4SImode; /* Perform a parallel unsigned saturating subtraction. */
unpackl = gen_sse2_punpcklwd; x = gen_reg_rtx (mode);
unpackh = gen_sse2_punpckhwd; emit_insn (gen_rtx_SET (VOIDmode, x,
pack = gen_sse2_packssdw; gen_rtx_US_MINUS (mode, cop0, cop1)));
code = EQ;
negate = !negate;
break; break;
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
operands[4] = force_reg (mode, operands[4]); cop0 = x;
operands[5] = force_reg (mode, operands[5]); cop1 = CONST0_RTX (mode);
o0l = gen_reg_rtx (wider);
o0h = gen_reg_rtx (wider);
o1l = gen_reg_rtx (wider);
o1h = gen_reg_rtx (wider);
cl = gen_reg_rtx (wider);
ch = gen_reg_rtx (wider);
cmp = gen_reg_rtx (mode);
zero = force_reg (mode, CONST0_RTX (mode));
emit_insn (unpackl (gen_lowpart (mode, o0l), operands[4], zero));
emit_insn (unpackh (gen_lowpart (mode, o0h), operands[4], zero));
emit_insn (unpackl (gen_lowpart (mode, o1l), operands[5], zero));
emit_insn (unpackh (gen_lowpart (mode, o1h), operands[5], zero));
x = gen_rtx_GT (wider, o0l, o1l);
emit_insn (gen_rtx_SET (VOIDmode, cl, x));
x = gen_rtx_GT (wider, o0h, o1h);
emit_insn (gen_rtx_SET (VOIDmode, ch, x));
emit_insn (pack (cmp, cl, ch));
} }
else
cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
operands[1], operands[2]);
ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
operands[1+negate], operands[2-negate]);
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
operands[2-negate]);
return true; return true;
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
;; Mix-n-match ;; Mix-n-match
(define_mode_macro SSEMODE12 [V16QI V8HI]) (define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI]) (define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI]) (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI]) (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
...@@ -2741,26 +2742,6 @@ ...@@ -2741,26 +2742,6 @@
operands[1] = gen_lowpart (TImode, operands[1]); operands[1] = gen_lowpart (TImode, operands[1]);
}) })
(define_expand "smaxv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(smax:V16QI (match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0];
xops[1] = operands[1];
xops[2] = operands[2];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops, false);
gcc_assert (ok);
DONE;
})
(define_expand "umaxv16qi3" (define_expand "umaxv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "") [(set (match_operand:V16QI 0 "register_operand" "")
(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
...@@ -2794,33 +2775,42 @@ ...@@ -2794,33 +2775,42 @@
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
(define_expand "umaxv8hi3" (define_expand "umaxv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "") [(set (match_operand:V8HI 0 "register_operand" "=x")
(umax:V8HI (match_operand:V8HI 1 "register_operand" "") (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
(match_operand:V8HI 2 "register_operand" "")))] (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
(set (match_dup 3)
(plus:V8HI (match_dup 0) (match_dup 2)))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx xops[6], t1, t2; operands[3] = operands[0];
bool ok; if (rtx_equal_p (operands[0], operands[2]))
operands[0] = gen_reg_rtx (V8HImode);
})
t1 = gen_reg_rtx (V8HImode); (define_expand "smax<mode>3"
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[2], operands[1])); [(set (match_operand:SSEMODE14 0 "register_operand" "")
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode)); (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
(match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0]; xops[0] = operands[0];
xops[1] = operands[1]; xops[1] = operands[1];
xops[2] = operands[2]; xops[2] = operands[2];
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2); xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = t1; xops[4] = operands[1];
xops[5] = t2; xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops, false); ok = ix86_expand_int_vcond (xops);
gcc_assert (ok); gcc_assert (ok);
DONE; DONE;
}) })
(define_expand "sminv16qi3" (define_expand "umaxv4si3"
[(set (match_operand:V16QI 0 "register_operand" "") [(set (match_operand:V4SI 0 "register_operand" "")
(smin:V16QI (match_operand:V16QI 1 "register_operand" "") (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V16QI 2 "register_operand" "")))] (match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx xops[6]; rtx xops[6];
...@@ -2829,10 +2819,10 @@ ...@@ -2829,10 +2819,10 @@
xops[0] = operands[0]; xops[0] = operands[0];
xops[1] = operands[1]; xops[1] = operands[1];
xops[2] = operands[2]; xops[2] = operands[2];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = operands[2]; xops[4] = operands[1];
xops[5] = operands[1]; xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops, false); ok = ix86_expand_int_vcond (xops);
gcc_assert (ok); gcc_assert (ok);
DONE; DONE;
}) })
...@@ -2869,26 +2859,42 @@ ...@@ -2869,26 +2859,42 @@
[(set_attr "type" "sseiadd") [(set_attr "type" "sseiadd")
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
(define_expand "uminv8hi3" (define_expand "smin<mode>3"
[(set (match_operand:V8HI 0 "register_operand" "") [(set (match_operand:SSEMODE14 0 "register_operand" "")
(umin:V8HI (match_operand:V8HI 1 "register_operand" "") (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")))] (match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
rtx xops[6], t1, t2; rtx xops[6];
bool ok; bool ok;
t1 = gen_reg_rtx (V8HImode); xops[0] = operands[0];
emit_insn (gen_sse2_ussubv8hi3 (t1, operands[1], operands[2])); xops[1] = operands[2];
t2 = force_reg (V8HImode, CONST0_RTX (V8HImode)); xops[2] = operands[1];
xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
xops[4] = operands[1];
xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops);
gcc_assert (ok);
DONE;
})
(define_expand "umin<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "")
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
(match_operand:SSEMODE24 2 "register_operand" "")))]
"TARGET_SSE2"
{
rtx xops[6];
bool ok;
xops[0] = operands[0]; xops[0] = operands[0];
xops[1] = operands[1]; xops[1] = operands[2];
xops[2] = operands[2]; xops[2] = operands[1];
xops[3] = gen_rtx_EQ (VOIDmode, t1, t2); xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
xops[4] = t1; xops[4] = operands[1];
xops[5] = t2; xops[5] = operands[2];
ok = ix86_expand_int_vcond (xops, false); ok = ix86_expand_int_vcond (xops);
gcc_assert (ok); gcc_assert (ok);
DONE; DONE;
}) })
...@@ -2929,7 +2935,7 @@ ...@@ -2929,7 +2935,7 @@
(match_operand:SSEMODE124 2 "general_operand" "")))] (match_operand:SSEMODE124 2 "general_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
if (ix86_expand_int_vcond (operands, false)) if (ix86_expand_int_vcond (operands))
DONE; DONE;
else else
FAIL; FAIL;
...@@ -2945,7 +2951,7 @@ ...@@ -2945,7 +2951,7 @@
(match_operand:SSEMODE12 2 "general_operand" "")))] (match_operand:SSEMODE12 2 "general_operand" "")))]
"TARGET_SSE2" "TARGET_SSE2"
{ {
if (ix86_expand_int_vcond (operands, true)) if (ix86_expand_int_vcond (operands))
DONE; DONE;
else else
FAIL; FAIL;
......
2005-06-29 Richard Henderson <rth@redhat.com>
* lib/target-supports.exp (check_effective_target_vect_no_max):
Remove i386 and x86_64.
2005-06-29 Steve Ellcey <sje@cup.hp.com> 2005-06-29 Steve Ellcey <sje@cup.hp.com>
PR testsuite/21969 PR testsuite/21969
......
...@@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } { ...@@ -973,9 +973,7 @@ proc check_effective_target_vect_no_max { } {
verbose "check_effective_target_vect_no_max: using cached result" 2 verbose "check_effective_target_vect_no_max: using cached result" 2
} else { } else {
set et_vect_no_max_saved 0 set et_vect_no_max_saved 0
if { [istarget i?86-*-*] if { [istarget sparc*-*-*]
|| [istarget x86_64-*-*]
|| [istarget sparc*-*-*]
|| [istarget alpha*-*-*] } { || [istarget alpha*-*-*] } {
set et_vect_no_max_saved 1 set et_vect_no_max_saved 1
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment