Commit 93330ea1 by Richard Henderson Committed by Richard Henderson

i386.c (ix86_split_ashldi): Special case op1 as one or minus one.

        * config/i386/i386.c (ix86_split_ashldi): Special case op1 as one
        or minus one.
        (ix86_split_ashrdi, ix86_split_lshrdi): Clean up conditions for
        post-reload, and scratch NULL.
        * config/i386/i386.md (testqi_1): Use FLAGS_REG name.
        (x86_shift_adj_2): Use ix86_expand_clear.
        (ashldi3): Remove CMOVE expansion difference.
        (ashldi3_1): Remove.
        (*ashldi3_1): Rename from *ashldi3_2.  Use reg_or_pm1_operand and
        add constraints for immediates.  New peephole for split-with-temp.
        Run splitter after peep2 pass.
        (ashrdi3, ashrdi3_1, ashrdi3_2): Similarly.
        (lshrdi3, lshrdi3_1, lshrdi3_2): Similarly.
        (setcc_2): Rename with *.
        * config/i386/predicates.md (reg_or_pm1_operand): New.
        (ashldi_input_operand): New.

From-SVN: r87398
parent 21076c8e
2004-09-12 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_split_ashldi): Special case op1 as one
or minus one.
(ix86_split_ashrdi, ix86_split_lshrdi): Clean up conditions for
post-reload, and scratch NULL.
* config/i386/i386.md (testqi_1): Use FLAGS_REG name.
(x86_shift_adj_2): Use ix86_expand_clear.
(ashldi3): Remove CMOVE expansion difference.
(ashldi3_1): Remove.
(*ashldi3_1): Rename from *ashldi3_2. Use reg_or_pm1_operand and
add constraints for immediates. New peephole for split-with-temp.
Run splitter after peep2 pass.
(ashrdi3, ashrdi3_1, ashrdi3_2): Similarly.
(lshrdi3, lshrdi3_1, lshrdi3_2): Similarly.
(setcc_2): Rename with *.
* config/i386/predicates.md (reg_or_pm1_operand): New.
(ashldi_input_operand): New.
2004-09-12 Richard Henderson <rth@redhat.com> 2004-09-12 Richard Henderson <rth@redhat.com>
* print-rtl.c (print_decl_name): New. * print-rtl.c (print_decl_name): New.
......
...@@ -10019,30 +10019,88 @@ ix86_split_ashldi (rtx *operands, rtx scratch) ...@@ -10019,30 +10019,88 @@ ix86_split_ashldi (rtx *operands, rtx scratch)
emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count))); emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
ix86_expand_ashlsi3_const (low[0], count); ix86_expand_ashlsi3_const (low[0], count);
} }
return;
} }
else
split_di (operands, 1, low, high);
if (operands[1] == const1_rtx)
{ {
if (!rtx_equal_p (operands[0], operands[1])) /* Assuming we've chosen a QImode capable registers, then 1LL << N
emit_move_insn (operands[0], operands[1]); can be done with two 32-bit shifts, no branches, no cmoves. */
if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
{
rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
split_di (operands, 1, low, high); ix86_expand_clear (low[0]);
ix86_expand_clear (high[0]);
emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
d = gen_lowpart (QImode, low[0]);
d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
s = gen_rtx_EQ (QImode, flags, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, d, s));
emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2])); d = gen_lowpart (QImode, high[0]);
emit_insn (gen_ashlsi3 (low[0], low[0], operands[2])); d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
s = gen_rtx_NE (QImode, flags, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, d, s));
}
if (TARGET_CMOVE && (! no_new_pseudos || scratch)) /* Otherwise, we can get the same results by manually performing
a bit extract operation on bit 5, and then performing the two
shifts. The two methods of getting 0/1 into low/high are exactly
the same size. Avoiding the shift in the bit extract case helps
pentium4 a bit; no one else seems to care much either way. */
else
{ {
if (! no_new_pseudos) rtx x;
scratch = force_reg (SImode, const0_rtx);
if (TARGET_PARTIAL_REG_STALL && !optimize_size)
x = gen_rtx_ZERO_EXTEND (SImode, operands[2]);
else else
emit_move_insn (scratch, const0_rtx); x = gen_lowpart (SImode, operands[2]);
emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (5)));
scratch)); emit_insn (gen_andsi3 (high[0], high[0], GEN_INT (1)));
emit_move_insn (low[0], high[0]);
emit_insn (gen_xorsi3 (low[0], low[0], GEN_INT (1)));
} }
emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
emit_insn (gen_ashlsi3 (high[0], high[0], operands[2]));
return;
}
if (operands[1] == constm1_rtx)
{
/* For -1LL << N, we can avoid the shld instruction, because we
know that we're shifting 0...31 ones into a -1. */
emit_move_insn (low[0], constm1_rtx);
if (optimize_size)
emit_move_insn (high[0], low[0]);
else else
emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); emit_move_insn (high[0], constm1_rtx);
} }
else
{
if (!rtx_equal_p (operands[0], operands[1]))
emit_move_insn (operands[0], operands[1]);
split_di (operands, 1, low, high);
emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
}
emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
if (TARGET_CMOVE && scratch)
{
ix86_expand_clear (scratch);
emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
}
else
emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
} }
void void
...@@ -10066,15 +10124,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch) ...@@ -10066,15 +10124,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch)
else if (count >= 32) else if (count >= 32)
{ {
emit_move_insn (low[0], high[1]); emit_move_insn (low[0], high[1]);
emit_move_insn (high[0], low[0]);
if (! reload_completed) emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
else
{
emit_move_insn (high[0], low[0]);
emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
}
if (count > 32) if (count > 32)
emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32))); emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
} }
...@@ -10096,10 +10147,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch) ...@@ -10096,10 +10147,8 @@ ix86_split_ashrdi (rtx *operands, rtx scratch)
emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2])); emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
emit_insn (gen_ashrsi3 (high[0], high[0], operands[2])); emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
if (TARGET_CMOVE && (! no_new_pseudos || scratch)) if (TARGET_CMOVE && scratch)
{ {
if (! no_new_pseudos)
scratch = gen_reg_rtx (SImode);
emit_move_insn (scratch, high[0]); emit_move_insn (scratch, high[0]);
emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31))); emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
...@@ -10124,7 +10173,7 @@ ix86_split_lshrdi (rtx *operands, rtx scratch) ...@@ -10124,7 +10173,7 @@ ix86_split_lshrdi (rtx *operands, rtx scratch)
if (count >= 32) if (count >= 32)
{ {
emit_move_insn (low[0], high[1]); emit_move_insn (low[0], high[1]);
emit_move_insn (high[0], const0_rtx); ix86_expand_clear (high[0]);
if (count > 32) if (count > 32)
emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32))); emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
...@@ -10148,13 +10197,9 @@ ix86_split_lshrdi (rtx *operands, rtx scratch) ...@@ -10148,13 +10197,9 @@ ix86_split_lshrdi (rtx *operands, rtx scratch)
emit_insn (gen_lshrsi3 (high[0], high[0], operands[2])); emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
/* Heh. By reversing the arguments, we can reuse this pattern. */ /* Heh. By reversing the arguments, we can reuse this pattern. */
if (TARGET_CMOVE && (! no_new_pseudos || scratch)) if (TARGET_CMOVE && scratch)
{ {
if (! no_new_pseudos) ix86_expand_clear (scratch);
scratch = force_reg (SImode, const0_rtx);
else
emit_move_insn (scratch, const0_rtx);
emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
scratch)); scratch));
} }
......
...@@ -553,12 +553,23 @@ ...@@ -553,12 +553,23 @@
return op == const1_rtx || op == constm1_rtx; return op == const1_rtx || op == constm1_rtx;
}) })
;; True if OP is a register operand, or a const_int that is exactly
;; const1_rtx or constm1_rtx (i.e. the constant 1 or -1).
;; Used to optimize double-word shifts.
(define_predicate "reg_or_pm1_operand"
(ior (match_operand 0 "register_operand")
(and (match_code "const_int")
(match_test "op == const1_rtx || op == constm1_rtx"))))
;; True if OP is acceptable as operand of DImode shift expander. ;; True if OP is acceptable as operand of DImode shift expander.
(define_predicate "shiftdi_operand" (define_predicate "shiftdi_operand"
(if_then_else (match_test "TARGET_64BIT") (if_then_else (match_test "TARGET_64BIT")
(match_operand 0 "nonimmediate_operand") (match_operand 0 "nonimmediate_operand")
(match_operand 0 "register_operand"))) (match_operand 0 "register_operand")))
;; True if OP is a nonimmediate operand when TARGET_64BIT; otherwise
;; true if OP satisfies reg_or_pm1_operand.
;; NOTE(review): presumably the input operand of the DImode left-shift
;; (ashldi3) expander -- confirm against i386.md.
(define_predicate "ashldi_input_operand"
(if_then_else (match_test "TARGET_64BIT")
(match_operand 0 "nonimmediate_operand")
(match_operand 0 "reg_or_pm1_operand")))
;; Return true if OP is a vector load from the constant pool with just ;; Return true if OP is a vector load from the constant pool with just
;; the first element non-zero. ;; the first element non-zero.
(define_predicate "zero_extended_scalar_load_operand" (define_predicate "zero_extended_scalar_load_operand"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment