Commit 40d848c7 by Oleg Endo

re PR target/52898 (SH Target: Inefficient DImode comparisons)

gcc/
	PR target/52898
	* config/sh/sh.c (sh_option_override): Remove TARGET_CBRANCHDI4,
	TARGET_CMPEQDI_T.
	(prepare_cbranch_operands): Don't use scratch register.  Assume that
	function is used when pseudos can be created.
	(expand_cbranchdi4): Likewise.  Remove unused TARGET_CMPEQDI_T paths.
	* config/sh/sh.md (cbranchsi4): Allow only when pseudos can be created.
	(cbranchdi4, cbranchdi4_i): Simplify to single cbranchdi4
	define_expand.  Allow it only when pseudos can be created.
	* config/sh/sh.opt (mcbranchdi, mcmpeqdi): Delete.

From-SVN: r235698
parent 126fa2bb
2016-05-02 Oleg Endo <olegendo@gcc.gnu.org>
PR target/52898
* config/sh/sh.c (sh_option_override): Remove TARGET_CBRANCHDI4,
TARGET_CMPEQDI_T.
(prepare_cbranch_operands): Don't use scratch register. Assume that
function is used when pseudos can be created.
(expand_cbranchdi4): Likewise. Remove unused TARGET_CMPEQDI_T paths.
* config/sh/sh.md (cbranchsi4): Allow only when pseudos can be created.
(cbranchdi4, cbranchdi4_i): Simplify to single cbranchdi4
define_expand. Allow it only when pseudos can be created.
* config/sh/sh.opt (mcbranchdi, mcmpeqdi): Delete.
2016-05-01 Uros Bizjak <ubizjak@gmail.com>
* config/i386/constraints.md (BC): Only allow -1 operands.
......
...@@ -809,10 +809,6 @@ sh_option_override (void) ...@@ -809,10 +809,6 @@ sh_option_override (void)
if (optimize > 1 && !optimize_size) if (optimize > 1 && !optimize_size)
target_flags |= MASK_SAVE_ALL_TARGET_REGS; target_flags |= MASK_SAVE_ALL_TARGET_REGS;
/* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
TARGET_CBRANCHDI4 = 1;
TARGET_CMPEQDI_T = 0;
sh_cpu = PROCESSOR_SH1; sh_cpu = PROCESSOR_SH1;
assembler_dialect = 0; assembler_dialect = 0;
if (TARGET_SH2) if (TARGET_SH2)
...@@ -1936,24 +1932,17 @@ enum rtx_code ...@@ -1936,24 +1932,17 @@ enum rtx_code
prepare_cbranch_operands (rtx *operands, machine_mode mode, prepare_cbranch_operands (rtx *operands, machine_mode mode,
enum rtx_code comparison) enum rtx_code comparison)
{ {
/* The scratch reg is only available when this is invoked from within gcc_assert (can_create_pseudo_p ());
the cbranchdi4_i splitter, through expand_cbranchdi4. */
rtx scratch = NULL_RTX;
if (comparison == LAST_AND_UNUSED_RTX_CODE) if (comparison == LAST_AND_UNUSED_RTX_CODE)
comparison = GET_CODE (operands[0]); comparison = GET_CODE (operands[0]);
else
scratch = operands[4];
sh_canonicalize_comparison (comparison, operands[1], operands[2], sh_canonicalize_comparison (comparison, operands[1], operands[2],
mode, false); mode, false);
/* Notice that this function is also invoked after reload by
the cbranchdi4_i pattern, through expand_cbranchdi4. */
rtx op1 = operands[1]; rtx op1 = operands[1];
operands[1] = force_reg (mode, op1);
if (can_create_pseudo_p ())
operands[1] = force_reg (mode, op1);
/* When we are handling DImode comparisons, we want to keep constants so /* When we are handling DImode comparisons, we want to keep constants so
that we can optimize the component comparisons; however, memory loads that we can optimize the component comparisons; however, memory loads
are better issued as a whole so that they can be scheduled well. are better issued as a whole so that they can be scheduled well.
...@@ -1969,15 +1958,8 @@ prepare_cbranch_operands (rtx *operands, machine_mode mode, ...@@ -1969,15 +1958,8 @@ prepare_cbranch_operands (rtx *operands, machine_mode mode,
&& ((comparison != EQ && comparison != NE) && ((comparison != EQ && comparison != NE)
|| (REG_P (op1) && REGNO (op1) != R0_REG) || (REG_P (op1) && REGNO (op1) != R0_REG)
|| !satisfies_constraint_I08 (operands[2]))))) || !satisfies_constraint_I08 (operands[2])))))
{ operands[2] = force_reg (mode, operands[2]);
if (scratch && GET_MODE (scratch) == mode)
{
emit_move_insn (scratch, operands[2]);
operands[2] = scratch;
}
else if (can_create_pseudo_p ())
operands[2] = force_reg (mode, operands[2]);
}
return comparison; return comparison;
} }
...@@ -2027,7 +2009,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2027,7 +2009,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
int num_branches; int num_branches;
int prob, rev_prob; int prob, rev_prob;
int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1; int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
rtx scratch = operands[4];
comparison = prepare_cbranch_operands (operands, DImode, comparison); comparison = prepare_cbranch_operands (operands, DImode, comparison);
op1h = gen_highpart_mode (SImode, DImode, operands[1]); op1h = gen_highpart_mode (SImode, DImode, operands[1]);
...@@ -2039,17 +2020,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2039,17 +2020,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
rev_prob = REG_BR_PROB_BASE - prob; rev_prob = REG_BR_PROB_BASE - prob;
switch (comparison) switch (comparison)
{ {
/* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
That costs 1 cycle more when the first branch can be predicted taken,
but saves us mispredicts because only one branch needs prediction.
It also enables generating the cmpeqdi_t-1 pattern. */
case EQ: case EQ:
if (TARGET_CMPEQDI_T)
{
emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
emit_jump_insn (gen_branch_true (operands[3]));
return true;
}
msw_skip = NE; msw_skip = NE;
lsw_taken = EQ; lsw_taken = EQ;
if (prob >= 0) if (prob >= 0)
...@@ -2070,12 +2041,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2070,12 +2041,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
} }
break; break;
case NE: case NE:
if (TARGET_CMPEQDI_T)
{
emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
emit_jump_insn (gen_branch_false (operands[3]));
return true;
}
msw_taken = NE; msw_taken = NE;
msw_taken_prob = prob; msw_taken_prob = prob;
lsw_taken = NE; lsw_taken = NE;
...@@ -2152,15 +2117,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2152,15 +2117,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
operands[1] = op1h; operands[1] = op1h;
operands[2] = op2h; operands[2] = op2h;
operands[4] = NULL_RTX; operands[4] = NULL_RTX;
if (reload_completed
&& ! arith_reg_or_0_operand (op2h, SImode)
&& (true_regnum (op1h) || (comparison != EQ && comparison != NE))
&& (msw_taken != LAST_AND_UNUSED_RTX_CODE
|| msw_skip != LAST_AND_UNUSED_RTX_CODE))
{
emit_move_insn (scratch, operands[2]);
operands[2] = scratch;
}
if (msw_taken != LAST_AND_UNUSED_RTX_CODE) if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
if (msw_skip != LAST_AND_UNUSED_RTX_CODE) if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
...@@ -2173,13 +2130,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2173,13 +2130,6 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
{ {
operands[1] = op1h; operands[1] = op1h;
operands[2] = op2h; operands[2] = op2h;
if (reload_completed
&& ! arith_reg_or_0_operand (op2h, SImode)
&& (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
{
emit_move_insn (scratch, operands[2]);
operands[2] = scratch;
}
} }
operands[3] = skip_label = gen_label_rtx (); operands[3] = skip_label = gen_label_rtx ();
...@@ -2189,16 +2139,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) ...@@ -2189,16 +2139,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
operands[1] = op1l; operands[1] = op1l;
operands[2] = op2l; operands[2] = op2l;
if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
{ expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
if (reload_completed
&& ! arith_reg_or_0_operand (op2l, SImode)
&& (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
{
emit_move_insn (scratch, operands[2]);
operands[2] = scratch;
}
expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
}
if (msw_skip != LAST_AND_UNUSED_RTX_CODE) if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
emit_label (skip_label); emit_label (skip_label);
return true; return true;
......
...@@ -1115,7 +1115,7 @@ ...@@ -1115,7 +1115,7 @@
(label_ref (match_operand 3 "" "")) (label_ref (match_operand 3 "" ""))
(pc))) (pc)))
(clobber (reg:SI T_REG))] (clobber (reg:SI T_REG))]
"" "can_create_pseudo_p ()"
{ {
expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1); expand_cbranchsi4 (operands, LAST_AND_UNUSED_RTX_CODE, -1);
DONE; DONE;
...@@ -1161,7 +1161,8 @@ ...@@ -1161,7 +1161,8 @@
(label_ref (match_dup 2)) (label_ref (match_dup 2))
(pc)))]) (pc)))])
;; FIXME: These could probably use code iterators for the compare op. ;; FIXME: These don't seem to have any effect on the generated cbranch code
;; anymore, but only on some register allocation choices.
(define_split (define_split
[(set (pc) [(set (pc)
(if_then_else (le (match_operand:SI 0 "arith_reg_operand" "") (if_then_else (le (match_operand:SI 0 "arith_reg_operand" "")
...@@ -1255,48 +1256,12 @@ ...@@ -1255,48 +1256,12 @@
(define_expand "cbranchdi4" (define_expand "cbranchdi4"
[(set (pc) [(set (pc)
(if_then_else (match_operator 0 "comparison_operator" (if_then_else (match_operator 0 "comparison_operator"
[(match_operand:DI 1 "arith_operand" "") [(match_operand:DI 1 "arith_operand")
(match_operand:DI 2 "arith_operand" "")]) (match_operand:DI 2 "arith_operand")])
(label_ref (match_operand 3 "" "")) (label_ref (match_operand 3))
(pc)))
(clobber (match_dup 4))
(clobber (reg:SI T_REG))]
"TARGET_CBRANCHDI4 || TARGET_SH2"
{
enum rtx_code comparison;
if (!TARGET_CBRANCHDI4)
{
sh_emit_compare_and_branch (operands, DImode);
DONE;
}
else
{
if (expand_cbranchdi4 (operands, LAST_AND_UNUSED_RTX_CODE))
DONE;
comparison = prepare_cbranch_operands (operands, DImode,
LAST_AND_UNUSED_RTX_CODE);
if (comparison != GET_CODE (operands[0]))
operands[0]
= gen_rtx_fmt_ee (comparison, VOIDmode, operands[1], operands[2]);
operands[4] = gen_rtx_SCRATCH (SImode);
}
})
(define_insn_and_split "cbranchdi4_i"
[(set (pc)
(if_then_else (match_operator 0 "comparison_operator"
[(match_operand:DI 1 "arith_operand" "r,r")
(match_operand:DI 2 "arith_operand" "rN,I08")])
(label_ref (match_operand 3 "" ""))
(pc))) (pc)))
(clobber (match_scratch:SI 4 "=X,&r"))
(clobber (reg:SI T_REG))] (clobber (reg:SI T_REG))]
"TARGET_CBRANCHDI4" "TARGET_SH2 && can_create_pseudo_p ()"
"#"
"&& reload_completed"
[(pc)]
{ {
if (!expand_cbranchdi4 (operands, GET_CODE (operands[0]))) if (!expand_cbranchdi4 (operands, GET_CODE (operands[0])))
FAIL; FAIL;
......
...@@ -205,14 +205,6 @@ mzdcbranch ...@@ -205,14 +205,6 @@ mzdcbranch
Target Report Var(TARGET_ZDCBRANCH) Target Report Var(TARGET_ZDCBRANCH)
Assume that zero displacement conditional branches are fast. Assume that zero displacement conditional branches are fast.
mcbranchdi
Target Undocumented Var(TARGET_CBRANCHDI4) Warn(%qs is deprecated and has no effect)
Enable cbranchdi4 pattern.
mcmpeqdi
Target Undocumented Var(TARGET_CMPEQDI_T) Warn(%qs is deprecated and has no effect)
Emit cmpeqdi_t pattern even when -mcbranchdi is in effect.
mcbranch-force-delay-slot mcbranch-force-delay-slot
Target Report RejectNegative Var(TARGET_CBRANCH_FORCE_DELAY_SLOT) Init(0) Target Report RejectNegative Var(TARGET_CBRANCH_FORCE_DELAY_SLOT) Init(0)
Force the usage of delay slots for conditional branches. Force the usage of delay slots for conditional branches.
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment