Commit c7cb1555 by Bernd Schmidt Committed by Bernd Schmidt

bfin.md (UNSPEC_VOLATILE_STALL): New constant.

	* config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant.
	(attr "addrtype"): New member "spreg".
	Use it if mem_spfp_address_operand is true for the address.
	(attr "type"): New entry "stall".
	(cpu_unit "load"): New.
	(insn_reservations "load32", "loadp", "loadi"): Add reservation of
	"load".
	(insn_reservation "loadsp"): New.
	(insn_reservation "load_stall1"): New.
	(insn_reservation "load_stall3"): New.
	(stall): New insn.	
	* config/bfin/predicates.md (const1_operand, const3_operand): New.
	(mem_p_address_operand): Exclude stack and frame pointer based
	addresses.
	(mem_spfp_address_operand): New; match them here.
	* config/bfin/bfin.c (add_sched_insns_for_speculation): New function.
	(bfin_reorg): Call it if scheduling insns.
	(bfin_gen_bundles): Remove dummy insns created by
	add_sched_insns_for_speculation.

From-SVN: r151486
parent 4a50e99c
2009-09-07 Bernd Schmidt <bernd.schmidt@analog.com>
* config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant.
(attr "addrtype"): New member "spreg".
Use it if mem_spfp_address_operand is true for the address.
(attr "type"): New entry "stall".
(cpu_unit "load"): New.
(insn_reservations "load32", "loadp", "loadi"): Add reservation of
"load".
(insn_reservation "loadsp"): New.
(insn_reservation "load_stall1"): New.
(insn_reservation "load_stall3"): New.
(stall): New insn.
* config/bfin/predicates.md (const1_operand, const3_operand): New.
(mem_p_address_operand): Exclude stack and frame pointer based
addresses.
(mem_spfp_address_operand): New; match them here.
* config/bfin/bfin.c (add_sched_insns_for_speculation): New function.
(bfin_reorg): Call it if scheduling insns.
(bfin_gen_bundles): Remove dummy insns created by
add_sched_insns_for_speculation.
2009-09-07 Martin Jambor <mjambor@suse.cz>
PR middle-end/41282
......
......@@ -4784,15 +4784,27 @@ bfin_gen_bundles (void)
for (insn = BB_HEAD (bb);; insn = next)
{
int at_end;
rtx delete_this = NULL_RTX;
if (INSN_P (insn))
{
if (get_attr_type (insn) == TYPE_DSP32)
slot[0] = insn;
else if (slot[1] == NULL_RTX)
slot[1] = insn;
enum attr_type type = get_attr_type (insn);
if (type == TYPE_STALL)
{
gcc_assert (n_filled == 0);
delete_this = insn;
}
else
slot[2] = insn;
n_filled++;
{
if (type == TYPE_DSP32)
slot[0] = insn;
else if (slot[1] == NULL_RTX)
slot[1] = insn;
else
slot[2] = insn;
n_filled++;
}
}
next = NEXT_INSN (insn);
......@@ -4807,7 +4819,7 @@ bfin_gen_bundles (void)
/* BB_END can change due to emitting extra NOPs, so check here. */
at_end = insn == BB_END (bb);
if (at_end || GET_MODE (next) == TImode)
if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
{
if ((n_filled < 2
|| !gen_one_bundle (slot))
......@@ -4826,6 +4838,8 @@ bfin_gen_bundles (void)
n_filled = 0;
slot[0] = slot[1] = slot[2] = NULL_RTX;
}
if (delete_this != NULL_RTX)
delete_insn (delete_this);
if (at_end)
break;
}
......@@ -5226,6 +5240,65 @@ workaround_speculation (void)
}
}
/* Called just before the final scheduling pass. If we need to insert NOPs
later on to work around speculative loads, insert special placeholder
insns that cause loads to be delayed for as many cycles as necessary
(and possible). This reduces the number of NOPs we need to add.
The dummy insns we generate are later removed by bfin_gen_bundles.

The placeholders are "stall" insns (gen_stall) carrying a constant operand
of 1 or 3; the operand selects the load_stall1 or load_stall3 scheduler
reservation, which keeps the dummy "load" unit busy.

Does nothing unless at least one of ENABLE_WA_SPECULATIVE_LOADS,
ENABLE_WA_SPECULATIVE_SYNCS or ENABLE_WA_INDIRECT_CALLS is enabled. */
static void
add_sched_insns_for_speculation (void)
{
rtx insn;
/* No workaround active: no placeholders are needed. */
if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
&& ! ENABLE_WA_INDIRECT_CALLS)
return;
/* First pass: for every conditional jump that is not predicted taken,
put a stall (3) placeholder in front of the next real insn after the
jump, i.e. at the start of the fall-through path. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
rtx pat;
/* Notes, barriers and labels are skipped before PATTERN is read. */
if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
continue;
pat = PATTERN (insn);
/* Skip USE/CLOBBER markers, jump tables and inline asm. */
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
|| GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
|| GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
continue;
if (JUMP_P (insn))
{
if (any_condjump_p (insn)
&& !cbranch_predicted_taken_p (insn))
{
/* NOTE(review): presumably next_real_insn can return NULL when no
real insn follows; N is used unchecked -- confirm this cannot
happen for a conditional jump. */
rtx n = next_real_insn (insn);
emit_insn_before (gen_stall (GEN_INT (3)), n);
}
}
}
/* Second pass: for every conditional jump that is predicted taken, put a
stall (1) placeholder in front of the first real insn at the branch
destination. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (JUMP_P (insn)
&& any_condjump_p (insn)
&& (cbranch_predicted_taken_p (insn)))
{
rtx target = JUMP_LABEL (insn);
/* NOTE(review): NEXT is dereferenced unchecked below -- confirm a
branch target is always followed by a real insn. */
rtx next = next_real_insn (target);
/* A stall placeholder (with either operand) is already at the
destination; do not add a second one. */
if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
&& get_attr_type (next) == TYPE_STALL)
continue;
emit_insn_before (gen_stall (GEN_INT (1)), next);
}
}
}
/* We use the machine specific reorg pass for emitting CSYNC instructions
after conditional branches as needed.
......@@ -5259,6 +5332,8 @@ bfin_reorg (void)
split_all_insns ();
splitting_for_sched = 0;
add_sched_insns_for_speculation ();
timevar_push (TV_SCHED2);
schedule_insns ();
timevar_pop (TV_SCHED2);
......
......@@ -146,7 +146,8 @@
(UNSPEC_VOLATILE_SSYNC 2)
(UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
(UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
(UNSPEC_VOLATILE_DUMMY 5)])
(UNSPEC_VOLATILE_DUMMY 5)
(UNSPEC_VOLATILE_STALL 6)])
(define_constants
[(MACFLAG_NONE 0)
......@@ -163,16 +164,20 @@
(MACFLAG_IH 11)])
(define_attr "type"
"move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy"
"move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall"
(const_string "misc"))
(define_attr "addrtype" "32bit,preg,ireg"
(define_attr "addrtype" "32bit,preg,spreg,ireg"
(cond [(and (eq_attr "type" "mcld")
(and (match_operand 0 "d_register_operand" "")
(match_operand 1 "mem_p_address_operand" "")))
(const_string "preg")
(and (eq_attr "type" "mcld")
(and (match_operand 0 "d_register_operand" "")
(match_operand 1 "mem_spfp_address_operand" "")))
(const_string "spreg")
(and (eq_attr "type" "mcld")
(and (match_operand 0 "d_register_operand" "")
(match_operand 1 "mem_i_address_operand" "")))
(const_string "ireg")
(and (eq_attr "type" "mcst")
......@@ -181,6 +186,10 @@
(const_string "preg")
(and (eq_attr "type" "mcst")
(and (match_operand 1 "d_register_operand" "")
(match_operand 0 "mem_spfp_address_operand" "")))
(const_string "spreg")
(and (eq_attr "type" "mcst")
(and (match_operand 1 "d_register_operand" "")
(match_operand 0 "mem_i_address_operand" "")))
(const_string "ireg")]
(const_string "32bit")))
......@@ -199,6 +208,10 @@
(define_cpu_unit "store" "bfin")
(define_cpu_unit "pregs" "bfin")
;; A dummy unit used to delay scheduling of loads after a conditional
;; branch.
(define_cpu_unit "load" "bfin")
(define_reservation "core" "slot0+slot1+slot2")
(define_insn_reservation "alu" 1
......@@ -216,17 +229,22 @@
(define_insn_reservation "load32" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit")))
"core")
"core+load")
(define_insn_reservation "loadp" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg")))
"(slot1|slot2)+pregs+load")
;; Single-insn loads whose addrtype is "spreg", i.e. whose address matches
;; mem_spfp_address_operand (SP or FP based).  Unlike "loadp" this does not
;; reserve the dummy "load" unit, so the stall placeholder reservations,
;; which occupy "load", do not hold these loads back.
(define_insn_reservation "loadsp" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg")))
"(slot1|slot2)+pregs")
(define_insn_reservation "loadi" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg")))
"(slot1|slot2)")
"(slot1|slot2)+load")
(define_insn_reservation "store32" 1
(and (not (eq_attr "seq_insns" "multi"))
......@@ -235,7 +253,8 @@
(define_insn_reservation "storep" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcst") (eq_attr "addrtype" "preg")))
(and (eq_attr "type" "mcst")
(ior (eq_attr "addrtype" "preg") (eq_attr "addrtype" "spreg"))))
"(slot1|slot2)+pregs+store")
(define_insn_reservation "storei" 1
......@@ -247,6 +266,16 @@
(eq_attr "seq_insns" "multi")
"core")
;; Reservation for a "stall" placeholder whose operand is the constant 1.
;; It takes the whole core in its first cycle and keeps the dummy "load"
;; unit reserved for two consecutive cycles ("load*2"), so insns whose
;; reservation also includes "load" cannot issue during that time.
(define_insn_reservation "load_stall1" 1
(and (eq_attr "type" "stall")
(match_operand 0 "const1_operand" ""))
"core+load*2")
;; Reservation for a "stall" placeholder whose operand is the constant 3.
;; It takes the whole core in its first cycle and keeps the dummy "load"
;; unit reserved for four consecutive cycles ("load*4"), so insns whose
;; reservation also includes "load" cannot issue during that time.
(define_insn_reservation "load_stall3" 1
(and (eq_attr "type" "stall")
(match_operand 0 "const3_operand" ""))
"core+load*4")
(absence_set "slot0" "slot1,slot2")
(absence_set "slot1" "slot2")
......@@ -2667,6 +2696,9 @@
gcc_unreachable ();
})
;; When used at a location where CC contains 1, causes a speculative load
;; that is later cancelled. This is used for certain workarounds in
;; interrupt handler prologues.
(define_insn "dummy_load"
[(unspec_volatile [(match_operand 0 "register_operand" "a")
(match_operand 1 "register_operand" "C")]
......@@ -2677,6 +2709,17 @@
(set_attr "length" "4")
(set_attr "seq_insns" "multi")])
;; A placeholder insn inserted before the final scheduling pass. It is used
;; to improve scheduling of loads when workarounds for speculative loads are
;; needed, by not placing them in the first few cycles after a conditional
;; branch.
(define_insn "stall"
[(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")]
UNSPEC_VOLATILE_STALL)]
""
""
[(set_attr "type" "stall")])
(define_insn "csync"
[(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)]
""
......
......@@ -59,6 +59,14 @@
(and (match_code "const_int")
(match_test "op == const0_rtx || op == const1_rtx")))
;; Matches only the constant integer 1 (compared against const1_rtx).
;; Used by the "load_stall1" insn reservation to recognize a stall
;; placeholder with operand 1.
(define_predicate "const1_operand"
(and (match_code "const_int")
(match_test "op == const1_rtx")))
;; Matches only a constant integer whose value is 3.
;; Used by the "load_stall3" insn reservation to recognize a stall
;; placeholder with operand 3.
(define_predicate "const3_operand"
(and (match_code "const_int")
(match_test "INTVAL (op) == 3")))
(define_predicate "vec_shift_operand"
(ior (and (match_code "const_int")
(match_test "INTVAL (op) >= -16 && INTVAL (op) < 15"))
......@@ -180,10 +188,14 @@
(define_predicate "bfin_direct_comparison_operator"
(match_code "eq,lt,le,leu,ltu"))
;; The following two are used to compute the addrtype attribute. They return
;; The following three are used to compute the addrtype attribute. They return
;; true if passed a memory address usable for a 16-bit load or store using a
;; P or I register, respectively. If neither matches, we know we have a
;; 32-bit instruction.
;; We subdivide the P case into normal P registers, and SP/FP. We can assume
;; that speculative loads through SP and FP are no problem, so this has
;; an effect on the anomaly workaround code.
(define_predicate "mem_p_address_operand"
(match_code "mem")
{
......@@ -193,7 +205,19 @@
if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
op = XEXP (op, 0);
gcc_assert (REG_P (op));
return PREG_P (op);
return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx;
})
;; Returns true for a MEM that is not a 32-bit effective address (per
;; effective_address_32bit_p) and whose base register is the stack pointer
;; or the frame pointer.  One level of PLUS or auto-increment/decrement
;; around the base register is stripped before the check.  The "addrtype"
;; attribute uses this predicate to select "spreg".
(define_predicate "mem_spfp_address_operand"
(match_code "mem")
{
if (effective_address_32bit_p (op, mode))
return 0;
op = XEXP (op, 0);
if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
op = XEXP (op, 0);
gcc_assert (REG_P (op));
return op == stack_pointer_rtx || op == frame_pointer_rtx;
})
(define_predicate "mem_i_address_operand"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment