Commit 635aeaa2 by Claudiu Zissulescu Committed by Claudiu Zissulescu

[ARC] Handle store cacheline hazard.

Handle the store cacheline hazard on A700 CPUs by inserting two NOP_S
instructions between the stores and the load of an ST ST LD sequence or
its logical equivalent (giving, e.g., ST ST NOP_S NOP_S J_L.D LD).

gcc/
xxxx-xx-xx  Claudiu Zissulescu  <claziss@synopsys.com>

        * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
        * config/arc/arc.c (arc_active_insn): New function.
        (check_store_cacheline_hazard): Likewise.
        (workaround_arc_anomaly): Use check_store_cacheline_hazard.
        (arc_override_options): Disable delay slot scheduler for older
        A7.
        (arc_store_addr_hazard_p): New implementation, old one renamed to
        ...
        (arc_store_addr_hazard_internal_p): Renamed.
        (arc_reorg): Don't combine into brcc instructions which are part
        of hardware hazard solution.
        * config/arc/arc.md (attr tune): Consider new arc7xx tune value.
        (tune_arc700): Likewise.
        * config/arc/arc.opt (arc7xx): New tune value.
        * config/arc/arc700.md: Improve A7 scheduler.

From-SVN: r265676
parent aac1c11c
2018-10-31 Claudiu Zissulescu <claziss@synopsys.com> 2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
* config/arc/arc.c (arc_active_insn): New function.
(check_store_cacheline_hazard): Likewise.
(workaround_arc_anomaly): Use check_store_cacheline_hazard.
(arc_override_options): Disable delay slot scheduler for older
A7.
(arc_store_addr_hazard_p): New implementation, old one renamed to
...
(arc_store_addr_hazard_internal_p): Renamed.
(arc_reorg): Don't combine into brcc instructions which are part
of hardware hazard solution.
* config/arc/arc.md (attr tune): Consider new arc7xx tune value.
(tune_arc700): Likewise.
* config/arc/arc.opt (arc7xx): New tune value.
* config/arc/arc700.md: Improve A7 scheduler.
2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (arc_override_options): Remove * config/arc/arc.c (arc_override_options): Remove
TARGET_COMPACT_CASESI. TARGET_COMPACT_CASESI.
* config/arc/arc.h (ASM_OUTPUT_ADDR_DIFF_ELT): Update. * config/arc/arc.h (ASM_OUTPUT_ADDR_DIFF_ELT): Update.
...@@ -71,6 +71,7 @@ enum arc_tune_attr ...@@ -71,6 +71,7 @@ enum arc_tune_attr
{ {
ARC_TUNE_NONE, ARC_TUNE_NONE,
ARC_TUNE_ARC600, ARC_TUNE_ARC600,
ARC_TUNE_ARC7XX,
ARC_TUNE_ARC700_4_2_STD, ARC_TUNE_ARC700_4_2_STD,
ARC_TUNE_ARC700_4_2_XMAC, ARC_TUNE_ARC700_4_2_XMAC,
ARC_TUNE_CORE_3, ARC_TUNE_CORE_3,
......
...@@ -1303,6 +1303,10 @@ arc_override_options (void) ...@@ -1303,6 +1303,10 @@ arc_override_options (void)
if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET) if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET)
g_switch_value = TARGET_LL64 ? 8 : 4; g_switch_value = TARGET_LL64 ? 8 : 4;
/* A7 has an issue with delay slots. */
if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
flag_delayed_branch = 0;
/* These need to be done at start up. It's convenient to do them here. */ /* These need to be done at start up. It's convenient to do them here. */
arc_init (); arc_init ();
} }
...@@ -7137,11 +7141,90 @@ arc_invalid_within_doloop (const rtx_insn *insn) ...@@ -7137,11 +7141,90 @@ arc_invalid_within_doloop (const rtx_insn *insn)
return NULL; return NULL;
} }
/* Return the next active insn after INSN, skipping inline assembly
   code (insns whose pattern is an ASM_INPUT).  Return NULL when no
   such insn is left.  */

static rtx_insn *
arc_active_insn (rtx_insn *insn)
{
  rtx_insn *nxt = next_active_insn (insn);

  /* Several basic asm statements may follow each other; keep walking
     until an insn that is not an ASM_INPUT is reached, otherwise the
     caller could still be handed inline assembly.  */
  while (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
    nxt = next_active_insn (nxt);

  return nxt;
}
/* Search for a sequence of at least two consecutive stores which is
   followed by a load, either directly or after one intervening
   instruction, and insert nops so that two instruction slots separate
   the store sequence from the load.  */

static void
check_store_cacheline_hazard (void)
{
  rtx_insn *insn, *succ0, *insn1, *seq_end;

  for (insn = get_insns (); insn; insn = arc_active_insn (insn))
    {
      succ0 = arc_active_insn (insn);

      if (!succ0)
        return;

      if (!single_set (insn) || !single_set (succ0))
        continue;

      if ((get_attr_type (insn) != TYPE_STORE)
          || (get_attr_type (succ0) != TYPE_STORE))
        continue;

      /* Found at least two consecutive stores.  Go to the end of the
         store sequence.  */
      for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
        if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
          break;

      /* The stores run up to the end of the insn stream: nothing can
         follow them, so there is nothing left to check.  */
      if (!insn1)
        return;

      /* Remember the first instruction that is not a store; the scan
         is resumed from here when no load is found.  */
      seq_end = insn1;

      /* Now, check the next two instructions for the following cases:
         1. next instruction is a LD => insert 2 nops between store
            sequence and load.
         2. next-next instruction is a LD => insert 1 nop before the
            load.  */
      if (single_set (insn1)
          && (get_attr_type (insn1) == TYPE_LOAD))
        {
          emit_insn_before (gen_nopv (), insn1);
          emit_insn_before (gen_nopv (), insn1);
          /* Resume after the load.  */
          insn = insn1;
        }
      else
        {
          if (get_attr_type (insn1) == TYPE_COMPARE)
            {
              /* REG_SAVE_NOTE is used by Haifa scheduler, we are in
                 reorg, so it is safe to reuse it for avoiding the
                 current compare insn to be part of a BRcc
                 optimization.  */
              add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
            }

          insn1 = arc_active_insn (insn1);
          if (insn1 && single_set (insn1)
              && (get_attr_type (insn1) == TYPE_LOAD))
            {
              emit_insn_before (gen_nopv (), insn1);
              /* Resume after the load.  */
              insn = insn1;
            }
          else
            /* No load was found.  Resume right after the first
               non-store instruction: the loop step advances once more,
               and restarting from the second instruction after the
               stores would skip it, missing a store sequence that
               begins there.  */
            insn = seq_end;
        }
    }
}
/* Return true if a load instruction (CONSUMER) uses the same address as a /* Return true if a load instruction (CONSUMER) uses the same address as a
store instruction (PRODUCER). This function is used to avoid st/ld store instruction (PRODUCER). This function is used to avoid st/ld
address hazard in ARC700 cores. */ address hazard in ARC700 cores. */
bool
arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) static bool
arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
{ {
rtx in_set, out_set; rtx in_set, out_set;
rtx out_addr, in_addr; rtx out_addr, in_addr;
...@@ -7189,6 +7272,16 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) ...@@ -7189,6 +7272,16 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
return false; return false;
} }
/* Return true if there is a store address hazard between PRODUCER and
   CONSUMER.  For ARC700 targets whose tune value is not
   ARC_TUNE_ARC7XX a hazard is always reported; otherwise the answer
   comes from arc_store_addr_hazard_internal_p.  */

bool
arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
{
  const bool always_hazard = TARGET_ARC700 && arc_tune != ARC_TUNE_ARC7XX;

  /* The address comparison is only evaluated when the hazard is not
     already assumed unconditionally.  */
  return always_hazard
         || arc_store_addr_hazard_internal_p (producer, consumer);
}
/* The same functionality as arc_hazard. It is called in machine /* The same functionality as arc_hazard. It is called in machine
reorg before any other optimization. Hence, the NOP size is taken reorg before any other optimization. Hence, the NOP size is taken
into account when doing branch shortening. */ into account when doing branch shortening. */
...@@ -7197,6 +7290,7 @@ static void ...@@ -7197,6 +7290,7 @@ static void
workaround_arc_anomaly (void) workaround_arc_anomaly (void)
{ {
rtx_insn *insn, *succ0; rtx_insn *insn, *succ0;
rtx_insn *succ1;
/* For any architecture: call arc_hazard here. */ /* For any architecture: call arc_hazard here. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
...@@ -7208,27 +7302,30 @@ workaround_arc_anomaly (void) ...@@ -7208,27 +7302,30 @@ workaround_arc_anomaly (void)
} }
} }
if (TARGET_ARC700) if (!TARGET_ARC700)
{ return;
rtx_insn *succ1;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) /* Old A7 are suffering of a cache hazard, and we need to insert two
{ nops between any sequence of stores and a load. */
succ0 = next_real_insn (insn); if (arc_tune != ARC_TUNE_ARC7XX)
if (arc_store_addr_hazard_p (insn, succ0)) check_store_cacheline_hazard ();
{
emit_insn_after (gen_nopv (), insn);
emit_insn_after (gen_nopv (), insn);
continue;
}
/* Avoid adding nops if the instruction between the ST and LD is for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
a call or jump. */ {
succ1 = next_real_insn (succ0); succ0 = next_real_insn (insn);
if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0) if (arc_store_addr_hazard_internal_p (insn, succ0))
&& arc_store_addr_hazard_p (insn, succ1)) {
emit_insn_after (gen_nopv (), insn); emit_insn_after (gen_nopv (), insn);
emit_insn_after (gen_nopv (), insn);
continue;
} }
/* Avoid adding nops if the instruction between the ST and LD is
a call or jump. */
succ1 = next_real_insn (succ0);
if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
&& arc_store_addr_hazard_internal_p (insn, succ1))
emit_insn_after (gen_nopv (), insn);
} }
} }
...@@ -7866,11 +7963,15 @@ arc_reorg (void) ...@@ -7866,11 +7963,15 @@ arc_reorg (void)
if (!link_insn) if (!link_insn)
continue; continue;
else else
/* Check if this is a data dependency. */
{ {
/* Check if this is a data dependency. */
rtx op, cc_clob_rtx, op0, op1, brcc_insn, note; rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
rtx cmp0, cmp1; rtx cmp0, cmp1;
/* Make sure we can use it for brcc insns. */
if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
continue;
/* Ok this is the set cc. copy args here. */ /* Ok this is the set cc. copy args here. */
op = XEXP (pc_target, 0); op = XEXP (pc_target, 0);
......
...@@ -600,11 +600,13 @@ ...@@ -600,11 +600,13 @@
;; somehow modify them to become inelegible for delay slots if a decision ;; somehow modify them to become inelegible for delay slots if a decision
;; is made that makes conditional execution required. ;; is made that makes conditional execution required.
(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \ (define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
archs4x, archs4xd, archs4xd_slow" core_3, archs4x, archs4xd, archs4xd_slow"
(const (const
(cond [(symbol_ref "arc_tune == TUNE_ARC600") (cond [(symbol_ref "arc_tune == TUNE_ARC600")
(const_string "arc600") (const_string "arc600")
(symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
(const_string "arc7xx")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
(const_string "arc700_4_2_std") (const_string "arc700_4_2_std")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
...@@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow" ...@@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow"
(const_string "none")))) (const_string "none"))))
(define_attr "tune_arc700" "false,true" (define_attr "tune_arc700" "false,true"
(if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac") (if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac")
(const_string "true") (const_string "true")
(const_string "false"))) (const_string "false")))
......
...@@ -263,6 +263,9 @@ EnumValue ...@@ -263,6 +263,9 @@ EnumValue
Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600) Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600)
EnumValue EnumValue
Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX)
EnumValue
Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD) Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD)
EnumValue EnumValue
......
...@@ -145,28 +145,14 @@ ...@@ -145,28 +145,14 @@
; no functional unit runs when blockage is reserved ; no functional unit runs when blockage is reserved
(exclusion_set "blockage" "core, multiplier") (exclusion_set "blockage" "core, multiplier")
(define_insn_reservation "data_load_DI" 4
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "load")
(match_operand:DI 0 "" ""))
"issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
(define_insn_reservation "data_load" 3 (define_insn_reservation "data_load" 3
(and (eq_attr "tune_arc700" "true") (and (eq_attr "tune_arc700" "true")
(eq_attr "type" "load") (eq_attr "type" "load"))
(not (match_operand:DI 0 "" "")))
"issue+dmp, nothing, dmp_write_port") "issue+dmp, nothing, dmp_write_port")
(define_insn_reservation "data_store_DI" 2
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "store")
(match_operand:DI 0 "" ""))
"issue+dmp_write_port, issue+dmp_write_port")
(define_insn_reservation "data_store" 1 (define_insn_reservation "data_store" 1
(and (eq_attr "tune_arc700" "true") (and (eq_attr "tune_arc700" "true")
(eq_attr "type" "store") (eq_attr "type" "store"))
(not (match_operand:DI 0 "" "")))
"issue+dmp_write_port") "issue+dmp_write_port")
(define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p") (define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment