Commit 635aeaa2 by Claudiu Zissulescu Committed by Claudiu Zissulescu

[ARC] Handle store cacheline hazard.

Handle store cacheline hazard for A700 cpus by inserting two NOP_S
between ST ST LD or their logical equivalent (like ST ST NOP_S NOP_S
J_L.D LD)

gcc/
xxxx-xx-xx  Claudiu Zissulescu  <claziss@synopsys.com>

        * config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
        * config/arc/arc.c (arc_active_insn): New function.
        (check_store_cacheline_hazard): Likewise.
        (workaround_arc_anomaly): Use check_store_cacheline_hazard.
        (arc_override_options): Disable delay slot scheduler for older
        A7.
        (arc_store_addr_hazard_p): New implementation, old one renamed to
        ...
        (arc_store_addr_hazard_internal_p): Renamed.
        (arc_reorg): Don't combine into brcc instructions which are part
        of hardware hazard solution.
        * config/arc/arc.md (attr tune): Consider new arc7xx tune value.
        (tune_arc700): Likewise.
        * config/arc/arc.opt (arc7xx): New tune value.
        * config/arc/arc700.md: Improve A7 scheduler.

From-SVN: r265676
parent aac1c11c
2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-arch.h (ARC_TUNE_ARC7XX): New tune value.
* config/arc/arc.c (arc_active_insn): New function.
(check_store_cacheline_hazard): Likewise.
(workaround_arc_anomaly): Use check_store_cacheline_hazard.
(arc_override_options): Disable delay slot scheduler for older
A7.
(arc_store_addr_hazard_p): New implementation, old one renamed to
...
(arc_store_addr_hazard_internal_p): Renamed.
(arc_reorg): Don't combine into brcc instructions which are part
of hardware hazard solution.
* config/arc/arc.md (attr tune): Consider new arc7xx tune value.
(tune_arc700): Likewise.
* config/arc/arc.opt (arc7xx): New tune value.
* config/arc/arc700.md: Improve A7 scheduler.
2018-10-31 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (arc_override_options): Remove
TARGET_COMPACT_CASESI.
* config/arc/arc.h (ASM_OUTPUT_ADDR_DIFF_ELT): Update.
......@@ -71,6 +71,7 @@ enum arc_tune_attr
{
ARC_TUNE_NONE,
ARC_TUNE_ARC600,
ARC_TUNE_ARC7XX,
ARC_TUNE_ARC700_4_2_STD,
ARC_TUNE_ARC700_4_2_XMAC,
ARC_TUNE_CORE_3,
......
......@@ -1303,6 +1303,10 @@ arc_override_options (void)
if (!global_options_set.x_g_switch_value && !TARGET_NO_SDATA_SET)
g_switch_value = TARGET_LL64 ? 8 : 4;
/* A7 has an issue with delay slots. */
if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
flag_delayed_branch = 0;
/* These need to be done at start up. It's convenient to do them here. */
arc_init ();
}
......@@ -7137,11 +7141,90 @@ arc_invalid_within_doloop (const rtx_insn *insn)
return NULL;
}
/* Return the next active insn, skiping the inline assembly code. */
static rtx_insn *
arc_active_insn (rtx_insn *insn)
{
rtx_insn *nxt = next_active_insn (insn);
if (nxt && GET_CODE (PATTERN (nxt)) == ASM_INPUT)
nxt = next_active_insn (nxt);
return nxt;
}
/* Search for a sequence made out of two stores and a given number of
loads, insert a nop if required. */
static void
check_store_cacheline_hazard (void)
{
rtx_insn *insn, *succ0, *insn1;
bool found = false;
for (insn = get_insns (); insn; insn = arc_active_insn (insn))
{
succ0 = arc_active_insn (insn);
if (!succ0)
return;
if (!single_set (insn) || !single_set (succ0))
continue;
if ((get_attr_type (insn) != TYPE_STORE)
|| (get_attr_type (succ0) != TYPE_STORE))
continue;
/* Found at least two consecutive stores. Goto the end of the
store sequence. */
for (insn1 = succ0; insn1; insn1 = arc_active_insn (insn1))
if (!single_set (insn1) || get_attr_type (insn1) != TYPE_STORE)
break;
/* Now, check the next two instructions for the following cases:
1. next instruction is a LD => insert 2 nops between store
sequence and load.
2. next-next instruction is a LD => inset 1 nop after the store
sequence. */
if (insn1 && single_set (insn1)
&& (get_attr_type (insn1) == TYPE_LOAD))
{
found = true;
emit_insn_before (gen_nopv (), insn1);
emit_insn_before (gen_nopv (), insn1);
}
else
{
if (insn1 && (get_attr_type (insn1) == TYPE_COMPARE))
{
/* REG_SAVE_NOTE is used by Haifa scheduler, we are in
reorg, so it is safe to reuse it for avoiding the
current compare insn to be part of a BRcc
optimization. */
add_reg_note (insn1, REG_SAVE_NOTE, GEN_INT (3));
}
insn1 = arc_active_insn (insn1);
if (insn1 && single_set (insn1)
&& (get_attr_type (insn1) == TYPE_LOAD))
{
found = true;
emit_insn_before (gen_nopv (), insn1);
}
}
insn = insn1;
if (found)
found = false;
}
}
/* Return true if a load instruction (CONSUMER) uses the same address as a
store instruction (PRODUCER). This function is used to avoid st/ld
address hazard in ARC700 cores. */
bool
arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
static bool
arc_store_addr_hazard_internal_p (rtx_insn* producer, rtx_insn* consumer)
{
rtx in_set, out_set;
rtx out_addr, in_addr;
......@@ -7189,6 +7272,16 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
return false;
}
/* Return TRUE is we have an store address hazard. */
bool
arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer)
{
if (TARGET_ARC700 && (arc_tune != ARC_TUNE_ARC7XX))
return true;
return arc_store_addr_hazard_internal_p (producer, consumer);
}
/* The same functionality as arc_hazard. It is called in machine
reorg before any other optimization. Hence, the NOP size is taken
into account when doing branch shortening. */
......@@ -7197,6 +7290,7 @@ static void
workaround_arc_anomaly (void)
{
rtx_insn *insn, *succ0;
rtx_insn *succ1;
/* For any architecture: call arc_hazard here. */
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
......@@ -7208,27 +7302,30 @@ workaround_arc_anomaly (void)
}
}
if (TARGET_ARC700)
{
rtx_insn *succ1;
if (!TARGET_ARC700)
return;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
succ0 = next_real_insn (insn);
if (arc_store_addr_hazard_p (insn, succ0))
{
emit_insn_after (gen_nopv (), insn);
emit_insn_after (gen_nopv (), insn);
continue;
}
/* Old A7 are suffering of a cache hazard, and we need to insert two
nops between any sequence of stores and a load. */
if (arc_tune != ARC_TUNE_ARC7XX)
check_store_cacheline_hazard ();
/* Avoid adding nops if the instruction between the ST and LD is
a call or jump. */
succ1 = next_real_insn (succ0);
if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
&& arc_store_addr_hazard_p (insn, succ1))
emit_insn_after (gen_nopv (), insn);
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
succ0 = next_real_insn (insn);
if (arc_store_addr_hazard_internal_p (insn, succ0))
{
emit_insn_after (gen_nopv (), insn);
emit_insn_after (gen_nopv (), insn);
continue;
}
/* Avoid adding nops if the instruction between the ST and LD is
a call or jump. */
succ1 = next_real_insn (succ0);
if (succ0 && !JUMP_P (succ0) && !CALL_P (succ0)
&& arc_store_addr_hazard_internal_p (insn, succ1))
emit_insn_after (gen_nopv (), insn);
}
}
......@@ -7866,11 +7963,15 @@ arc_reorg (void)
if (!link_insn)
continue;
else
/* Check if this is a data dependency. */
{
/* Check if this is a data dependency. */
rtx op, cc_clob_rtx, op0, op1, brcc_insn, note;
rtx cmp0, cmp1;
/* Make sure we can use it for brcc insns. */
if (find_reg_note (link_insn, REG_SAVE_NOTE, GEN_INT (3)))
continue;
/* Ok this is the set cc. copy args here. */
op = XEXP (pc_target, 0);
......
......@@ -600,11 +600,13 @@
;; somehow modify them to become inelegible for delay slots if a decision
;; is made that makes conditional execution required.
(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac, core_3, \
archs4x, archs4xd, archs4xd_slow"
(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \
core_3, archs4x, archs4xd, archs4xd_slow"
(const
(cond [(symbol_ref "arc_tune == TUNE_ARC600")
(const_string "arc600")
(symbol_ref "arc_tune == ARC_TUNE_ARC7XX")
(const_string "arc7xx")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
(const_string "arc700_4_2_std")
(symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
......@@ -619,7 +621,7 @@ archs4x, archs4xd, archs4xd_slow"
(const_string "none"))))
(define_attr "tune_arc700" "false,true"
(if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac")
(if_then_else (eq_attr "tune" "arc7xx, arc700_4_2_std, arc700_4_2_xmac")
(const_string "true")
(const_string "false")))
......
......@@ -263,6 +263,9 @@ EnumValue
Enum(arc_tune_attr) String(arc601) Value(ARC_TUNE_ARC600)
EnumValue
Enum(arc_tune_attr) String(arc7xx) Value(ARC_TUNE_ARC7XX)
EnumValue
Enum(arc_tune_attr) String(arc700) Value(ARC_TUNE_ARC700_4_2_STD)
EnumValue
......
......@@ -145,28 +145,14 @@
; no functional unit runs when blockage is reserved
(exclusion_set "blockage" "core, multiplier")
(define_insn_reservation "data_load_DI" 4
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "load")
(match_operand:DI 0 "" ""))
"issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
(define_insn_reservation "data_load" 3
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "load")
(not (match_operand:DI 0 "" "")))
(eq_attr "type" "load"))
"issue+dmp, nothing, dmp_write_port")
(define_insn_reservation "data_store_DI" 2
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "store")
(match_operand:DI 0 "" ""))
"issue+dmp_write_port, issue+dmp_write_port")
(define_insn_reservation "data_store" 1
(and (eq_attr "tune_arc700" "true")
(eq_attr "type" "store")
(not (match_operand:DI 0 "" "")))
(eq_attr "type" "store"))
"issue+dmp_write_port")
(define_bypass 3 "data_store" "data_load" "arc_store_addr_hazard_p")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment