Commit 2f2ebf95 by Kito Cheng, committed by Chung-Ju Wu

[NDS32] Implement n10 pipeline.

gcc/
	* config.gcc (nds32*-*-*): Check that n10/d10 are valid to --with-cpu.
	* config/nds32/nds32-n10.md: New file.
	* config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N10.
	* config/nds32/nds32-pipelines-auxiliary.c: Implementation for n10
	pipeline.
	* config/nds32/nds32-protos.h: More declarations for n10 pipeline.
	* config/nds32/nds32-utils.c: More implementations for n10 pipeline.
	* config/nds32/nds32.md (pipeline_model): Add n10.
	* config/nds32/nds32.opt (mcpu): Support n10 pipeline cpus.
	* config/nds32/pipelines.md: Include n10 settings.

Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com>

From-SVN: r260207
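
For orientation, the new CPU names added by this commit can be exercised in two ways: at GCC configure time through --with-cpu (validated by the config.gcc hunk below), or per compilation through the new -mcpu entries in nds32.opt. A minimal, illustrative sketch follows; the target triple and source file name are assumptions, not part of the commit.

    # Configure-time default CPU; n10 and d10 are newly accepted values.
    ./configure --target=nds32le-elf --with-cpu=n10

    # Per-compilation selection; n1068a-fpu is one of the new -mcpu names
    # that this commit maps to CPU_N10.
    nds32le-elf-gcc -O2 -mcpu=n1068a-fpu -c test.c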
parent 7c32ef41
2018-05-13  Kito Cheng  <kito.cheng@gmail.com>
	    Chung-Ju Wu  <jasonwucj@gmail.com>

	* config.gcc (nds32*-*-*): Check that n10/d10 are valid to --with-cpu.
	* config/nds32/nds32-n10.md: New file.
	* config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N10.
	* config/nds32/nds32-pipelines-auxiliary.c: Implementation for n10
	pipeline.
	* config/nds32/nds32-protos.h: More declarations for n10 pipeline.
	* config/nds32/nds32-utils.c: More implementations for n10 pipeline.
	* config/nds32/nds32.md (pipeline_model): Add n10.
	* config/nds32/nds32.opt (mcpu): Support n10 pipeline cpus.
	* config/nds32/pipelines.md: Include n10 settings.

2018-05-13  Monk Chiang  <sh.chiang04@gmail.com>
	    Kito Cheng  <kito.cheng@gmail.com>
	    Chung-Ju Wu  <jasonwucj@gmail.com>
@@ -4367,11 +4367,11 @@ case "${target}" in
        "")
                with_cpu=n9
                ;;
-       n6 | n7 | n8 | e8 | s8 | n9)
+       n6 | n7 | n8 | e8 | s8 | n9 | n10 | d10)
                # OK
                ;;
        *)
-               echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9" 1>&2
+               echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n7 n8 e8 s8 n9 n10 d10" 1>&2
                exit 1
                ;;
        esac
@@ -42,6 +42,7 @@ enum nds32_cpu_type
  CPU_N8,
  CPU_E8,
  CPU_N9,
  CPU_N10,
  CPU_SIMPLE
};
@@ -335,6 +335,103 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
  return false;
}

/* Check if INSN is a wext insn consuming DEF_REG. */
bool
wext_odd_dep_p (rtx insn, rtx def_reg)
{
  rtx shift_rtx = XEXP (SET_SRC (PATTERN (insn)), 0);
  rtx use_reg = XEXP (shift_rtx, 0);
  rtx pos_rtx = XEXP (shift_rtx, 1);

  if (REG_P (pos_rtx) && reg_overlap_p (def_reg, pos_rtx))
    return true;

  if (GET_MODE (def_reg) == DImode)
    return reg_overlap_p (def_reg, use_reg);

  gcc_assert (REG_P (def_reg) || GET_CODE (def_reg) == SUBREG);
  gcc_assert (REG_P (use_reg));

  if (REG_P (def_reg))
    {
      if (!TARGET_BIG_ENDIAN)
        return REGNO (def_reg) == REGNO (use_reg) + 1;
      else
        return REGNO (def_reg) == REGNO (use_reg);
    }

  if (GET_CODE (def_reg) == SUBREG)
    {
      if (!reg_overlap_p (def_reg, use_reg))
        return false;

      if (!TARGET_BIG_ENDIAN)
        return SUBREG_BYTE (def_reg) == 4;
      else
        return SUBREG_BYTE (def_reg) == 0;
    }

  return false;
}

/* Check if INSN is a bpick insn consuming DEF_REG. */
bool
bpick_ra_rb_dep_p (rtx insn, rtx def_reg)
{
  rtx ior_rtx = SET_SRC (PATTERN (insn));
  rtx and1_rtx = XEXP (ior_rtx, 0);
  rtx and2_rtx = XEXP (ior_rtx, 1);
  rtx reg1_0 = XEXP (and1_rtx, 0);
  rtx reg1_1 = XEXP (and1_rtx, 1);
  rtx reg2_0 = XEXP (and2_rtx, 0);
  rtx reg2_1 = XEXP (and2_rtx, 1);

  if (GET_CODE (reg1_0) == NOT)
    {
      if (rtx_equal_p (reg1_0, reg2_0))
        return reg_overlap_p (def_reg, reg1_1)
               || reg_overlap_p (def_reg, reg2_1);

      if (rtx_equal_p (reg1_0, reg2_1))
        return reg_overlap_p (def_reg, reg1_1)
               || reg_overlap_p (def_reg, reg2_0);
    }

  if (GET_CODE (reg1_1) == NOT)
    {
      if (rtx_equal_p (reg1_1, reg2_0))
        return reg_overlap_p (def_reg, reg1_0)
               || reg_overlap_p (def_reg, reg2_1);

      if (rtx_equal_p (reg1_1, reg2_1))
        return reg_overlap_p (def_reg, reg1_0)
               || reg_overlap_p (def_reg, reg2_0);
    }

  if (GET_CODE (reg2_0) == NOT)
    {
      if (rtx_equal_p (reg2_0, reg1_0))
        return reg_overlap_p (def_reg, reg2_1)
               || reg_overlap_p (def_reg, reg1_1);

      if (rtx_equal_p (reg2_0, reg1_1))
        return reg_overlap_p (def_reg, reg2_1)
               || reg_overlap_p (def_reg, reg1_0);
    }

  if (GET_CODE (reg2_1) == NOT)
    {
      if (rtx_equal_p (reg2_1, reg1_0))
        return reg_overlap_p (def_reg, reg2_0)
               || reg_overlap_p (def_reg, reg1_1);

      if (rtx_equal_p (reg2_1, reg1_1))
        return reg_overlap_p (def_reg, reg2_0)
               || reg_overlap_p (def_reg, reg1_0);
    }

  gcc_unreachable ();
}

} // namespace scheduling
} // namespace nds32
@@ -375,8 +472,7 @@ n7_consumed_by_ii_dep_p (rtx_insn *consumer, rtx def_reg)
         operations in order to write two registers. We have to check the
         dependency from the producer to the first micro-operation. */
    case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-          || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
        use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
        use_rtx = SET_SRC (PATTERN (consumer));
@@ -506,8 +602,7 @@ n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
         operations in order to write two registers. We have to check the
         dependency from the producer to the first micro-operation. */
    case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-          || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
        use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
        use_rtx = SET_SRC (PATTERN (consumer));
@@ -606,8 +701,7 @@ n9_2r1w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
      break;

    case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-          || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
        use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
        use_rtx = SET_SRC (PATTERN (consumer));
@@ -706,8 +800,7 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
         We have to check the dependency from the producer to the first
         micro-operation. */
    case TYPE_DIV:
-      if (INSN_CODE (consumer) == CODE_FOR_divmodsi4
-          || INSN_CODE (consumer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (consumer))
        use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
        use_rtx = SET_SRC (PATTERN (consumer));
@@ -744,6 +837,86 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
  return false;
}
/* Check the dependency between the producer defining DEF_REG and CONSUMER
   requiring input operand at EX. */
bool
n10_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
{
  rtx use_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
    case TYPE_PBSAD:
    case TYPE_MUL:
    case TYPE_DALU:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DPACK:
    case TYPE_DINSB:
    case TYPE_DCMP:
    case TYPE_DCLIP:
    case TYPE_DALUROUND:
      use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_ALU_SHIFT:
      use_rtx = extract_shift_reg (consumer);
      break;

    case TYPE_PBSADA:
      return pbsada_insn_ra_rb_dep_reg_p (consumer, def_reg);

    case TYPE_MAC:
    case TYPE_DMAC:
      use_rtx = extract_mac_non_acc_rtx (consumer);
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder. */
    case TYPE_DIV:
      if (divmod_p (consumer))
        use_rtx = SET_SRC (parallel_element (consumer, 0));
      else
        use_rtx = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_DWEXT:
      return wext_odd_dep_p (consumer, def_reg);

    case TYPE_DBPICK:
      return bpick_ra_rb_dep_p (consumer, def_reg);

    case TYPE_MMU:
      if (GET_CODE (PATTERN (consumer)) == SET)
        use_rtx = SET_SRC (PATTERN (consumer));
      else
        return true;
      break;

    case TYPE_LOAD:
    case TYPE_STORE:
      use_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      use_rtx = extract_base_reg (consumer);
      break;

    case TYPE_BRANCH:
      use_rtx = PATTERN (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  if (reg_overlap_p (def_reg, use_rtx))
    return true;

  return false;
}
} // anonymous namespace
@@ -837,8 +1010,7 @@ nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
      break;

    case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-          || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
        def_reg = SET_DEST (parallel_element (producer, 1));
      else
        def_reg = SET_DEST (PATTERN (producer));
@@ -969,8 +1141,7 @@ nds32_e8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
      break;

    case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-          || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
        {
          rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
          rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
@@ -1073,8 +1244,7 @@ nds32_n9_3r2w_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
         results, the quotient and the remainder. We have to handle them
         individually. */
    case TYPE_DIV:
-      if (INSN_CODE (producer) == CODE_FOR_divmodsi4
-          || INSN_CODE (producer) == CODE_FOR_udivmodsi4)
+      if (divmod_p (producer))
        {
          rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
          rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
@@ -1132,4 +1302,73 @@ nds32_n9_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
  return n9_3r2w_consumed_by_ex_dep_p (consumer, last_def_reg);
}
/* Guard functions for N10 cores. */

/* Check dependencies from EX to EX (ADDR_OUT -> ADDR_IN). */
bool
nds32_n10_ex_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  gcc_assert (get_attr_type (producer) == TYPE_FLOAD
              || get_attr_type (producer) == TYPE_FSTORE);
  gcc_assert (get_attr_type (consumer) == TYPE_FLOAD
              || get_attr_type (consumer) == TYPE_FSTORE);

  if (!post_update_insn_p (producer))
    return false;

  return reg_overlap_p (extract_base_reg (producer),
                        extract_mem_rtx (consumer));
}

/* Check dependencies from MM to EX. */
bool
nds32_n10_mm_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx def_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_LOAD:
    case TYPE_MUL:
    case TYPE_MAC:
    case TYPE_DALU64:
    case TYPE_DMUL:
    case TYPE_DMAC:
    case TYPE_DALUROUND:
    case TYPE_DBPICK:
    case TYPE_DWEXT:
      def_reg = SET_DEST (PATTERN (producer));
      break;

    /* Some special instructions, divmodsi4 and udivmodsi4, produce two
       results, the quotient and the remainder. We have to handle them
       individually. */
    case TYPE_DIV:
      if (divmod_p (producer))
        {
          rtx def_reg1 = SET_DEST (parallel_element (producer, 0));
          rtx def_reg2 = SET_DEST (parallel_element (producer, 1));
          return (n10_consumed_by_ex_dep_p (consumer, def_reg1)
                  || n10_consumed_by_ex_dep_p (consumer, def_reg2));
        }
      def_reg = SET_DEST (PATTERN (producer));
      break;

    default:
      gcc_unreachable ();
    }

  return n10_consumed_by_ex_dep_p (consumer, def_reg);
}

/* Check dependencies from LMW(N, N) to EX. */
bool
nds32_n10_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx last_def_reg = extract_nth_access_reg (producer, -1);

  return n10_consumed_by_ex_dep_p (consumer, last_def_reg);
}
/* ------------------------------------------------------------------------ */
@@ -121,6 +121,9 @@ extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n10_ex_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n10_mm_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n10_last_load_to_ex_p (rtx_insn *, rtx_insn *);

/* Auxiliary functions for stack operation predicate checking. */

@@ -300,6 +303,8 @@ rtx extract_movd44_odd_reg (rtx_insn *);
rtx extract_mac_non_acc_rtx (rtx_insn *);

bool divmod_p (rtx_insn *);

rtx extract_branch_target_rtx (rtx_insn *);
rtx extract_branch_condition_rtx (rtx_insn *);

} // namespace nds32
@@ -424,6 +424,19 @@ extract_mac_non_acc_rtx (rtx_insn *insn)
    }
}

/* Check if the DIV insn needs two write ports. */
bool
divmod_p (rtx_insn *insn)
{
  gcc_assert (get_attr_type (insn) == TYPE_DIV);

  if (INSN_CODE (insn) == CODE_FOR_divmodsi4
      || INSN_CODE (insn) == CODE_FOR_udivmodsi4)
    return true;

  return false;
}

/* Extract the rtx representing the branch target to help recognize
   data hazards. */
rtx
@@ -56,12 +56,13 @@
;; ------------------------------------------------------------------------
;; CPU pipeline model.
-(define_attr "pipeline_model" "n7,n8,e8,n9,simple"
+(define_attr "pipeline_model" "n7,n8,e8,n9,n10,simple"
  (const
    (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7")
           (match_test "nds32_cpu_option == CPU_E8") (const_string "e8")
           (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8")
           (match_test "nds32_cpu_option == CPU_N9") (const_string "n9")
           (match_test "nds32_cpu_option == CPU_N10") (const_string "n10")
           (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")]
          (const_string "n9"))))
@@ -243,6 +243,51 @@ EnumValue
Enum(nds32_cpu_type) String(n968a) Value(CPU_N9)

EnumValue
Enum(nds32_cpu_type) String(n10) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1033) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1033a) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1033-fpu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1033-spu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068a) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068-fpu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068a-fpu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068-spu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(n1068a-spu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(d10) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(d1088) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(d1088-fpu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(d1088-spu) Value(CPU_N10)

EnumValue
Enum(nds32_cpu_type) String(simple) Value(CPU_SIMPLE)
mconfig-fpu= mconfig-fpu=
@@ -44,6 +44,12 @@
;; ------------------------------------------------------------------------

;; ------------------------------------------------------------------------
;; Include N10 pipeline settings.
;; ------------------------------------------------------------------------
(include "nds32-n10.md")

;; ------------------------------------------------------------------------
;; Define simple pipeline settings.
;; ------------------------------------------------------------------------