Commit 8fd52141 by Kito Cheng Committed by Chung-Ju Wu

[NDS32] Implement n8 pipeline.

gcc/
	* config.gcc (nds32*-*-*): Check that n6/n8/s8 are valid to --with-cpu.
	* config/nds32/nds32-n8.md: New file.
	* config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N6 and CPU_N8.
	* config/nds32/nds32-pipelines-auxiliary.c: Implementation for n8
	pipeline.
	* config/nds32/nds32-protos.h: More declarations for n8 pipeline.
	* config/nds32/nds32-utils.c: More implementations for n8 pipeline.
	* config/nds32/nds32.md (pipeline_model): Add n8.
	* config/nds32/nds32.opt (mcpu): Support n8 pipeline cpus.
	* config/nds32/pipelines.md: Include n8 settings.

Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com>

From-SVN: r259219
parent b99353a2
2018-04-08 Kito Cheng <kito.cheng@gmail.com> 2018-04-08 Kito Cheng <kito.cheng@gmail.com>
Chung-Ju Wu <jasonwucj@gmail.com> Chung-Ju Wu <jasonwucj@gmail.com>
* config.gcc (nds32*-*-*): Check that n6/n8/s8 are valid to --with-cpu.
* config/nds32/nds32-n8.md: New file.
* config/nds32/nds32-opts.h (nds32_cpu_type): Add CPU_N6 and CPU_N8.
* config/nds32/nds32-pipelines-auxiliary.c: Implementation for n8
pipeline.
* config/nds32/nds32-protos.h: More declarations for n8 pipeline.
* config/nds32/nds32-utils.c: More implementations for n8 pipeline.
* config/nds32/nds32.md (pipeline_model): Add n8.
* config/nds32/nds32.opt (mcpu): Support n8 pipeline cpus.
* config/nds32/pipelines.md: Include n8 settings.
2018-04-08 Kito Cheng <kito.cheng@gmail.com>
Chung-Ju Wu <jasonwucj@gmail.com>
* config.gcc (nds32*): Add nds32-utils.o into extra_objs. * config.gcc (nds32*): Add nds32-utils.o into extra_objs.
* config/nds32/nds32-n9-2r1w.md: New file. * config/nds32/nds32-n9-2r1w.md: New file.
* config/nds32/nds32-n9-3r2w.md: New file. * config/nds32/nds32-n9-3r2w.md: New file.
......
...@@ -4315,11 +4315,11 @@ case "${target}" in ...@@ -4315,11 +4315,11 @@ case "${target}" in
"") "")
with_cpu=n9 with_cpu=n9
;; ;;
n9) n6 | n8 | s8 | n9)
# OK # OK
;; ;;
*) *)
echo "Cannot accept --with-cpu=$with_cpu, available values are: n9" 1>&2 echo "Cannot accept --with-cpu=$with_cpu, available values are: n6 n8 s8 n9" 1>&2
exit 1 exit 1
;; ;;
esac esac
......
...@@ -37,6 +37,8 @@ enum nds32_arch_type ...@@ -37,6 +37,8 @@ enum nds32_arch_type
/* The various ANDES CPU. */ /* The various ANDES CPU. */
enum nds32_cpu_type enum nds32_cpu_type
{ {
CPU_N6,
CPU_N8,
CPU_N9, CPU_N9,
CPU_SIMPLE CPU_SIMPLE
}; };
......
...@@ -343,6 +343,108 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg) ...@@ -343,6 +343,108 @@ movd44_even_dep_p (rtx_insn *insn, rtx def_reg)
using namespace nds32; using namespace nds32;
using namespace nds32::scheduling; using namespace nds32::scheduling;
namespace { // anonymous namespace
/* Return true if DEF_REG, the register defined by the producer, overlaps
   the operand that CONSUMER requires as input at AG (II).  Which operand
   is examined depends on the insn type of CONSUMER; an insn type that is
   not listed below is treated as unreachable.  */
bool
n8_consumed_by_addr_in_p (rtx_insn *consumer, rtx def_reg)
{
  rtx addr_rtx;

  switch (get_attr_type (consumer))
    {
    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      /* Only the base register is examined for multiple-access insns.  */
      addr_rtx = extract_base_reg (consumer);
      break;

    case TYPE_LOAD:
      /* A single load is checked against its whole memory rtx, any other
         load against its base register only.  */
      addr_rtx = (load_single_p (consumer)
                  ? extract_mem_rtx (consumer)
                  : extract_base_reg (consumer));
      break;

    case TYPE_STORE:
      /* The whole memory rtx is checked for a single store that is either
         not post-update or uses an immediate offset; every other store is
         checked against its base register only.  */
      if (!store_single_p (consumer)
          || (post_update_insn_p (consumer)
              && !immed_offset_p (extract_mem_rtx (consumer))))
        addr_rtx = extract_base_reg (consumer);
      else
        addr_rtx = extract_mem_rtx (consumer);
      break;

    case TYPE_BRANCH:
      addr_rtx = extract_branch_target_rtx (consumer);
      break;

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, addr_rtx);
}
/* Return true if DEF_REG, the register defined by the producer, overlaps
   the operand that CONSUMER requires as input at EX.  Which operand is
   examined depends on the insn type of CONSUMER; an insn type that is not
   listed below is treated as unreachable.  */
bool
n8_consumed_by_ex_p (rtx_insn *consumer, rtx def_reg)
{
  rtx ex_input;

  switch (get_attr_type (consumer))
    {
    case TYPE_ALU:
      /* A movd44 even-register dependency is reported immediately.  */
      if (movd44_even_dep_p (consumer, def_reg))
        return true;
      /* Fall through.  */
    case TYPE_MUL:
      ex_input = SET_SRC (PATTERN (consumer));
      break;

    case TYPE_MAC:
      ex_input = extract_mac_non_acc_rtx (consumer);
      break;

    case TYPE_DIV:
      {
        /* divmodsi4 and udivmodsi4 produce two results, the quotient and
           the remainder, and need two micro-operations to write the two
           registers.  The dependency is checked from the producer to the
           first micro-operation, i.e. element 0 of the parallel.  */
        const bool two_results_p
          = (INSN_CODE (consumer) == CODE_FOR_divmodsi4
             || INSN_CODE (consumer) == CODE_FOR_udivmodsi4);
        rtx first_op = (two_results_p
                        ? parallel_element (consumer, 0)
                        : PATTERN (consumer));
        ex_input = SET_SRC (first_op);
      }
      break;

    case TYPE_BRANCH:
      ex_input = extract_branch_condition_rtx (consumer);
      break;

    case TYPE_STORE:
      /* A store that is neither post-update nor immediate-offset
         (ST_!bi_RR) is excluded.  */
      if (post_update_insn_p (consumer)
          || immed_offset_p (extract_mem_rtx (consumer)))
        ex_input = SET_SRC (PATTERN (consumer));
      else
        return false;
      break;

    case TYPE_STORE_MULTIPLE:
      ex_input = extract_nth_access_rtx (consumer, 0);
      break;

    default:
      gcc_unreachable ();
    }

  return reg_overlap_p (def_reg, ex_input);
}
/* Check the dependency between the producer defining DEF_REG and CONSUMER /* Check the dependency between the producer defining DEF_REG and CONSUMER
requiring input operand at EX. */ requiring input operand at EX. */
bool bool
...@@ -514,6 +616,152 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg) ...@@ -514,6 +616,152 @@ n9_3r2w_consumed_by_ex_dep_p (rtx_insn *consumer, rtx def_reg)
return false; return false;
} }
} // anonymous namespace
/* ------------------------------------------------------------------------ */
/* Guard functions for N8 core. */
/* Guard function.  Return true when PRODUCER is not a post-update insn and
   the destination of its SET pattern is consumed by CONSUMER as an address
   input (see n8_consumed_by_addr_in_p).  */
bool
nds32_n8_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  return (!post_update_insn_p (producer)
          && n8_consumed_by_addr_in_p (consumer,
                                       SET_DEST (PATTERN (producer))));
}
/* Guard function.  Return true when PRODUCER is a post-update insn and the
   destination of its SET pattern is consumed by CONSUMER as an address
   input (see n8_consumed_by_addr_in_p).  */
bool
nds32_n8_load_bi_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  return (post_update_insn_p (producer)
          && n8_consumed_by_addr_in_p (consumer,
                                       SET_DEST (PATTERN (producer))));
}
/* Guard function.  Return true when PRODUCER is not a post-update insn and
   the destination of its SET pattern is consumed by CONSUMER as an EX
   input (see n8_consumed_by_ex_p).  */
bool
nds32_n8_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  return (!post_update_insn_p (producer)
          && n8_consumed_by_ex_p (consumer,
                                  SET_DEST (PATTERN (producer))));
}
/* Guard function.  Pick the register defined by PRODUCER according to its
   insn type and return true if CONSUMER uses that register as an address
   input (see n8_consumed_by_addr_in_p).  An insn type that is not listed
   below is treated as unreachable.  */
bool
nds32_n8_ex_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx result_reg;

  switch (get_attr_type (producer))
    {
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_LOAD_MULTIPLE:
    case TYPE_STORE_MULTIPLE:
      /* Memory insns are only considered when they are post-update, in
         which case their base register is the one checked.  */
      if (!post_update_insn_p (producer))
        return false;
      result_reg = extract_base_reg (producer);
      break;

    case TYPE_ALU:
      /* For movd44 the odd register is the one checked.  */
      result_reg = (movd44_insn_p (producer)
                    ? extract_movd44_odd_reg (producer)
                    : SET_DEST (PATTERN (producer)));
      break;

    case TYPE_MUL:
    case TYPE_MAC:
      result_reg = SET_DEST (PATTERN (producer));
      break;

    case TYPE_DIV:
      {
        /* For divmodsi4 and udivmodsi4 the destination of element 1 of
           the parallel is the one checked.  */
        const bool two_results_p
          = (INSN_CODE (producer) == CODE_FOR_divmodsi4
             || INSN_CODE (producer) == CODE_FOR_udivmodsi4);
        rtx set_rtx = (two_results_p
                       ? parallel_element (producer, 1)
                       : PATTERN (producer));
        result_reg = SET_DEST (set_rtx);
      }
      break;

    default:
      gcc_unreachable ();
    }

  return n8_consumed_by_addr_in_p (consumer, result_reg);
}
/* Guard function.  Return true if the register of the last access of
   PRODUCER is consumed by CONSUMER as an address input.  */
bool
nds32_n8_last_load_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx final_reg = NULL_RTX;

  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
     the base register and the result is ready in EX stage, so that case is
     not handled by this guard function and the corresponding bypass
     rule.  */
  if (!post_update_insn_p (producer))
    final_reg = extract_nth_access_reg (producer, -1);

  if (final_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (final_reg) || GET_CODE (final_reg) == SUBREG);

  return n8_consumed_by_addr_in_p (consumer, final_reg);
}
/* Guard function.  Return true if the register of the second-to-last
   micro-operation of PRODUCER is consumed by CONSUMER as an address
   input.  */
bool
nds32_n8_last_load_two_to_ii_p (rtx_insn *producer, rtx_insn *consumer)
{
  /* If PRODUCER is a post-update insn, one additional micro-operation is
     inserted at the end, so the last memory access operation (index -1)
     is the one handled by this guard function and the corresponding bypass
     rule.  Otherwise the access at index -2 is used.  */
  const int nth = post_update_insn_p (producer) ? -1 : -2;

  rtx access_reg = extract_nth_access_reg (producer, nth);
  if (access_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (access_reg) || GET_CODE (access_reg) == SUBREG);

  return n8_consumed_by_addr_in_p (consumer, access_reg);
}
/* Guard function.  Return true if the register of the last access of
   PRODUCER is consumed by CONSUMER as an EX input.  */
bool
nds32_n8_last_load_to_ex_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx final_reg = NULL_RTX;

  /* If PRODUCER is a post-update LMW insn, the last micro-operation updates
     the base register and the result is ready in EX stage, so that case is
     not handled by this guard function and the corresponding bypass
     rule.  */
  if (!post_update_insn_p (producer))
    final_reg = extract_nth_access_reg (producer, -1);

  if (final_reg == NULL_RTX)
    return false;

  gcc_assert (REG_P (final_reg) || GET_CODE (final_reg) == SUBREG);

  return n8_consumed_by_ex_p (consumer, final_reg);
}
/* Guard functions for N9 cores. */ /* Guard functions for N9 cores. */
/* Check dependencies from MM to EX. */ /* Check dependencies from MM to EX. */
......
...@@ -99,6 +99,14 @@ extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool); ...@@ -99,6 +99,14 @@ extern bool nds32_valid_multiple_load_store_p (rtx, bool, bool);
/* Auxiliary functions for guard function checking in pipelines.md. */ /* Auxiliary functions for guard function checking in pipelines.md. */
/* Guard functions for the N8 pipeline.  Each one takes the producer insn
   first and the consumer insn second, and returns true when the dependency
   it checks for exists between them.  */
extern bool nds32_n8_load_to_ii_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_load_bi_to_ii_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_load_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_ex_to_ii_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_last_load_to_ii_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_last_load_two_to_ii_p (rtx_insn *, rtx_insn *);
extern bool nds32_n8_last_load_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_2r1w_mm_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_3r2w_mm_to_ex_p (rtx_insn *, rtx_insn *);
extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *); extern bool nds32_n9_last_load_to_ex_p (rtx_insn *, rtx_insn *);
...@@ -251,8 +259,12 @@ rtx extract_base_reg (rtx_insn *); ...@@ -251,8 +259,12 @@ rtx extract_base_reg (rtx_insn *);
rtx extract_shift_reg (rtx); rtx extract_shift_reg (rtx);
bool movd44_insn_p (rtx_insn *); bool movd44_insn_p (rtx_insn *);
rtx extract_movd44_odd_reg (rtx_insn *);
rtx extract_mac_non_acc_rtx (rtx_insn *); rtx extract_mac_non_acc_rtx (rtx_insn *);
rtx extract_branch_target_rtx (rtx_insn *);
rtx extract_branch_condition_rtx (rtx_insn *);
} // namespace nds32 } // namespace nds32
/* Functions for create nds32 specific optimization pass. */ /* Functions for create nds32 specific optimization pass. */
......
...@@ -377,6 +377,33 @@ movd44_insn_p (rtx_insn *insn) ...@@ -377,6 +377,33 @@ movd44_insn_p (rtx_insn *insn)
return false; return false;
} }
/* Extract the second result (odd reg) of a movd44 insn.  INSN must satisfy
   movd44_insn_p and its destination must be a REG or SUBREG in DImode or
   DFmode; the high part of that destination is returned in SImode or
   SFmode respectively.  */
rtx
extract_movd44_odd_reg (rtx_insn *insn)
{
  gcc_assert (movd44_insn_p (insn));

  rtx dest = SET_DEST (PATTERN (insn));

  gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);

  switch (GET_MODE (dest))
    {
    case E_DImode:
      return gen_highpart (SImode, dest);

    case E_DFmode:
      return gen_highpart (SFmode, dest);

    default:
      gcc_unreachable ();
    }
}
/* Extract the rtx representing non-accumulation operands of a MAC insn. */ /* Extract the rtx representing non-accumulation operands of a MAC insn. */
rtx rtx
extract_mac_non_acc_rtx (rtx_insn *insn) extract_mac_non_acc_rtx (rtx_insn *insn)
...@@ -396,4 +423,89 @@ extract_mac_non_acc_rtx (rtx_insn *insn) ...@@ -396,4 +423,89 @@ extract_mac_non_acc_rtx (rtx_insn *insn)
} }
} }
/* Extract the rtx representing the branch target of INSN, which must be a
   call or jump insn, to help recognize data hazards.  NULL_RTX is returned
   for a SET whose source is an IF_THEN_ELSE; a pattern shape that is not
   handled below is treated as unreachable.  */
rtx
extract_branch_target_rtx (rtx_insn *insn)
{
  gcc_assert (CALL_P (insn) || JUMP_P (insn));

  rtx pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      /* RTXs in IF_THEN_ELSE are branch conditions, not targets.  */
      if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
        return NULL_RTX;
      return SET_SRC (pat);

    case CALL:
      return XEXP (pat, 0);

    case PARALLEL:
      {
        /* Only the first element of the parallel is inspected.  */
        rtx head = parallel_element (pat, 0);

        if (GET_CODE (head) == SET)
          return SET_SRC (head);

        if (GET_CODE (head) == CALL)
          return XEXP (head, 0);
      }
      break;

    case COND_EXEC:
      {
        /* Handle special cases of bltzal, bgezal and jralnez.  */
        rtx guarded = XEXP (pat, 1);

        if (GET_CODE (guarded) == SET)
          return SET_SRC (guarded);

        if (GET_CODE (guarded) == PARALLEL)
          {
            rtx head = parallel_element (guarded, 0);

            if (GET_CODE (head) == SET)
              {
                rtx call_rtx = SET_SRC (head);
                gcc_assert (GET_CODE (call_rtx) == CALL);
                return XEXP (call_rtx, 0);
              }

            if (GET_CODE (head) == CALL)
              return XEXP (head, 0);
          }
      }
      break;

    default:
      break;
    }

  gcc_unreachable ();
}
/* Extract the rtx representing the branch condition of INSN, which must be
   a call or jump insn, to help recognize data hazards.  The condition is
   operand 0 of a COND_EXEC pattern, or operand 0 of the IF_THEN_ELSE that
   is the source of a SET pattern; NULL_RTX is returned for any other
   pattern.  */
rtx
extract_branch_condition_rtx (rtx_insn *insn)
{
  gcc_assert (CALL_P (insn) || JUMP_P (insn));

  rtx pat = PATTERN (insn);

  if (GET_CODE (pat) == COND_EXEC)
    return XEXP (pat, 0);

  if (GET_CODE (pat) == SET
      && GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
    return XEXP (SET_SRC (pat), 0);

  return NULL_RTX;
}
} // namespace nds32 } // namespace nds32
...@@ -56,9 +56,10 @@ ...@@ -56,9 +56,10 @@
;; ------------------------------------------------------------------------ ;; ------------------------------------------------------------------------
;; CPU pipeline model. ;; CPU pipeline model.
(define_attr "pipeline_model" "n9,simple" (define_attr "pipeline_model" "n8,n9,simple"
(const (const
(cond [(match_test "nds32_cpu_option == CPU_N9") (const_string "n9") (cond [(match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8")
(match_test "nds32_cpu_option == CPU_N9") (const_string "n9")
(match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")]
(const_string "n9")))) (const_string "n9"))))
......
...@@ -175,6 +175,30 @@ Name(nds32_cpu_type) Type(enum nds32_cpu_type) ...@@ -175,6 +175,30 @@ Name(nds32_cpu_type) Type(enum nds32_cpu_type)
Known cpu types (for use with the -mcpu= option): Known cpu types (for use with the -mcpu= option):
EnumValue EnumValue
Enum(nds32_cpu_type) String(n6) Value(CPU_N6)
EnumValue
Enum(nds32_cpu_type) String(n650) Value(CPU_N6)
EnumValue
Enum(nds32_cpu_type) String(n8) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(n801) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(sn8) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(sn801) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(s8) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(s801) Value(CPU_N8)
EnumValue
Enum(nds32_cpu_type) String(n9) Value(CPU_N9) Enum(nds32_cpu_type) String(n9) Value(CPU_N9)
EnumValue EnumValue
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
;; along with GCC; see the file COPYING3. If not see ;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. ;; <http://www.gnu.org/licenses/>.
;; ------------------------------------------------------------------------
;; Include N8 pipeline settings.
;; ------------------------------------------------------------------------
(include "nds32-n8.md")
;; ------------------------------------------------------------------------ ;; ------------------------------------------------------------------------
;; Include N9/N10 pipeline settings. ;; Include N9/N10 pipeline settings.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment