// ================================================================ // NVDLA Open Source Project // // Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the // NVDLA Open Hardware License; Check "LICENSE" which comes with // this distribution for more information. // ================================================================ // File Name: NV_NVDLA_SDP_RDMA_eg.v // ================================================================ // NVDLA Open Source Project // // Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the // NVDLA Open Hardware License; Check "LICENSE" which comes with // this distribution for more information. // ================================================================ // File Name: NV_NVDLA_SDP_define.h `include "simulate_x_tick.vh" module NV_NVDLA_SDP_RDMA_eg ( nvdla_core_clk //|< i ,nvdla_core_rstn //|< i ,pwrbus_ram_pd //|< i ,op_load //|< i ,eg_done //|> o ,cq2eg_pd //|< i ,cq2eg_pvld //|< i ,cq2eg_prdy //|> o ,lat_fifo_rd_pd //|< i ,lat_fifo_rd_pvld //|< i ,lat_fifo_rd_prdy //|> o ,dma_rd_cdt_lat_fifo_pop //|> o ,sdp_rdma2dp_alu_pd //|> o ,sdp_rdma2dp_alu_valid //|> o ,sdp_rdma2dp_alu_ready //|< i ,sdp_rdma2dp_mul_pd //|> o ,sdp_rdma2dp_mul_valid //|> o ,sdp_rdma2dp_mul_ready //|< i ,reg2dp_batch_number //|< i ,reg2dp_channel //|< i ,reg2dp_height //|< i ,reg2dp_width //|< i ,reg2dp_proc_precision //|< i ,reg2dp_out_precision //|< i ,reg2dp_rdma_data_mode //|< i ,reg2dp_rdma_data_size //|< i ,reg2dp_rdma_data_use //|< i ); input nvdla_core_clk; input nvdla_core_rstn; input [31:0] pwrbus_ram_pd; input op_load; output eg_done; input [15:0] cq2eg_pd; input cq2eg_pvld; output cq2eg_prdy; input [65 -1:0] lat_fifo_rd_pd; input lat_fifo_rd_pvld; output lat_fifo_rd_prdy; output dma_rd_cdt_lat_fifo_pop; output [8*16:0] sdp_rdma2dp_alu_pd; output sdp_rdma2dp_alu_valid; input sdp_rdma2dp_alu_ready; output [8*16:0] sdp_rdma2dp_mul_pd; output sdp_rdma2dp_mul_valid; input sdp_rdma2dp_mul_ready; input [4:0] reg2dp_batch_number; input [12:0] reg2dp_channel; input [12:0] reg2dp_height; input [12:0] reg2dp_width; input [1:0] reg2dp_proc_precision; input [1:0] reg2dp_out_precision; input reg2dp_rdma_data_mode; input reg2dp_rdma_data_size; input [1:0] reg2dp_rdma_data_use; wire cfg_alu_en; wire cfg_mul_en; wire cfg_do_8; wire cfg_dp_8; wire cfg_data_size_1byte; wire cfg_data_size_2byte; wire cfg_mode_1bytex1; wire cfg_mode_1bytex2; wire cfg_mode_2bytex1; wire cfg_mode_2bytex2; wire cfg_mode_alu_only; wire cfg_mode_both; wire cfg_mode_mul_only; wire cfg_mode_per_element; wire cfg_mode_single; reg cq2eg_prdy_hold; wire ig2eg_cube_end; wire [14:0] ig2eg_size; wire [15:0] beat_size; reg [14:0] beat_count; wire [15:0] beat_count_nxt; reg mon_beat_count; wire is_last_beat; wire is_beat_end; wire layer_done; wire mul_layer_end; wire mul_roc_rdy; wire mul_roc_vld; wire mul_rod_rdy; wire mul_rod_vld; reg alu_layer_done; reg alu_roc_en; reg eg_done; reg mul_layer_done; reg mul_roc_en; wire alu_layer_end; wire alu_roc_rdy; wire alu_roc_vld; wire alu_rod_rdy; wire alu_rod_vld; wire [4*8*8 +3:0] unpack_out_pd; wire unpack_out_pvld; wire unpack_out_prdy; wire [8*8 -1:0] mode_1bytex2_alu_rod0_pd; wire [8*8 -1:0] mode_1bytex2_alu_rod1_pd; wire [8*8 -1:0] mode_2bytex2_alu_rod0_pd; wire [8*8 -1:0] mode_2bytex2_alu_rod1_pd; wire [8*8 -1:0] mode_1bytex2_mul_rod0_pd; wire [8*8 -1:0] mode_1bytex2_mul_rod1_pd; wire [8*8 -1:0] mode_2bytex2_mul_rod0_pd; wire [8*8 -1:0] mode_2bytex2_mul_rod1_pd; //============== // CFG REG //============== assign cfg_data_size_1byte = reg2dp_rdma_data_size == 1'h0 ; assign cfg_data_size_2byte = reg2dp_rdma_data_size == 1'h1 ; assign cfg_mode_mul_only = reg2dp_rdma_data_use == 2'h0 ; assign cfg_mode_alu_only = reg2dp_rdma_data_use == 2'h1 ; assign cfg_mode_both = reg2dp_rdma_data_use == 2'h2 ; assign cfg_mode_per_element = reg2dp_rdma_data_mode == 1'h1 ; assign cfg_mode_single = cfg_mode_mul_only || cfg_mode_alu_only; assign cfg_mode_1bytex1 = cfg_data_size_1byte & cfg_mode_single; assign cfg_mode_2bytex1 = cfg_data_size_2byte & cfg_mode_single; assign cfg_mode_1bytex2 = cfg_data_size_1byte & cfg_mode_both; assign cfg_mode_2bytex2 = cfg_data_size_2byte & cfg_mode_both; assign cfg_dp_8 = reg2dp_proc_precision== 0 ; assign cfg_do_8 = reg2dp_out_precision== 0 ; assign cfg_alu_en = cfg_mode_alu_only || cfg_mode_both; assign cfg_mul_en = cfg_mode_mul_only || cfg_mode_both; //============== // DMA Interface //============== assign dma_rd_cdt_lat_fifo_pop = lat_fifo_rd_pvld & lat_fifo_rd_prdy; //============== // Latency FIFO to buffer return DATA //============== wire [3:0] lat_fifo_rd_mask = {{(4-1){1'b0}},lat_fifo_rd_pd[65 -1:64]}; wire [2:0] lat_fifo_rd_size = lat_fifo_rd_mask[3]+lat_fifo_rd_mask[2]+lat_fifo_rd_mask[1]+lat_fifo_rd_mask[0]; //================================================================== // Context Queue: read //================================================================== assign cq2eg_prdy = is_beat_end; assign ig2eg_size[14:0] = cq2eg_pd[14:0]; assign ig2eg_cube_end = cq2eg_pd[15]; assign beat_size = ig2eg_size+1; assign beat_count_nxt = beat_count+lat_fifo_rd_size; always @(posedge nvdla_core_clk or negedge nvdla_core_rstn) begin if (!nvdla_core_rstn) begin {mon_beat_count,beat_count} <= 16'h0; end else begin if (lat_fifo_rd_pvld & lat_fifo_rd_prdy) begin if (is_last_beat) begin {mon_beat_count,beat_count} <= 16'h0; end else begin {mon_beat_count,beat_count} <= beat_count_nxt; end end end end assign is_last_beat = (beat_count_nxt == beat_size); assign is_beat_end = is_last_beat & lat_fifo_rd_pvld & lat_fifo_rd_prdy; `ifdef SPYGLASS_ASSERT_ON `else // spyglass disable_block NoWidthInBasedNum-ML // spyglass disable_block STARC-2.10.3.2a // spyglass disable_block STARC05-2.1.3.1 // spyglass disable_block STARC-2.1.4.6 // spyglass disable_block W116 // spyglass disable_block W154 // spyglass disable_block W239 // spyglass disable_block W362 // spyglass disable_block WRN_58 // spyglass disable_block WRN_61 `endif // SPYGLASS_ASSERT_ON `ifdef ASSERT_ON `ifdef FV_ASSERT_ON `define ASSERT_RESET nvdla_core_rstn `else `ifdef SYNTHESIS `define ASSERT_RESET nvdla_core_rstn `else `ifdef ASSERT_OFF_RESET_IS_X `define ASSERT_RESET ((1'bx === nvdla_core_rstn) ? 1'b0 : nvdla_core_rstn) `else `define ASSERT_RESET ((1'bx === nvdla_core_rstn) ? 1'b1 : nvdla_core_rstn) `endif // ASSERT_OFF_RESET_IS_X `endif // SYNTHESIS `endif // FV_ASSERT_ON // VCS coverage off nv_assert_never #(0,0,"info in CQ there be there when return data come") zzz_assert_never_2x (nvdla_core_clk, `ASSERT_RESET, (!cq2eg_pvld) & lat_fifo_rd_pvld); // spyglass disable W504 SelfDeterminedExpr-ML // VCS coverage on `undef ASSERT_RESET `endif // ASSERT_ON `ifdef SPYGLASS_ASSERT_ON `else // spyglass enable_block NoWidthInBasedNum-ML // spyglass enable_block STARC-2.10.3.2a // spyglass enable_block STARC05-2.1.3.1 // spyglass enable_block STARC-2.1.4.6 // spyglass enable_block W116 // spyglass enable_block W154 // spyglass enable_block W239 // spyglass enable_block W362 // spyglass enable_block WRN_58 // spyglass enable_block WRN_61 `endif // SPYGLASS_ASSERT_ON /////////combine lat fifo pd to 4*atomic_m*bpe////// wire lat_fifo_rd_beat_end = is_last_beat; NV_NVDLA_SDP_RDMA_unpack u_rdma_unpack ( .nvdla_core_clk (nvdla_core_clk) ,.nvdla_core_rstn (nvdla_core_rstn) ,.inp_data (lat_fifo_rd_pd[65 -1:0]) ,.inp_pvld (lat_fifo_rd_pvld) ,.inp_prdy (lat_fifo_rd_prdy) ,.inp_end (lat_fifo_rd_beat_end) ,.out_data (unpack_out_pd[4*8*8 +3:0]) ,.out_pvld (unpack_out_pvld) ,.out_prdy (unpack_out_prdy) ); wire [3:0] unpack_out_mask = unpack_out_pd[4*8*8 +3:4*8*8]; assign unpack_out_prdy = cfg_mode_both ? alu_rod_rdy & mul_rod_rdy : cfg_mode_alu_only ? alu_rod_rdy : mul_rod_rdy; //============================================================ // Re-Order FIFO to send data to SDP-core //============================================================ // |----------------------------------------------------| // | 16B | 16B | 16B | 16B | // MODE |----------------------------------------------------| // | 0 1 2 3 | // 1Bx1 | ALU or MUL | ALU or MUL | ALU or MUL or ALU or MUL | // |----------------------------------------------------| // | 0 | 1 | // 2Bx1 | ALU or MUL | ALU or MUL | // |====================================================| // | 0 | 1 | // 1Bx2 | ALU | MUL | ALU | MUL | // |----------------------------------------------------| // | 0 | 1 | // 2Bx2 | ALU | MUL | // |----------------------------------------------------| //============================================================ //: my $k = 8; //: foreach my $i (0..${k}-1) { //: my $j = ${i}*2; //: print "assign mode_1bytex2_alu_rod0_pd[8*${i}+7:8*${i}] = unpack_out_pd[8*${j}+7: 8*${j}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}-1) { //: my $jj = ${i}*2+$k*2; //: print "assign mode_1bytex2_alu_rod1_pd[8*${i}+7:8*${i}] = unpack_out_pd[8*${jj}+7: 8*${jj}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}/2-1) { //: my $j = ${i}*2; //: print "assign mode_2bytex2_alu_rod0_pd[16*${i}+15:16*${i}] = unpack_out_pd[16*${j}+15: 16*${j}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}/2-1) { //: my $jj = ${i}*2+$k; //: print "assign mode_2bytex2_alu_rod1_pd[16*${i}+15:16*${i}] = unpack_out_pd[16*${jj}+15: 16*${jj}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}-1) { //: my $j = ${i}*2+1; //: print "assign mode_1bytex2_mul_rod0_pd[8*${i}+7:8*${i}] = unpack_out_pd[8*${j}+7: 8*${j}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}-1) { //: my $jj = ${i}*2+1+$k*2; //: print "assign mode_1bytex2_mul_rod1_pd[8*${i}+7:8*${i}] = unpack_out_pd[8*${jj}+7: 8*${jj}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}/2-1) { //: my $j = ${i}*2+1; //: print "assign mode_2bytex2_mul_rod0_pd[16*${i}+15:16*${i}] = unpack_out_pd[16*${j}+15: 16*${j}]; \n"; //: } //: print "\n"; //: foreach my $i (0..${k}/2-1) { //: my $jj = ${i}*2+1+$k; //: print "assign mode_2bytex2_mul_rod1_pd[16*${i}+15:16*${i}] = unpack_out_pd[16*${jj}+15: 16*${jj}]; \n"; //: } //: print "\n"; //| eperl: generated_beg (DO NOT EDIT BELOW) assign mode_1bytex2_alu_rod0_pd[8*0+7:8*0] = unpack_out_pd[8*0+7: 8*0]; assign mode_1bytex2_alu_rod0_pd[8*1+7:8*1] = unpack_out_pd[8*2+7: 8*2]; assign mode_1bytex2_alu_rod0_pd[8*2+7:8*2] = unpack_out_pd[8*4+7: 8*4]; assign mode_1bytex2_alu_rod0_pd[8*3+7:8*3] = unpack_out_pd[8*6+7: 8*6]; assign mode_1bytex2_alu_rod0_pd[8*4+7:8*4] = unpack_out_pd[8*8+7: 8*8]; assign mode_1bytex2_alu_rod0_pd[8*5+7:8*5] = unpack_out_pd[8*10+7: 8*10]; assign mode_1bytex2_alu_rod0_pd[8*6+7:8*6] = unpack_out_pd[8*12+7: 8*12]; assign mode_1bytex2_alu_rod0_pd[8*7+7:8*7] = unpack_out_pd[8*14+7: 8*14]; assign mode_1bytex2_alu_rod1_pd[8*0+7:8*0] = unpack_out_pd[8*16+7: 8*16]; assign mode_1bytex2_alu_rod1_pd[8*1+7:8*1] = unpack_out_pd[8*18+7: 8*18]; assign mode_1bytex2_alu_rod1_pd[8*2+7:8*2] = unpack_out_pd[8*20+7: 8*20]; assign mode_1bytex2_alu_rod1_pd[8*3+7:8*3] = unpack_out_pd[8*22+7: 8*22]; assign mode_1bytex2_alu_rod1_pd[8*4+7:8*4] = unpack_out_pd[8*24+7: 8*24]; assign mode_1bytex2_alu_rod1_pd[8*5+7:8*5] = unpack_out_pd[8*26+7: 8*26]; assign mode_1bytex2_alu_rod1_pd[8*6+7:8*6] = unpack_out_pd[8*28+7: 8*28]; assign mode_1bytex2_alu_rod1_pd[8*7+7:8*7] = unpack_out_pd[8*30+7: 8*30]; assign mode_2bytex2_alu_rod0_pd[16*0+15:16*0] = unpack_out_pd[16*0+15: 16*0]; assign mode_2bytex2_alu_rod0_pd[16*1+15:16*1] = unpack_out_pd[16*2+15: 16*2]; assign mode_2bytex2_alu_rod0_pd[16*2+15:16*2] = unpack_out_pd[16*4+15: 16*4]; assign mode_2bytex2_alu_rod0_pd[16*3+15:16*3] = unpack_out_pd[16*6+15: 16*6]; assign mode_2bytex2_alu_rod1_pd[16*0+15:16*0] = unpack_out_pd[16*8+15: 16*8]; assign mode_2bytex2_alu_rod1_pd[16*1+15:16*1] = unpack_out_pd[16*10+15: 16*10]; assign mode_2bytex2_alu_rod1_pd[16*2+15:16*2] = unpack_out_pd[16*12+15: 16*12]; assign mode_2bytex2_alu_rod1_pd[16*3+15:16*3] = unpack_out_pd[16*14+15: 16*14]; assign mode_1bytex2_mul_rod0_pd[8*0+7:8*0] = unpack_out_pd[8*1+7: 8*1]; assign mode_1bytex2_mul_rod0_pd[8*1+7:8*1] = unpack_out_pd[8*3+7: 8*3]; assign mode_1bytex2_mul_rod0_pd[8*2+7:8*2] = unpack_out_pd[8*5+7: 8*5]; assign mode_1bytex2_mul_rod0_pd[8*3+7:8*3] = unpack_out_pd[8*7+7: 8*7]; assign mode_1bytex2_mul_rod0_pd[8*4+7:8*4] = unpack_out_pd[8*9+7: 8*9]; assign mode_1bytex2_mul_rod0_pd[8*5+7:8*5] = unpack_out_pd[8*11+7: 8*11]; assign mode_1bytex2_mul_rod0_pd[8*6+7:8*6] = unpack_out_pd[8*13+7: 8*13]; assign mode_1bytex2_mul_rod0_pd[8*7+7:8*7] = unpack_out_pd[8*15+7: 8*15]; assign mode_1bytex2_mul_rod1_pd[8*0+7:8*0] = unpack_out_pd[8*17+7: 8*17]; assign mode_1bytex2_mul_rod1_pd[8*1+7:8*1] = unpack_out_pd[8*19+7: 8*19]; assign mode_1bytex2_mul_rod1_pd[8*2+7:8*2] = unpack_out_pd[8*21+7: 8*21]; assign mode_1bytex2_mul_rod1_pd[8*3+7:8*3] = unpack_out_pd[8*23+7: 8*23]; assign mode_1bytex2_mul_rod1_pd[8*4+7:8*4] = unpack_out_pd[8*25+7: 8*25]; assign mode_1bytex2_mul_rod1_pd[8*5+7:8*5] = unpack_out_pd[8*27+7: 8*27]; assign mode_1bytex2_mul_rod1_pd[8*6+7:8*6] = unpack_out_pd[8*29+7: 8*29]; assign mode_1bytex2_mul_rod1_pd[8*7+7:8*7] = unpack_out_pd[8*31+7: 8*31]; assign mode_2bytex2_mul_rod0_pd[16*0+15:16*0] = unpack_out_pd[16*1+15: 16*1]; assign mode_2bytex2_mul_rod0_pd[16*1+15:16*1] = unpack_out_pd[16*3+15: 16*3]; assign mode_2bytex2_mul_rod0_pd[16*2+15:16*2] = unpack_out_pd[16*5+15: 16*5]; assign mode_2bytex2_mul_rod0_pd[16*3+15:16*3] = unpack_out_pd[16*7+15: 16*7]; assign mode_2bytex2_mul_rod1_pd[16*0+15:16*0] = unpack_out_pd[16*9+15: 16*9]; assign mode_2bytex2_mul_rod1_pd[16*1+15:16*1] = unpack_out_pd[16*11+15: 16*11]; assign mode_2bytex2_mul_rod1_pd[16*2+15:16*2] = unpack_out_pd[16*13+15: 16*13]; assign mode_2bytex2_mul_rod1_pd[16*3+15:16*3] = unpack_out_pd[16*15+15: 16*15]; //| eperl: generated_end (DO NOT EDIT ABOVE) wire [8*8 -1:0] alu_rod0_pd = cfg_mode_2bytex2 ? mode_2bytex2_alu_rod0_pd : cfg_mode_1bytex2 ? mode_1bytex2_alu_rod0_pd : unpack_out_pd[(8*8*0+8*8 -1):8*8*0]; wire [8*8 -1:0] alu_rod1_pd = cfg_mode_2bytex2 ? mode_2bytex2_alu_rod1_pd : cfg_mode_1bytex2 ? mode_1bytex2_alu_rod1_pd : unpack_out_pd[(8*8*1+8*8 -1):8*8*1]; wire [8*8 -1:0] alu_rod2_pd = unpack_out_pd[(8*8*2+8*8 -1):8*8*2]; wire [8*8 -1:0] alu_rod3_pd = unpack_out_pd[(8*8*3+8*8 -1):8*8*3]; wire [8*8 -1:0] mul_rod0_pd = cfg_mode_2bytex2 ? mode_2bytex2_mul_rod0_pd : cfg_mode_1bytex2 ? mode_1bytex2_mul_rod0_pd : unpack_out_pd[(8*8*0+8*8 -1):8*8*0]; wire [8*8 -1:0] mul_rod1_pd = cfg_mode_2bytex2 ? mode_2bytex2_mul_rod1_pd : cfg_mode_1bytex2 ? mode_1bytex2_mul_rod1_pd : unpack_out_pd[(8*8*1+8*8 -1):8*8*1]; wire [8*8 -1:0] mul_rod2_pd = unpack_out_pd[(8*8*2+8*8 -1):8*8*2]; wire [8*8 -1:0] mul_rod3_pd = unpack_out_pd[(8*8*3+8*8 -1):8*8*3]; wire [3:0] alu_rod_mask = cfg_mode_both ? {2'h0,unpack_out_mask[2],unpack_out_mask[0]} : unpack_out_mask[3:0]; wire [3:0] mul_rod_mask = cfg_mode_both ? {2'h0,unpack_out_mask[2],unpack_out_mask[0]} : unpack_out_mask[3:0]; wire [2:0] alu_roc_size = alu_rod_mask[3] + alu_rod_mask[2] + alu_rod_mask[1] + alu_rod_mask[0]; wire [2:0] mul_roc_size = mul_rod_mask[3] + mul_rod_mask[2] + mul_rod_mask[1] + mul_rod_mask[0]; assign alu_rod_vld = cfg_alu_en & unpack_out_pvld & (cfg_mode_both ? mul_rod_rdy : 1'b1); assign mul_rod_vld = cfg_mul_en & unpack_out_pvld & (cfg_mode_both ? alu_rod_rdy : 1'b1); assign alu_roc_vld = cfg_alu_en & unpack_out_pvld & (cfg_mode_both ? mul_roc_rdy & mul_rod_rdy & alu_rod_rdy : alu_rod_rdy); assign mul_roc_vld = cfg_mul_en & unpack_out_pvld & (cfg_mode_both ? alu_roc_rdy & mul_rod_rdy & alu_rod_rdy : mul_rod_rdy); wire [1:0] alu_roc_pd,mul_roc_pd; wire mon_alu_roc_c,mon_mul_roc_c; assign {mon_alu_roc_c,alu_roc_pd} = alu_roc_size -1; assign {mon_mul_roc_c,mul_roc_pd} = mul_roc_size -1; //assert: alu_rod_vld & !alu_roc_rdy ////////////////split unpack pd to 4 atomic_m alu or mul data ///////////////////// NV_NVDLA_SDP_RDMA_EG_ro u_alu ( .nvdla_core_clk (nvdla_core_clk) ,.nvdla_core_rstn (nvdla_core_rstn) ,.pwrbus_ram_pd (pwrbus_ram_pd) ,.sdp_rdma2dp_valid (sdp_rdma2dp_alu_valid) ,.sdp_rdma2dp_ready (sdp_rdma2dp_alu_ready) ,.sdp_rdma2dp_pd (sdp_rdma2dp_alu_pd[8*16:0]) ,.rod0_wr_pd (alu_rod0_pd[8*8 -1:0]) ,.rod1_wr_pd (alu_rod1_pd[8*8 -1:0]) ,.rod2_wr_pd (alu_rod2_pd[8*8 -1:0]) ,.rod3_wr_pd (alu_rod3_pd[8*8 -1:0]) ,.rod_wr_mask (alu_rod_mask[3:0]) ,.rod_wr_vld (alu_rod_vld) ,.rod_wr_rdy (alu_rod_rdy) ,.roc_wr_pd (alu_roc_pd[1:0]) ,.roc_wr_vld (alu_roc_vld) ,.roc_wr_rdy (alu_roc_rdy) ,.cfg_dp_8 (cfg_dp_8) ,.cfg_dp_size_1byte (cfg_data_size_1byte) ,.cfg_mode_per_element (cfg_mode_per_element) ,.reg2dp_channel (reg2dp_channel[12:0]) ,.reg2dp_height (reg2dp_height[12:0]) ,.reg2dp_width (reg2dp_width[12:0]) ,.layer_end (alu_layer_end) ); NV_NVDLA_SDP_RDMA_EG_ro u_mul ( .nvdla_core_clk (nvdla_core_clk) ,.nvdla_core_rstn (nvdla_core_rstn) ,.pwrbus_ram_pd (pwrbus_ram_pd) ,.sdp_rdma2dp_valid (sdp_rdma2dp_mul_valid) ,.sdp_rdma2dp_ready (sdp_rdma2dp_mul_ready) ,.sdp_rdma2dp_pd (sdp_rdma2dp_mul_pd[8*16:0]) ,.rod0_wr_pd (mul_rod0_pd[8*8 -1:0]) ,.rod1_wr_pd (mul_rod1_pd[8*8 -1:0]) ,.rod2_wr_pd (mul_rod2_pd[8*8 -1:0]) ,.rod3_wr_pd (mul_rod3_pd[8*8 -1:0]) ,.rod_wr_mask (mul_rod_mask[3:0]) ,.rod_wr_vld (mul_rod_vld) ,.rod_wr_rdy (mul_rod_rdy) ,.roc_wr_pd (mul_roc_pd[1:0]) ,.roc_wr_vld (mul_roc_vld) ,.roc_wr_rdy (mul_roc_rdy) ,.cfg_dp_8 (cfg_dp_8) ,.cfg_dp_size_1byte (cfg_data_size_1byte) ,.cfg_mode_per_element (cfg_mode_per_element) ,.reg2dp_channel (reg2dp_channel[12:0]) ,.reg2dp_height (reg2dp_height[12:0]) ,.reg2dp_width (reg2dp_width[12:0]) ,.layer_end (mul_layer_end) ); //========================================================== // Layer Done //========================================================== always @(posedge nvdla_core_clk or negedge nvdla_core_rstn) begin if (!nvdla_core_rstn) begin alu_layer_done <= 1'b0; end else begin if (op_load) begin if (cfg_alu_en) begin alu_layer_done <= 1'b0; end else begin alu_layer_done <= 1'b1; end end else if (alu_layer_end) begin alu_layer_done <= 1'b1; end else if (layer_done) begin alu_layer_done <= 1'b0; end end end always @(posedge nvdla_core_clk or negedge nvdla_core_rstn) begin if (!nvdla_core_rstn) begin mul_layer_done <= 1'b0; end else begin if (op_load) begin if (cfg_mul_en) begin mul_layer_done <= 1'b0; end else begin mul_layer_done <= 1'b1; end end else if (mul_layer_end) begin mul_layer_done <= 1'b1; end else if (layer_done) begin mul_layer_done <= 1'b0; end end end assign layer_done = alu_layer_done & mul_layer_done; always @(posedge nvdla_core_clk or negedge nvdla_core_rstn) begin if (!nvdla_core_rstn) begin eg_done <= 1'b0; end else begin eg_done <= layer_done; end end endmodule // NV_NVDLA_SDP_RDMA_eg