// ================================================================ // NVDLA Open Source Project // // Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the // NVDLA Open Hardware License; Check "LICENSE" which comes with // this distribution for more information. // ================================================================ // File Name: NV_NVDLA_CMAC_CORE_mac.v // ================================================================ // NVDLA Open Source Project // // Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the // NVDLA Open Hardware License; Check "LICENSE" which comes with // this distribution for more information. // ================================================================ // File Name: NV_NVDLA_CMAC.h `define DESIGNWARE_NOEXIST 1 module NV_NVDLA_CMAC_CORE_mac ( nvdla_core_clk //|< i ,nvdla_wg_clk //|< i ,nvdla_core_rstn //|< i ,cfg_is_wg //|< i ,cfg_reg_en //|< i ,dat_actv_data //|< i ,dat_actv_nz //|< i ,dat_actv_pvld //|< i ,wt_actv_data //|< i ,wt_actv_nz //|< i ,wt_actv_pvld //|< i ,mac_out_data //|> o ,mac_out_pvld //|> o ); input nvdla_core_clk; input nvdla_wg_clk; input nvdla_core_rstn; input cfg_is_wg; input cfg_reg_en; input [8*8 -1:0] dat_actv_data; input [8 -1:0] dat_actv_nz; input [8 -1:0] dat_actv_pvld; input [8*8 -1:0] wt_actv_data; input [8 -1:0] wt_actv_nz; input [8 -1:0] wt_actv_pvld; output [19 -1:0] mac_out_data; output mac_out_pvld; ////////////////// unpack data&nz ////////////// //: for(my $i=0; $i<8; $i++){ //: my $bpe = 8; //: my $data_msb = ($i+1) * $bpe - 1; //: my $data_lsb = $i * $bpe; //: print qq( //: wire [${bpe}-1:0] wt_actv_data${i} = wt_actv_data[${data_msb}:${data_lsb}]; //: wire [${bpe}-1:0] dat_actv_data${i} = dat_actv_data[${data_msb}:${data_lsb}]; //: wire wt_actv_nz${i} = wt_actv_nz[${i}]; //: wire dat_actv_nz${i} = dat_actv_nz[${i}]; //: ) //: } //| eperl: generated_beg (DO NOT EDIT BELOW) wire [8-1:0] wt_actv_data0 = wt_actv_data[7:0]; wire [8-1:0] dat_actv_data0 = dat_actv_data[7:0]; wire wt_actv_nz0 = wt_actv_nz[0]; wire dat_actv_nz0 = dat_actv_nz[0]; wire [8-1:0] wt_actv_data1 = wt_actv_data[15:8]; wire [8-1:0] dat_actv_data1 = dat_actv_data[15:8]; wire wt_actv_nz1 = wt_actv_nz[1]; wire dat_actv_nz1 = dat_actv_nz[1]; wire [8-1:0] wt_actv_data2 = wt_actv_data[23:16]; wire [8-1:0] dat_actv_data2 = dat_actv_data[23:16]; wire wt_actv_nz2 = wt_actv_nz[2]; wire dat_actv_nz2 = dat_actv_nz[2]; wire [8-1:0] wt_actv_data3 = wt_actv_data[31:24]; wire [8-1:0] dat_actv_data3 = dat_actv_data[31:24]; wire wt_actv_nz3 = wt_actv_nz[3]; wire dat_actv_nz3 = dat_actv_nz[3]; wire [8-1:0] wt_actv_data4 = wt_actv_data[39:32]; wire [8-1:0] dat_actv_data4 = dat_actv_data[39:32]; wire wt_actv_nz4 = wt_actv_nz[4]; wire dat_actv_nz4 = dat_actv_nz[4]; wire [8-1:0] wt_actv_data5 = wt_actv_data[47:40]; wire [8-1:0] dat_actv_data5 = dat_actv_data[47:40]; wire wt_actv_nz5 = wt_actv_nz[5]; wire dat_actv_nz5 = dat_actv_nz[5]; wire [8-1:0] wt_actv_data6 = wt_actv_data[55:48]; wire [8-1:0] dat_actv_data6 = dat_actv_data[55:48]; wire wt_actv_nz6 = wt_actv_nz[6]; wire dat_actv_nz6 = dat_actv_nz[6]; wire [8-1:0] wt_actv_data7 = wt_actv_data[63:56]; wire [8-1:0] dat_actv_data7 = dat_actv_data[63:56]; wire wt_actv_nz7 = wt_actv_nz[7]; wire dat_actv_nz7 = dat_actv_nz[7]; //| eperl: generated_end (DO NOT EDIT ABOVE) `ifdef DESIGNWARE_NOEXIST wire signed [19 -1:0] sum_out; wire [8 -1:0] op_out_pvld; //: my $mul_result_width = 18; //: my $bpe = 8; //: my $rwidth = 19; //: my $result_width = $rwidth * 8 * 2; //: for (my $i=0; $i < 8; ++$i) { //: print "assign op_out_pvld[${i}] = wt_actv_pvld[${i}] & dat_actv_pvld[${i}] & wt_actv_nz${i} & dat_actv_nz${i};\n"; //: print "wire signed [${mul_result_width}-1:0] mout_$i = (\$signed(wt_actv_data${i}) * \$signed(dat_actv_data${i})) & \$signed({${mul_result_width}{op_out_pvld[${i}]}});\n"; //: } //: //: print "assign sum_out = \n"; //: for (my $i=0; $i < 8; ++$i) { //: print " "; //: print "+ " if ($i != 0); //: print "mout_$i\n"; //: } //: print "; \n"; //| eperl: generated_beg (DO NOT EDIT BELOW) assign op_out_pvld[0] = wt_actv_pvld[0] & dat_actv_pvld[0] & wt_actv_nz0 & dat_actv_nz0; wire signed [18-1:0] mout_0 = ($signed(wt_actv_data0) * $signed(dat_actv_data0)) & $signed({18{op_out_pvld[0]}}); assign op_out_pvld[1] = wt_actv_pvld[1] & dat_actv_pvld[1] & wt_actv_nz1 & dat_actv_nz1; wire signed [18-1:0] mout_1 = ($signed(wt_actv_data1) * $signed(dat_actv_data1)) & $signed({18{op_out_pvld[1]}}); assign op_out_pvld[2] = wt_actv_pvld[2] & dat_actv_pvld[2] & wt_actv_nz2 & dat_actv_nz2; wire signed [18-1:0] mout_2 = ($signed(wt_actv_data2) * $signed(dat_actv_data2)) & $signed({18{op_out_pvld[2]}}); assign op_out_pvld[3] = wt_actv_pvld[3] & dat_actv_pvld[3] & wt_actv_nz3 & dat_actv_nz3; wire signed [18-1:0] mout_3 = ($signed(wt_actv_data3) * $signed(dat_actv_data3)) & $signed({18{op_out_pvld[3]}}); assign op_out_pvld[4] = wt_actv_pvld[4] & dat_actv_pvld[4] & wt_actv_nz4 & dat_actv_nz4; wire signed [18-1:0] mout_4 = ($signed(wt_actv_data4) * $signed(dat_actv_data4)) & $signed({18{op_out_pvld[4]}}); assign op_out_pvld[5] = wt_actv_pvld[5] & dat_actv_pvld[5] & wt_actv_nz5 & dat_actv_nz5; wire signed [18-1:0] mout_5 = ($signed(wt_actv_data5) * $signed(dat_actv_data5)) & $signed({18{op_out_pvld[5]}}); assign op_out_pvld[6] = wt_actv_pvld[6] & dat_actv_pvld[6] & wt_actv_nz6 & dat_actv_nz6; wire signed [18-1:0] mout_6 = ($signed(wt_actv_data6) * $signed(dat_actv_data6)) & $signed({18{op_out_pvld[6]}}); assign op_out_pvld[7] = wt_actv_pvld[7] & dat_actv_pvld[7] & wt_actv_nz7 & dat_actv_nz7; wire signed [18-1:0] mout_7 = ($signed(wt_actv_data7) * $signed(dat_actv_data7)) & $signed({18{op_out_pvld[7]}}); assign sum_out = mout_0 + mout_1 + mout_2 + mout_3 + mout_4 + mout_5 + mout_6 + mout_7 ; //| eperl: generated_end (DO NOT EDIT ABOVE) `endif `ifndef DESIGNWARE_NOEXIST wire [19 -1:0] sum_out; wire [19*8*2-1:0] full_mul_result; wire [8 -1:0] op_out_pvld; //: my $mul_result_width = 18; //: my $bpe = 8; //: my $rwidth = 19; //: for (my $i=0; $i < 8; ++$i) { //: my $j = $i * 2; //: my $k = $i * 2 + 1; //: print qq( //: wire [$mul_result_width-1:0] mout_$j; //: wire [$mul_result_width-1:0] mout_$k; //: DW02_multp #(${bpe}, ${bpe}, $mul_result_width) mul$i ( //: .a(wt_actv_data${i}), //: .b(dat_actv_data${i}), //: .tc(1'b1), //: .out0(mout_${j}), //: .out1(mout_${k}) //: ); //: assign op_out_pvld[${i}] = wt_actv_pvld[${i}] & dat_actv_pvld[${i}] & wt_actv_nz${i} & dat_actv_nz${i}; //: ); //: //: my $offset = $j * $rwidth; //: my $sign_extend_bits = 19 - $mul_result_width; //: print qq( //: assign full_mul_result[$offset + $rwidth - 1 : $offset] = {{${sign_extend_bits}{mout_${j}[${mul_result_width}-1]}}, mout_$j} & {${rwidth}{op_out_pvld[$i]}}; ); //: $offset = $k * $rwidth; //: print qq( //: assign full_mul_result[$offset + $rwidth - 1 : $offset] = {{${sign_extend_bits}{mout_${k}[${mul_result_width}-1]}}, mout_$k} & {${rwidth}{op_out_pvld[$i]}}; ); //: } //| eperl: generated_beg (DO NOT EDIT BELOW) wire [18-1:0] mout_0; wire [18-1:0] mout_1; DW02_multp #(8, 8, 18) mul0 ( .a(wt_actv_data0), .b(dat_actv_data0), .tc(1'b1), .out0(mout_0), .out1(mout_1) ); assign op_out_pvld[0] = wt_actv_pvld[0] & dat_actv_pvld[0] & wt_actv_nz0 & dat_actv_nz0; assign full_mul_result[0 + 19 - 1 : 0] = {{1{mout_0[18-1]}}, mout_0} & {19{op_out_pvld[0]}}; assign full_mul_result[19 + 19 - 1 : 19] = {{1{mout_1[18-1]}}, mout_1} & {19{op_out_pvld[0]}}; wire [18-1:0] mout_2; wire [18-1:0] mout_3; DW02_multp #(8, 8, 18) mul1 ( .a(wt_actv_data1), .b(dat_actv_data1), .tc(1'b1), .out0(mout_2), .out1(mout_3) ); assign op_out_pvld[1] = wt_actv_pvld[1] & dat_actv_pvld[1] & wt_actv_nz1 & dat_actv_nz1; assign full_mul_result[38 + 19 - 1 : 38] = {{1{mout_2[18-1]}}, mout_2} & {19{op_out_pvld[1]}}; assign full_mul_result[57 + 19 - 1 : 57] = {{1{mout_3[18-1]}}, mout_3} & {19{op_out_pvld[1]}}; wire [18-1:0] mout_4; wire [18-1:0] mout_5; DW02_multp #(8, 8, 18) mul2 ( .a(wt_actv_data2), .b(dat_actv_data2), .tc(1'b1), .out0(mout_4), .out1(mout_5) ); assign op_out_pvld[2] = wt_actv_pvld[2] & dat_actv_pvld[2] & wt_actv_nz2 & dat_actv_nz2; assign full_mul_result[76 + 19 - 1 : 76] = {{1{mout_4[18-1]}}, mout_4} & {19{op_out_pvld[2]}}; assign full_mul_result[95 + 19 - 1 : 95] = {{1{mout_5[18-1]}}, mout_5} & {19{op_out_pvld[2]}}; wire [18-1:0] mout_6; wire [18-1:0] mout_7; DW02_multp #(8, 8, 18) mul3 ( .a(wt_actv_data3), .b(dat_actv_data3), .tc(1'b1), .out0(mout_6), .out1(mout_7) ); assign op_out_pvld[3] = wt_actv_pvld[3] & dat_actv_pvld[3] & wt_actv_nz3 & dat_actv_nz3; assign full_mul_result[114 + 19 - 1 : 114] = {{1{mout_6[18-1]}}, mout_6} & {19{op_out_pvld[3]}}; assign full_mul_result[133 + 19 - 1 : 133] = {{1{mout_7[18-1]}}, mout_7} & {19{op_out_pvld[3]}}; wire [18-1:0] mout_8; wire [18-1:0] mout_9; DW02_multp #(8, 8, 18) mul4 ( .a(wt_actv_data4), .b(dat_actv_data4), .tc(1'b1), .out0(mout_8), .out1(mout_9) ); assign op_out_pvld[4] = wt_actv_pvld[4] & dat_actv_pvld[4] & wt_actv_nz4 & dat_actv_nz4; assign full_mul_result[152 + 19 - 1 : 152] = {{1{mout_8[18-1]}}, mout_8} & {19{op_out_pvld[4]}}; assign full_mul_result[171 + 19 - 1 : 171] = {{1{mout_9[18-1]}}, mout_9} & {19{op_out_pvld[4]}}; wire [18-1:0] mout_10; wire [18-1:0] mout_11; DW02_multp #(8, 8, 18) mul5 ( .a(wt_actv_data5), .b(dat_actv_data5), .tc(1'b1), .out0(mout_10), .out1(mout_11) ); assign op_out_pvld[5] = wt_actv_pvld[5] & dat_actv_pvld[5] & wt_actv_nz5 & dat_actv_nz5; assign full_mul_result[190 + 19 - 1 : 190] = {{1{mout_10[18-1]}}, mout_10} & {19{op_out_pvld[5]}}; assign full_mul_result[209 + 19 - 1 : 209] = {{1{mout_11[18-1]}}, mout_11} & {19{op_out_pvld[5]}}; wire [18-1:0] mout_12; wire [18-1:0] mout_13; DW02_multp #(8, 8, 18) mul6 ( .a(wt_actv_data6), .b(dat_actv_data6), .tc(1'b1), .out0(mout_12), .out1(mout_13) ); assign op_out_pvld[6] = wt_actv_pvld[6] & dat_actv_pvld[6] & wt_actv_nz6 & dat_actv_nz6; assign full_mul_result[228 + 19 - 1 : 228] = {{1{mout_12[18-1]}}, mout_12} & {19{op_out_pvld[6]}}; assign full_mul_result[247 + 19 - 1 : 247] = {{1{mout_13[18-1]}}, mout_13} & {19{op_out_pvld[6]}}; wire [18-1:0] mout_14; wire [18-1:0] mout_15; DW02_multp #(8, 8, 18) mul7 ( .a(wt_actv_data7), .b(dat_actv_data7), .tc(1'b1), .out0(mout_14), .out1(mout_15) ); assign op_out_pvld[7] = wt_actv_pvld[7] & dat_actv_pvld[7] & wt_actv_nz7 & dat_actv_nz7; assign full_mul_result[266 + 19 - 1 : 266] = {{1{mout_14[18-1]}}, mout_14} & {19{op_out_pvld[7]}}; assign full_mul_result[285 + 19 - 1 : 285] = {{1{mout_15[18-1]}}, mout_15} & {19{op_out_pvld[7]}}; //| eperl: generated_end (DO NOT EDIT ABOVE) DW02_sum #(8*2, 19) fsum (.INPUT(full_mul_result), .SUM(sum_out)); `endif //add pipeline for retiming wire pp_pvld_d0 = (dat_actv_pvld[0] & wt_actv_pvld[0]); //wire [19 -1:0] sum_out_d0 = $unsigned(sum_out); wire [19 -1:0] sum_out_d0 = sum_out; //: my $rwidth = 19; //: my $rr=3; //: &eperl::retime("-stage ${rr} -o sum_out_dd -i sum_out_d0 -cg_en_i pp_pvld_d0 -cg_en_o pp_pvld_dd -cg_en_rtm -wid $rwidth"); //| eperl: generated_beg (DO NOT EDIT BELOW) reg [19-1:0] sum_out_d0_d1; always @(posedge nvdla_core_clk) begin if ((pp_pvld_d0)) begin sum_out_d0_d1[19-1:0] <= sum_out_d0[19-1:0]; end end reg pp_pvld_d0_d1; always @(posedge nvdla_core_clk) begin pp_pvld_d0_d1 <= pp_pvld_d0; end reg [19-1:0] sum_out_d0_d2; always @(posedge nvdla_core_clk) begin if ((pp_pvld_d0_d1)) begin sum_out_d0_d2[19-1:0] <= sum_out_d0_d1[19-1:0]; end end reg pp_pvld_d0_d2; always @(posedge nvdla_core_clk) begin pp_pvld_d0_d2 <= pp_pvld_d0_d1; end reg [19-1:0] sum_out_d0_d3; always @(posedge nvdla_core_clk) begin if ((pp_pvld_d0_d2)) begin sum_out_d0_d3[19-1:0] <= sum_out_d0_d2[19-1:0]; end end reg pp_pvld_d0_d3; always @(posedge nvdla_core_clk) begin pp_pvld_d0_d3 <= pp_pvld_d0_d2; end wire [19-1:0] sum_out_dd; assign sum_out_dd = sum_out_d0_d3; wire pp_pvld_dd; assign pp_pvld_dd = pp_pvld_d0_d3; //| eperl: generated_end (DO NOT EDIT ABOVE) assign mac_out_pvld=pp_pvld_dd; assign mac_out_data=sum_out_dd; endmodule // NV_NVDLA_CMAC_CORE_mac