#
#  Copyright (c) 2018 by Contributors
#  file: hls.tcl
#  brief: HLS generation script.
#

# Command line arguments:
# Arg 1: path to design sources
# Arg 2: path to sim sources
# Arg 3: path to test sources
# Arg 4: path to include sources
# Arg 5: mode
# Arg 6: debug
# Arg 7: no_dsp
# Arg 8: no_alu
# Arg 9: target clock period
# Arg 10: input type width (log)
# Arg 11: weight type width (log)
# Arg 12: accum type width (log)
# Arg 13: output type width (log)
# Arg 14: batch size (log)
# Arg 15: in block size (log)
# Arg 16: out block size (log)
# Arg 17: uop buffer size in B (log)
# Arg 18: inp buffer size in B (log)
# Arg 19: wgt buffer size in B (log)
# Arg 20: acc buffer size in B (log)
# Arg 21: out buffer size in B (log)
#
# Arg N is read from [lindex $argv N+1], so a complete invocation carries
# exactly 23 entries in $argv (indices 0 and 1 are not used by this script).
# NOTE(review): indices 0-1 are presumably the launcher's own "-f <script>"
# words -- confirm against the HLS tool's argv convention.

# Refuse to run on an incomplete or over-long argument list. The message goes
# to stderr and the exit status is non-zero so a calling build system notices
# the mis-invocation instead of seeing a silent, successful no-op.
if {[llength $argv] != 23} {
    puts stderr "hls.tcl: expected 23 entries in argv (21 script arguments at indices 2-22), got [llength $argv]"
    exit 1
}

# Source locations
set src_dir       [lindex $argv 2]
set sim_dir       [lindex $argv 3]
set test_dir      [lindex $argv 4]
set include_dir   [lindex $argv 5]

# Build mode and feature switches (compared against literal strings later on)
set mode          [lindex $argv 6]
set debug         [lindex $argv 7]
set no_dsp        [lindex $argv 8]
set no_alu        [lindex $argv 9]

# Target clock period
set target_period [lindex $argv 10]

# Data type widths (log)
set inp_width     [lindex $argv 11]
set wgt_width     [lindex $argv 12]
set acc_width     [lindex $argv 13]
set out_width     [lindex $argv 14]

# Tensor shape parameters (log)
set batch         [lindex $argv 15]
set block_in      [lindex $argv 16]
set block_out     [lindex $argv 17]

# On-chip buffer sizes in bytes (log)
set uop_buff_size [lindex $argv 18]
set inp_buff_size [lindex $argv 19]
set wgt_buff_size [lindex $argv 20]
set acc_buff_size [lindex $argv 21]
set out_buff_size [lindex $argv 22]

# Initializes the HLS design and sets HLS pragmas for memory partitioning.
# This is necessary because of a Vivado restriction that doesn't allow for
# buses wider than 1024 bits.

# Helper for init_design: applies the array directives for one on-chip memory.
#
# Arguments:
#   mem              name of the memory array (e.g. inp_mem)
#   funcs            list of the two functions the directives are attached to
#   log_vector_bits  log2 of the width in bits of one vector of the memory
#   log_count        log2 of the number of vectors making up one access
#
# The partition factor is (2^(log_vector_bits + log_count)) / 1024 using
# integer division. When it is 0 (total width below 1024 bits) dimension 2 is
# reshaped completely. Otherwise dimension 2 is block-partitioned by that
# factor and block-reshaped by (1024 / vector width).
proc init_design_mem {mem funcs log_vector_bits log_count} {
    set partition_factor [expr {(1 << ($log_vector_bits + $log_count)) / 1024}]
    if {$partition_factor == 0} {
        foreach func $funcs {
            set_directive_array_reshape -type complete -dim 2 $func $mem
        }
        return
    }
    set reshape_factor [expr {1024 / (1 << $log_vector_bits)}]
    # Two separate loops keep the directive order: all partition directives
    # are issued before any reshape directive.
    foreach func $funcs {
        set_directive_array_partition -type block -factor $partition_factor -dim 2 $func $mem
    }
    foreach func $funcs {
        set_directive_array_reshape -type block -factor $reshape_factor -dim 2 $func $mem
    }
}

# Selects the target part, creates the clock and issues the memory
# partition/reshape directives for the input, weight and output memories.
#
# Arguments:
#   per        clock period passed to create_clock
#   inp_width  input type width (log)
#   wgt_width  weight type width (log)
#   out_width  output type width (log)
#   batch      batch size (log)
#   block_in   in block size (log)
#   block_out  out block size (log)
proc init_design {per inp_width wgt_width out_width batch block_in block_out} {

    # Set device number
    set_part {xc7z020clg484-1}

    # Set the clock frequency
    create_clock -period $per -name default

    # Input memory: partition factor is (INP_VECTOR_WIDTH*BATCH/1024),
    # reshape factor is (1024/INP_VECTOR_WIDTH)
    init_design_mem inp_mem {load compute} [expr {$inp_width + $block_in}] $batch

    # Weight memory: partition factor is (WGT_VECTOR_WIDTH*BLOCK_OUT/1024),
    # reshape factor is (1024/WGT_VECTOR_WIDTH)
    init_design_mem wgt_mem {load compute} [expr {$wgt_width + $block_in}] $block_out

    # Output memory: partition factor is (OUT_VECTOR_WIDTH*BATCH/1024),
    # reshape factor is (1024/OUT_VECTOR_WIDTH)
    init_design_mem out_mem {compute store} [expr {$out_width + $block_out}] $batch
}

# Warn about a mode that selects nothing below; without this the script would
# finish without a single message and without producing any output.
set known_modes {all sim skip_sim fetch load compute store}
if {[lsearch -exact $known_modes $mode] < 0} {
    puts stderr "hls.tcl: warning: unrecognized mode \"$mode\"; nothing will be generated (expected one of: $known_modes)"
}

# C define flags to pass to compiler
set cflags "-I $include_dir -I $src_dir -I $test_dir \
    -DVTA_LOG_WGT_WIDTH=$wgt_width -DVTA_LOG_INP_WIDTH=$inp_width \
    -DVTA_LOG_ACC_WIDTH=$acc_width -DVTA_LOG_OUT_WIDTH=$out_width \
    -DVTA_LOG_BATCH=$batch -DVTA_LOG_BLOCK_OUT=$block_out -DVTA_LOG_BLOCK_IN=$block_in \
    -DVTA_LOG_UOP_BUFF_SIZE=$uop_buff_size -DVTA_LOG_INP_BUFF_SIZE=$inp_buff_size \
    -DVTA_LOG_WGT_BUFF_SIZE=$wgt_buff_size -DVTA_LOG_ACC_BUFF_SIZE=$acc_buff_size \
    -DVTA_LOG_OUT_BUFF_SIZE=$out_buff_size"
# Optional defines; each switch is enabled only by the literal string "true".
if {$debug eq "true"} {
    append cflags " -DVTA_DEBUG=1"
}
if {$no_dsp eq "true"} {
    append cflags " -DNO_DSP"
}
if {$no_alu eq "true"} {
    append cflags " -DNO_ALU"
}

# HLS behavioral sim
if {$mode eq "all" || $mode eq "sim"} {
    open_project vta_sim
    set_top vta
    add_files $src_dir/vta.cc -cflags $cflags
    add_files -tb $sim_dir/vta_test.cc -cflags $cflags
    add_files -tb $test_dir/test_lib.cc -cflags $cflags
    open_solution "solution0"
    init_design $target_period $inp_width $wgt_width $out_width $batch $block_in $block_out
    csim_design -clean
    close_project
}

# Generate the fetch, load, compute and store stages, in that order.
# A stage is synthesized when mode is "all", "skip_sim" or the stage's own
# name; the design is exported only for "all" and "skip_sim".
foreach stage {fetch load compute store} {
    if {$mode eq "all" || $mode eq "skip_sim" || $mode eq $stage} {
        open_project vta_$stage
        set_top $stage
        add_files $src_dir/vta.cc -cflags $cflags
        open_solution "solution0"
        init_design $target_period $inp_width $wgt_width $out_width $batch $block_in $block_out
        csynth_design
        if {$mode eq "all" || $mode eq "skip_sim"} {
            export_design -format ip_catalog
        }
        close_project
    }
}

exit