Commit f55609b4 by Thierry Moreau Committed by Jared Roesch

[VTA] Refactor to increase platform coverage (Ultra96 etc.) (#3496)

* hardware refactor for increased FPGA coverage, small optimizations

* fix header

* cleaning up parameters that won't be needed for now

* streamlining makefile, and simplifying tcl scripts

* moving parameter derivation into pkg_config.py, keeping tcl scripts lightweight

* refactoring tcl script to avoid global variables

* deriving AXI signals in pkg_config.py

* unifying address map definition for hardware and software drivers

* single channel design for ultra96 to simplify build

* enable alu by default, no mul opcode for now

* hardware fix

* new bitstream; vta version

* avoid error when env variable is not set

* ultra96 cleanup

* further cleaning up tcl script for bitstream generation

* preliminary rpc server support on ultra96

* rpc server tracker scripts

* ultra96 ldflag

* ultra96 support

* ultra96 support

* cleanup line

* cmake support for ultra96

* simplify memory instantiation

* cleaning up IP parameter initialization

* fix queue instantiation

* 2019.1 transition

* fix macro def

* removing bus width from config

* cleanup

* fix

* turning off testing for now

* cleanup ultra96 ps instantiation

* minor refactor

* adding comments

* upgrading to tophub v0.6

* model used in TVM target now refers to a specific version of VTA for better autoTVM scheduling

* revert change due to bug

* rename driver files to be for zynq-type devices

* streamlining address mapping

* unifying register map offset values between driver and hardware generator

* rely on cma library for cache flush/invalidation

* coherence management

* not make buffer packing depend on data types that can be wider than 64bits

* refactor config derivation to minimize free parameters

* fix environment/pkg config interaction

* adding cfg dump property to pkgconfig:

* fix rpc reconfig

* fix spacing

* cleanup

* fix spacing

* long line fix

* fix spacing and lint

* fix line length

* cmake fix

* environment fix

* renaming after pynq since the driver stack relies on the pynq library - see pynq.io

* update doc

* adding parameterization to  name

* space

* removing reg width

* vta RPC

* update doc on how to edit vta_config.json

* fix path

* fix path
parent bca8ac17
...@@ -17,7 +17,10 @@ ...@@ -17,7 +17,10 @@
# under the License. # under the License.
PROJROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../" && pwd )" PROJROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../" && pwd )"
# Derive target specified by vta_config.json
VTA_CONFIG=${PROJROOT}/vta/config/vta_config.py
TARGET=$(python ${VTA_CONFIG} --target)
export PYTHONPATH=${PYTHONPATH}:${PROJROOT}/python:${PROJROOT}/vta/python export PYTHONPATH=${PYTHONPATH}:${PROJROOT}/python:${PROJROOT}/vta/python
export PYTHONPATH=${PYTHONPATH}:/home/xilinx/pynq export PYTHONPATH=${PYTHONPATH}:/home/xilinx/pynq
python3 -m vta.exec.rpc_server --tracker fleet:9190 --key pynq python3 -m vta.exec.rpc_server --tracker fleet:9190 --key $TARGET
...@@ -38,11 +38,16 @@ elseif(PYTHON) ...@@ -38,11 +38,16 @@ elseif(PYTHON)
string(REGEX MATCHALL "(^| )-D[A-Za-z0-9_=.]*" VTA_DEFINITIONS "${__vta_defs}") string(REGEX MATCHALL "(^| )-D[A-Za-z0-9_=.]*" VTA_DEFINITIONS "${__vta_defs}")
file(GLOB VTA_RUNTIME_SRCS vta/src/*.cc) file(GLOB VTA_RUNTIME_SRCS vta/src/*.cc)
file(GLOB __vta_target_srcs vta/src/${VTA_TARGET}/*.cc) # Add sim driver sources
if(${VTA_TARGET} STREQUAL "sim")
file(GLOB __vta_target_srcs vta/src/sim/*.cc)
endif()
# Add pynq driver sources
if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
file(GLOB __vta_target_srcs vta/src/pynq/*.cc)
endif()
list(APPEND VTA_RUNTIME_SRCS ${__vta_target_srcs}) list(APPEND VTA_RUNTIME_SRCS ${__vta_target_srcs})
# Add tsim driver sources
add_library(vta SHARED ${VTA_RUNTIME_SRCS})
if(${VTA_TARGET} STREQUAL "tsim") if(${VTA_TARGET} STREQUAL "tsim")
target_compile_definitions(vta PUBLIC USE_TSIM) target_compile_definitions(vta PUBLIC USE_TSIM)
include_directories("vta/include") include_directories("vta/include")
...@@ -50,6 +55,8 @@ elseif(PYTHON) ...@@ -50,6 +55,8 @@ elseif(PYTHON)
list(APPEND RUNTIME_SRCS ${RUNTIME_DPI_SRCS}) list(APPEND RUNTIME_SRCS ${RUNTIME_DPI_SRCS})
endif() endif()
add_library(vta SHARED ${VTA_RUNTIME_SRCS})
target_include_directories(vta PUBLIC vta/include) target_include_directories(vta PUBLIC vta/include)
foreach(__def ${VTA_DEFINITIONS}) foreach(__def ${VTA_DEFINITIONS})
...@@ -62,7 +69,7 @@ elseif(PYTHON) ...@@ -62,7 +69,7 @@ elseif(PYTHON)
endif(APPLE) endif(APPLE)
# PYNQ rules for Pynq v2.4 # PYNQ rules for Pynq v2.4
if(${VTA_TARGET} STREQUAL "pynq") if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
find_library(__cma_lib NAMES cma PATH /usr/lib) find_library(__cma_lib NAMES cma PATH /usr/lib)
target_link_libraries(vta ${__cma_lib}) target_link_libraries(vta ${__cma_lib})
endif() endif()
......
...@@ -36,10 +36,6 @@ below. ...@@ -36,10 +36,6 @@ below.
+=======================+============+========================================================+ +=======================+============+========================================================+
| ``TARGET`` | String | The TVM device target. | | ``TARGET`` | String | The TVM device target. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``HW_TARGET`` | Int | FPGA frequency in MHz. |
+-----------------------+------------+--------------------------------------------------------+
| ``HW_CLK_TARGET`` | Int | FPGA clock period in ns target for HLS tool. |
+-----------------------+------------+--------------------------------------------------------+
| ``HW_VER`` | String | VTA hardware version number. | | ``HW_VER`` | String | VTA hardware version number. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``LOG_INP_WIDTH`` | Int (log2) | Input data type signed integer width. | | ``LOG_INP_WIDTH`` | Int (log2) | Input data type signed integer width. |
...@@ -48,13 +44,9 @@ below. ...@@ -48,13 +44,9 @@ below.
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``LOG_ACC_WIDTH`` | Int (log2) | Accumulator data type signed integer width. | | ``LOG_ACC_WIDTH`` | Int (log2) | Accumulator data type signed integer width. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``LOG_OUT_WIDTH`` | Int (log2) | Output data type signed integer width. | | ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic input/output dimension 0.|
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic output dimension 0. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_BLOCK_IN`` | Int (log2) | VTA matrix multiply reduction dimension. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``LOG_BLOCK_OUT`` | Int (log2) | VTA matrix multiply intrinsic output dimension 1. | | ``LOG_BLOCK`` | Int (log2) | VTA matrix multiply inner dimensions. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
| ``LOG_UOP_BUFF_SIZE`` | Int (log2) | Micro-op on-chip buffer in Bytes. | | ``LOG_UOP_BUFF_SIZE`` | Int (log2) | Micro-op on-chip buffer in Bytes. |
+-----------------------+------------+--------------------------------------------------------+ +-----------------------+------------+--------------------------------------------------------+
...@@ -75,13 +67,8 @@ below. ...@@ -75,13 +67,8 @@ below.
We provide additional detail below regarding each parameter: We provide additional detail below regarding each parameter:
- ``TARGET``: Can be set to ``"pynq"`` or ``"sim"``. - ``TARGET``: Can be set to ``"pynq"``, ``"ultra96"``, ``"sim"`` (fast simulator), or ``"tsim"`` (cycle accurate sim with verilator).
- ``HW_TARGET``: In pynq mode, can be set to ``100``, ``142``, ``167``, or ``200`` MHz.
- ``HW_CLK_TARGET``: The lower the target, the more pipeline stages HLS will insert to achieve timing closure during place and route (this can also slightly decrease performance).
- ``HW_VER``: Hardware version which increments every time the VTA hardware design changes. This parameter is used to uniquely identify hardware bitstreams. - ``HW_VER``: Hardware version which increments every time the VTA hardware design changes. This parameter is used to uniquely identify hardware bitstreams.
- ``LOG_OUT_WIDTH``: We recommend matching ``LOG_OUT_WIDTH`` to ``LOG_INP_WIDTH``. - ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension of inner tensor computation.
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension. - ``LOG_BLOCK``: Equivalent to B and C in multiplication of shape (A, B) x (B, C), or typically, the input/output channel dimensions of the inner tensor computation.
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension.
- ``LOG_BLOCK_IN``: Equivalent to B in multiplication of shape (A, B) x (B, C), or typically, the input channel dimension.
- ``LOG_BLOCK_OUT``: Equivalent to C in multiplication of shape (A, B) x (B, C), or typically, the output channel dimension.
...@@ -61,7 +61,7 @@ To do so, ...@@ -61,7 +61,7 @@ To do so,
```bash ```bash
cd <tvm root> cd <tvm root>
cp vta/config/vta_config.json vta_config.json vim vta/config/vta_config.json
# edit vta_config.json # edit vta_config.json
make vta make vta
``` ```
...@@ -118,7 +118,7 @@ cd /home/xilinx/tvm ...@@ -118,7 +118,7 @@ cd /home/xilinx/tvm
mkdir build mkdir build
cp cmake/config.cmake build/. cp cmake/config.cmake build/.
# Copy pynq specific configuration # Copy pynq specific configuration
cp vta/config/pynq_sample.json build/vta_config.json cp vta/config/pynq_sample.json vta/config/vta_config.json
cd build cd build
cmake .. cmake ..
make runtime vta -j2 make runtime vta -j2
...@@ -147,13 +147,12 @@ export VTA_PYNQ_RPC_PORT=9091 ...@@ -147,13 +147,12 @@ export VTA_PYNQ_RPC_PORT=9091
``` ```
In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`. In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`.
Alternatively, you can copy the default `vta/config/pynq_sample.json` into the TVM root as `vta_config.json`.
> Note: in contrast to our simulation setup, there are no libraries to compile on the host side since the host offloads all of the computation to the Pynq board. > Note: in contrast to our simulation setup, there are no libraries to compile on the host side since the host offloads all of the computation to the Pynq board.
```bash ```bash
# On the Host-side # On the Host-side
cd <tvm root> cd <tvm root>
cp vta/config/pynq_sample.json vta_config.json cp vta/config/pynq_sample.json vta/config/vta_config.json
``` ```
This time again, we will run the 2D convolution testbench. This time again, we will run the 2D convolution testbench.
...@@ -187,28 +186,28 @@ This third and last guide allows users to generate custom VTA bitstreams using f ...@@ -187,28 +186,28 @@ This third and last guide allows users to generate custom VTA bitstreams using f
### Xilinx Toolchain Installation ### Xilinx Toolchain Installation
We recommend using `Vivado 2018.2` since our scripts have been tested to work on this version of the Xilinx toolchains. We recommend using `Vivado 2019.1` since our scripts have been tested to work on this version of the Xilinx toolchains.
Our guide is written for Linux (Ubuntu) installation. Our guide is written for Linux (Ubuntu) installation.
You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2018.2](https://www.xilinx.com/products/design-tools/vivado.html), which is a license-free version of the Vivado HLx toolchain. You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2019.1](https://www.xilinx.com/products/design-tools/vivado.html), which is a license-free version of the Vivado HLx toolchain.
#### Obtaining and Launching the Vivado GUI Installer #### Obtaining and Launching the Vivado GUI Installer
1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2018-2.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2018.2: WebPACK and Editions. 1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2019-1.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2019.1: WebPACK and Editions.
2. You’ll have to sign in with a Xilinx account. This requires a Xilinx account creation that will take 2 minutes. 2. You’ll have to sign in with a Xilinx account. This requires a Xilinx account creation that will take 2 minutes.
3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin`. 3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin`.
4. Now that the file is downloaded, go to your `Downloads` directory, and change the file permissions so it can be executed: 4. Now that the file is downloaded, go to your `Downloads` directory, and change the file permissions so it can be executed:
```bash ```bash
chmod u+x Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin chmod u+x Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
``` ```
5. Now you can execute the binary: 5. Now you can execute the binary:
```bash ```bash
./Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin ./Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
``` ```
#### Xilinx Vivado GUI Installer Steps #### Xilinx Vivado GUI Installer Steps
At this point you've launched the Vivado 2018.2 Installer GUI program. At this point you've launched the Vivado 2019.1 Installer GUI program.
1. Click “Next” on the *Welcome* screen. 1. Click “Next” on the *Welcome* screen.
2. On the *Select Install Type* screen, enter your Xilinx user credentials under the “User Authentication” box and select the “Download and Install Now” option before clicking “Next” . 2. On the *Select Install Type* screen, enter your Xilinx user credentials under the “User Authentication” box and select the “Download and Install Now” option before clicking “Next” .
...@@ -230,8 +229,8 @@ At this point you've launched the Vivado 2018.2 Installer GUI program. ...@@ -230,8 +229,8 @@ At this point you've launched the Vivado 2018.2 Installer GUI program.
The last step is to update your `~/.bashrc` with the following lines. This will include all of the Xilinx binary paths so you can launch compilation scripts from the command line. The last step is to update your `~/.bashrc` with the following lines. This will include all of the Xilinx binary paths so you can launch compilation scripts from the command line.
```bash ```bash
# Xilinx Vivado 2018.2 environment # Xilinx Vivado 2019.1 environment
export XILINX_VIVADO=${XILINX_PATH}/Vivado/2018.2 export XILINX_VIVADO=${XILINX_PATH}/Vivado/2019.1
export PATH=${XILINX_VIVADO}/bin:${PATH} export PATH=${XILINX_VIVADO}/bin:${PATH}
``` ```
......
...@@ -44,7 +44,7 @@ PACKAGE_VERSION = { ...@@ -44,7 +44,7 @@ PACKAGE_VERSION = {
'opencl': "v0.02", 'opencl': "v0.02",
'mali': "v0.05", 'mali': "v0.05",
'vta': "v0.05", 'vta': "v0.06",
} }
logger = logging.getLogger('autotvm') logger = logging.getLogger('autotvm')
......
{ {
"TARGET" : "pynq", "TARGET" : "pynq",
"HW_FREQ" : 100, "HW_VER" : "0.0.1",
"HW_CLK_TARGET" : 8,
"HW_VER" : "0.0.0",
"LOG_INP_WIDTH" : 3, "LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3, "LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5, "LOG_ACC_WIDTH" : 5,
"LOG_OUT_WIDTH" : 3,
"LOG_BATCH" : 0, "LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4, "LOG_BLOCK" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_UOP_BUFF_SIZE" : 15, "LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15, "LOG_INP_BUFF_SIZE" :15,
"LOG_WGT_BUFF_SIZE" : 18, "LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17 "LOG_ACC_BUFF_SIZE" : 17
} }
{
"TARGET" : "ultra96",
"HW_VER" : "0.0.1",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
"LOG_BATCH" : 0,
"LOG_BLOCK" : 4,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" :15,
"LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17
}
{ {
"TARGET" : "sim", "TARGET" : "sim",
"HW_FREQ" : 100, "HW_VER" : "0.0.1",
"HW_CLK_TARGET" : 7,
"HW_VER" : "0.0.0",
"LOG_INP_WIDTH" : 3, "LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3, "LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5, "LOG_ACC_WIDTH" : 5,
"LOG_OUT_WIDTH" : 3,
"LOG_BATCH" : 0, "LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4, "LOG_BLOCK" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_UOP_BUFF_SIZE" : 15, "LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15, "LOG_INP_BUFF_SIZE" : 15,
"LOG_WGT_BUFF_SIZE" : 18, "LOG_WGT_BUFF_SIZE" : 18,
......
...@@ -17,81 +17,30 @@ ...@@ -17,81 +17,30 @@
# Directories # Directories
ROOTDIR = $(CURDIR) ROOTDIR = $(CURDIR)
BUILD_NAME = build VTA_DIR = $(CURDIR)/../..
BUILD_DIR = $(ROOTDIR)/../../$(BUILD_NAME)/hardware/xilinx BUILD_DIR = $(VTA_DIR)/build/hardware/xilinx
SCRIPT_DIR = $(ROOTDIR)/scripts SCRIPT_DIR = $(CURDIR)/scripts
SRC_DIR = $(ROOTDIR)/src SRC_DIR = $(CURDIR)/src
SIM_DIR = $(ROOTDIR)/sim
TEST_DIR = $(ROOTDIR)/../../tests/hardware/common
INCLUDE_DIR = $(ROOTDIR)/../../include
# Executables # Executables
VIVADO_HLS = vivado_hls VIVADO_HLS = vivado_hls
VIVADO = vivado VIVADO = vivado
HSI = hsi
# HLS mode
MODE = skip_sim
# Debug flag
DEBUG = false
# SLURM
SLURM = false
# Prevent generation of DSP
NO_DSP = false
# Prevent generation of ALU
NO_ALU = false
# Process VTA JSON config # Process VTA JSON config
VTA_CONFIG = python $(CURDIR)/../../config/vta_config.py VTA_CONFIG := $(CURDIR)/../../config/vta_config.py
CFLAGS := $(shell ${VTA_CONFIG} --cflags)
VTA_TARGET := $(shell ${VTA_CONFIG} --target)
#---------------------
# VTA Parameters
#--------------------
VTA_INP_WIDTH := $(shell ${VTA_CONFIG} --get-inpwidth)
VTA_WGT_WIDTH := $(shell ${VTA_CONFIG} --get-wgtwidth)
VTA_ACC_WIDTH := $(shell ${VTA_CONFIG} --get-accwidth)
VTA_OUT_WIDTH := $(shell ${VTA_CONFIG} --get-outwidth)
VTA_BATCH := $(shell ${VTA_CONFIG} --get-batch)
VTA_IN_BLOCK := $(shell ${VTA_CONFIG} --get-blockin)
VTA_OUT_BLOCK := $(shell ${VTA_CONFIG} --get-blockout)
VTA_UOP_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-uopbuffsize)
VTA_INP_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-inpbuffsize)
VTA_WGT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-wgtbuffsize)
VTA_ACC_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-accbuffsize)
VTA_OUT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-outbuffsize)
#---------------------
# FPGA Parameters
#--------------------
VTA_CLOCK_FREQ = $(shell ${VTA_CONFIG} --get-fpgafreq)
VTA_TARGET_PER = $(shell ${VTA_CONFIG} --get-fpgaper)
#---------------------
# Compilation parameters
#--------------------
# Number of threads during compilation
VTA_HW_COMP_THREADS = 8
# Derive config name # Derive config name
CONF = $(shell ${VTA_CONFIG} --cfg-str) CONF := $(shell python ${VTA_CONFIG} --cfg-str)
IP_BUILD_PATH = $(BUILD_DIR)/hls/$(CONF) IP_BUILD_PATH := $(BUILD_DIR)/hls/$(CONF)
HW_BUILD_PATH = $(BUILD_DIR)/vivado/$(CONF) HW_BUILD_PATH := $(BUILD_DIR)/vivado/$(CONF)
ifeq ($(SLURM), true)
IP_BUILD_PATH = /scratch/hls/$(CONF)
HW_BUILD_PATH = /scratch/vivado/$(CONF)
endif
# IP file path # IP file path
IP_PATH = $(BUILD_DIR)/hls/$(CONF)/solution0/impl/ip/xilinx_com_hls_vta_1_0.zip IP_PATH := $(BUILD_DIR)/hls/$(CONF)/vta_compute/soln/impl/ip/xilinx_com_hls_compute_1_0.zip
# Bitstream file path # Bitstream file path
BIT_PATH = $(BUILD_DIR)/vivado/$(CONF)/export/$(CONF).bit BIT_PATH := $(BUILD_DIR)/vivado/$(CONF)/export/$(CONF).bit
.PHONY: all ip bit bsp clean clean_all .PHONY: all ip bit clean clean_all
all: bit all: bit
ip: $(IP_PATH) ip: $(IP_PATH)
...@@ -100,37 +49,24 @@ bit: $(BIT_PATH) ...@@ -100,37 +49,24 @@ bit: $(BIT_PATH)
$(IP_PATH): $(SRC_DIR)/* $(IP_PATH): $(SRC_DIR)/*
mkdir -p $(IP_BUILD_PATH) mkdir -p $(IP_BUILD_PATH)
cd $(IP_BUILD_PATH) && \ cd $(IP_BUILD_PATH) && \
$(VIVADO_HLS) -f $(SCRIPT_DIR)/hls.tcl \ $(VIVADO_HLS) \
-tclargs $(SRC_DIR) $(SIM_DIR) $(TEST_DIR) $(INCLUDE_DIR) \ -f $(SCRIPT_DIR)/hls.tcl \
$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(VTA_TARGET_PER) \ -tclargs \
$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_ACC_WIDTH) $(VTA_OUT_WIDTH) \ $(VTA_DIR) \
$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \ ${VTA_CONFIG}
$(VTA_UOP_BUFF_SIZE) $(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) \
$(VTA_ACC_BUFF_SIZE) $(VTA_OUT_BUFF_SIZE)
ifeq ($(SLURM), true)
mkdir -p $(BUILD_DIR)/hls
mv $(IP_BUILD_PATH) $(BUILD_DIR)/hls/.
endif
$(BIT_PATH): $(IP_PATH) $(BIT_PATH): $(IP_PATH)
mkdir -p $(HW_BUILD_PATH) mkdir -p $(HW_BUILD_PATH)
cd $(HW_BUILD_PATH) && \ cd $(HW_BUILD_PATH) && \
$(VIVADO) -mode tcl -source $(SCRIPT_DIR)/vivado.tcl \ $(VIVADO) \
-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_CLOCK_FREQ) \ -mode tcl \
$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_OUT_WIDTH) \ -source $(SCRIPT_DIR)/vivado.tcl \
$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \ -tclargs \
$(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) $(VTA_OUT_BUFF_SIZE) $(BUILD_DIR)/hls/$(CONF) \
ifeq ($(SLURM), true) ${VTA_CONFIG}
mkdir -p $(BUILD_DIR)/vivado
mv $(HW_BUILD_PATH) $(BUILD_DIR)/vivado/.
endif
bsp: $(BIT_PATH)
cd $(HW_BUILD_PATH) && $(HSI) -mode tcl -source $(SCRIPT_DIR)/hsi.tcl -nojournal -nolog
cd $(HW_BUILD_PATH)/bsp && make
clean: clean:
rm -rf *.out *.log *.sb figures rm -rf *.out *.log
cleanall: clean cleanall: clean
rm -rf $(BUILD_DIR) rm -rf $(BUILD_DIR)
...@@ -35,17 +35,6 @@ int main(void) { ...@@ -35,17 +35,6 @@ int main(void) {
printParameters(); printParameters();
#endif #endif
// Micro op bound
assert(VTA_UOP_GEM_2_1 < VTA_UOP_WIDTH);
assert(VTA_UOP_ALU_1_1 < VTA_UOP_WIDTH);
// Make sure there is no misaligment
assert(VTA_INSN_GEM_9_1 < VTA_INSN_GEM_A_0);
assert(VTA_INSN_MEM_7_1 < VTA_INSN_MEM_8_0);
// Instruction bounds
assert(VTA_INSN_MEM_E_1 < VTA_INS_WIDTH);
assert(VTA_INSN_GEM_F_1 < VTA_INS_WIDTH);
assert(VTA_INSN_ALU_G_1 < VTA_INS_WIDTH);
int status = 0; int status = 0;
// Run ALU test (vector-scalar operators) // Run ALU test (vector-scalar operators)
...@@ -65,15 +54,15 @@ int main(void) { ...@@ -65,15 +54,15 @@ int main(void) {
status |= alu_test(VTA_ALU_OPCODE_MAX, false, VTA_BLOCK_OUT, 128, false); status |= alu_test(VTA_ALU_OPCODE_MAX, false, VTA_BLOCK_OUT, 128, false);
status |= alu_test(VTA_ALU_OPCODE_ADD, false, VTA_BLOCK_OUT, 128, true); status |= alu_test(VTA_ALU_OPCODE_ADD, false, VTA_BLOCK_OUT, 128, true);
status |= alu_test(VTA_ALU_OPCODE_ADD, false, VTA_BLOCK_OUT, 128, false); status |= alu_test(VTA_ALU_OPCODE_ADD, false, VTA_BLOCK_OUT, 128, false);
status |= alu_test(VTA_ALU_OPCODE_SHR, false, VTA_BLOCK_OUT, 128, true);
status |= alu_test(VTA_ALU_OPCODE_SHR, false, VTA_BLOCK_OUT, 128, false);
// Run blocked GEMM test // Run blocked GEMM test
status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 2);
status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 2); status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 2);
status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, true, 1);
status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 1); status |= blocked_gemm_test(256, 256, VTA_BLOCK_OUT*4, false, 1);
// Simple GEMM unit test // Simple GEMM unit test
status |= gemm_test(64, 64, 64, true); status |= gemm_test(4 * VTA_BATCH, 4 * VTA_BLOCK_OUT, 4 * VTA_BLOCK_IN, false);
return status; return status;
} }
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
*/ */
/*! /*!
* Copyright (c) 2018 by Contributors
* \file vta.h * \file vta.h
* \brief Type definitions and prototype for VTA HLS design. * \brief Type definitions and prototype for VTA HLS design.
*/ */
...@@ -32,6 +31,16 @@ ...@@ -32,6 +31,16 @@
#include <vta/hw_spec.h> #include <vta/hw_spec.h>
/*!
* Define HLS stream depth
*/
#define PRAGMA_SUB(x) _Pragma (#x)
#define PRAGMA_HLS(x) PRAGMA_SUB(x)
#define STREAM_IN_DEPTH 8
/* \typedef bus_T memory bus datatype*/
typedef ap_uint<VTA_BUS_WIDTH> bus_T;
/* \typedef uop_T Micro-op datatype*/ /* \typedef uop_T Micro-op datatype*/
typedef ap_uint<VTA_UOP_WIDTH> uop_T; typedef ap_uint<VTA_UOP_WIDTH> uop_T;
...@@ -53,18 +62,6 @@ typedef ap_int<VTA_WGT_WIDTH+VTA_INP_WIDTH+1> mul_T; ...@@ -53,18 +62,6 @@ typedef ap_int<VTA_WGT_WIDTH+VTA_INP_WIDTH+1> mul_T;
/* \typedef sum_T GEMM accumulator datatype*/ /* \typedef sum_T GEMM accumulator datatype*/
typedef ap_int<VTA_WGT_WIDTH+VTA_INP_WIDTH+VTA_LOG_BLOCK_IN+1> sum_T; typedef ap_int<VTA_WGT_WIDTH+VTA_INP_WIDTH+VTA_LOG_BLOCK_IN+1> sum_T;
/* \typedef inp_vec_T Input vector datatype*/
typedef ap_uint<VTA_INP_WIDTH*VTA_BLOCK_IN> inp_vec_T;
/* \typedef wgt_vec_T Weight vector datatype*/
typedef ap_uint<VTA_WGT_WIDTH*VTA_BLOCK_IN> wgt_vec_T;
/* \typedef acc_vec_T Accumulator vector datatype*/
typedef ap_uint<VTA_ACC_WIDTH*VTA_BLOCK_OUT> acc_vec_T;
/* \typedef out_vec_T Output vector datatype*/
typedef ap_uint<VTA_OUT_WIDTH*VTA_BLOCK_OUT> out_vec_T;
/* \typedef uop_idx_T Micro-op SRAM index datatype*/ /* \typedef uop_idx_T Micro-op SRAM index datatype*/
typedef ap_uint<VTA_LOG_UOP_BUFF_DEPTH+1> uop_idx_T; typedef ap_uint<VTA_LOG_UOP_BUFF_DEPTH+1> uop_idx_T;
...@@ -107,18 +104,14 @@ typedef ap_uint<VTA_MEMOP_PAD_BIT_WIDTH> memop_pad_T; ...@@ -107,18 +104,14 @@ typedef ap_uint<VTA_MEMOP_PAD_BIT_WIDTH> memop_pad_T;
/* \typedef aluop_opcode_T ALU operation opcode datatype*/ /* \typedef aluop_opcode_T ALU operation opcode datatype*/
typedef ap_uint<VTA_ALU_OPCODE_BIT_WIDTH> aluop_opcode_T; typedef ap_uint<VTA_ALU_OPCODE_BIT_WIDTH> aluop_opcode_T;
/* \typedef aluop_opcode_T ALU operation immediate datatype*/ /* \typedef aluop_imm_T ALU operation immediate datatype*/
typedef ap_int<VTA_ALUOP_IMM_BIT_WIDTH> aluop_imm_T; typedef ap_int<VTA_ALUOP_IMM_BIT_WIDTH> aluop_imm_T;
/* \typedef aluop_opcode_T ALU operation shift immediate datatype*/ /* \typedef aluop_shr_arg_T ALU operation shift right immediate datatype*/
typedef ap_int<VTA_LOG_ACC_WIDTH> aluop_sh_imm_T; typedef ap_int<VTA_SHR_ARG_BIT_WIDTH> aluop_shr_arg_T;
/*! /* \typedef aluop_mul_arg_T ALU operation multiply datatype*/
* Define HLS stream depth typedef ap_int<VTA_MUL_ARG_BIT_WIDTH> aluop_mul_arg_T;
*/
#define PRAGMA_SUB(x) _Pragma (#x)
#define PRAGMA_HLS(x) PRAGMA_SUB(x)
#define STREAM_IN_DEPTH 8
/*! /*!
* \brief Fetch module. * \brief Fetch module.
...@@ -153,13 +146,13 @@ void fetch( ...@@ -153,13 +146,13 @@ void fetch(
* \param wgt_mem Local weight SRAM buffer. Write only single port BRAM. * \param wgt_mem Local weight SRAM buffer. Write only single port BRAM.
*/ */
void load( void load(
volatile inp_vec_T *inputs, volatile bus_T *inputs,
volatile wgt_vec_T *weights, volatile bus_T *weights,
hls::stream<insn_T> &load_queue, hls::stream<insn_T> &load_queue,
hls::stream<bool> &g2l_dep_queue, hls::stream<bool> &g2l_dep_queue,
hls::stream<bool> &l2g_dep_queue, hls::stream<bool> &l2g_dep_queue,
inp_vec_T inp_mem[VTA_INP_BUFF_DEPTH][VTA_BATCH], bus_T inp_mem[VTA_INP_BUFF_DEPTH][INP_MAT_AXI_RATIO],
wgt_vec_T wgt_mem[VTA_WGT_BUFF_DEPTH][VTA_BLOCK_OUT]); bus_T wgt_mem[VTA_WGT_BUFF_DEPTH][WGT_MAT_AXI_RATIO]);
/*! /*!
* \brief Compute module. * \brief Compute module.
...@@ -187,15 +180,15 @@ void load( ...@@ -187,15 +180,15 @@ void load(
void compute( void compute(
volatile uint32_t &done, volatile uint32_t &done,
volatile uop_T *uops, volatile uop_T *uops,
volatile acc_vec_T *biases, volatile bus_T *biases,
hls::stream<insn_T> &gemm_queue, hls::stream<insn_T> &gemm_queue,
hls::stream<bool> &l2g_dep_queue, hls::stream<bool> &l2g_dep_queue,
hls::stream<bool> &s2g_dep_queue, hls::stream<bool> &s2g_dep_queue,
hls::stream<bool> &g2l_dep_queue, hls::stream<bool> &g2l_dep_queue,
hls::stream<bool> &g2s_dep_queue, hls::stream<bool> &g2s_dep_queue,
out_vec_T inp_mem[VTA_INP_BUFF_DEPTH][VTA_BATCH], bus_T inp_mem[VTA_INP_BUFF_DEPTH][INP_MAT_AXI_RATIO],
wgt_vec_T wgt_mem[VTA_WGT_BUFF_DEPTH][VTA_BLOCK_OUT], bus_T wgt_mem[VTA_WGT_BUFF_DEPTH][WGT_MAT_AXI_RATIO],
out_vec_T out_mem[VTA_ACC_BUFF_DEPTH][VTA_BATCH]); bus_T out_mem[VTA_ACC_BUFF_DEPTH][OUT_MAT_AXI_RATIO]);
/*! /*!
* \brief Store module. * \brief Store module.
...@@ -211,11 +204,11 @@ void compute( ...@@ -211,11 +204,11 @@ void compute(
* \param out_mem Local output SRAM buffer. Read only single port BRAM. * \param out_mem Local output SRAM buffer. Read only single port BRAM.
*/ */
void store( void store(
volatile out_vec_T *outputs, volatile bus_T *outputs,
hls::stream<insn_T> &store_queue, hls::stream<insn_T> &store_queue,
hls::stream<bool> &g2s_dep_queue, hls::stream<bool> &g2s_dep_queue,
hls::stream<bool> &s2g_dep_queue, hls::stream<bool> &s2g_dep_queue,
out_vec_T out_mem[VTA_ACC_BUFF_DEPTH][VTA_BATCH]); bus_T out_mem[VTA_ACC_BUFF_DEPTH][OUT_MAT_AXI_RATIO]);
/*! /*!
* \brief VTA wrapper for simulation purpose only. * \brief VTA wrapper for simulation purpose only.
...@@ -232,9 +225,9 @@ void vta( ...@@ -232,9 +225,9 @@ void vta(
uint32_t insn_count, uint32_t insn_count,
volatile insn_T *insns, volatile insn_T *insns,
volatile uop_T *uops, volatile uop_T *uops,
volatile inp_vec_T *inputs, volatile bus_T *inputs,
volatile wgt_vec_T *weights, volatile bus_T *weights,
volatile acc_vec_T *biases, volatile bus_T *biases,
volatile out_vec_T *outputs); volatile bus_T *outputs);
#endif // VTA_VTA_H_ #endif // VTA_VTA_H_
...@@ -136,19 +136,23 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size); ...@@ -136,19 +136,23 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size);
/*! /*!
* \brief Flushes the region of memory out of the CPU cache to DRAM. * \brief Flushes the region of memory out of the CPU cache to DRAM.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed. * \param vir_addr Pointer to memory region allocated with VTAMemAlloc to be flushed.
* This needs to be the physical address. * This needs to be the virtual address.
* \param phy_addr Pointer to memory region allocated with VTAMemAlloc to be flushed.
* This needs to be the physical address.
* \param size Size of the region to flush in Bytes. * \param size Size of the region to flush in Bytes.
*/ */
void VTAFlushCache(vta_phy_addr_t buf, int size); void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size);
/*! /*!
* \brief Invalidates the region of memory that is cached. * \brief Invalidates the region of memory that is cached.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be invalidated. * \param vir_addr Pointer to memory region allocated with VTAMemAlloc to be invalidated.
* This needs to be the physical address. * This needs to be the virtual address.
* \param phy_addr Pointer to memory region allocated with VTAMemAlloc to be invalidated.
* This needs to be the physical address.
* \param size Size of the region to invalidate in Bytes. * \param size Size of the region to invalidate in Bytes.
*/ */
void VTAInvalidateCache(vta_phy_addr_t buf, int size); void VTAInvalidateCache(void* vir_addr, vta_phy_addr_t phy_addr, int size);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -45,10 +45,11 @@ def get_bitstream_path(): ...@@ -45,10 +45,11 @@ def get_bitstream_path():
# Derive destination path # Derive destination path
cache_dir = os.getenv("VTA_CACHE_PATH", os.path.join(os.getenv("HOME"), ".vta_cache/")) cache_dir = os.getenv("VTA_CACHE_PATH", os.path.join(os.getenv("HOME"), ".vta_cache/"))
cache_dir = os.path.join(cache_dir, env.TARGET) cache_dir = os.path.join(cache_dir, env.TARGET)
cache_dir = os.path.join(cache_dir, env.HW_VER.replace('.', '_'))
# Create the directory if it didn't exist # Create the directory if it didn't exist
if not os.path.exists(cache_dir): if not os.path.exists(cache_dir):
os.makedirs(cache_dir) os.makedirs(cache_dir)
bit_path = os.path.join(cache_dir, env.BITSTREAM) bit_path = os.path.join(cache_dir, env.BITSTREAM) + ".bit"
return bit_path return bit_path
...@@ -63,7 +64,7 @@ def download_bitstream(): ...@@ -63,7 +64,7 @@ def download_bitstream():
bit = get_bitstream_path() bit = get_bitstream_path()
url = os.path.join(BITSTREAM_URL, env.TARGET) url = os.path.join(BITSTREAM_URL, env.TARGET)
url = os.path.join(url, env.HW_VER) url = os.path.join(url, env.HW_VER)
url = os.path.join(url, env.BITSTREAM) url = os.path.join(url, env.BITSTREAM + ".bit")
try: try:
download(url, bit) download(url, bit)
......
...@@ -113,15 +113,9 @@ class Environment(object): ...@@ -113,15 +113,9 @@ class Environment(object):
# initialization function # initialization function
def __init__(self, cfg): def __init__(self, cfg):
self.__dict__.update(cfg) # Produce the derived parameters and update dict
for key in PkgConfig.cfg_keys: self.pkg = self.pkg_config(cfg)
if key not in cfg: self.__dict__.update(self.pkg.cfg_dict)
raise ValueError("Expect key %s in cfg" % key)
# derive output buffer size
self.LOG_OUT_BUFF_SIZE = (
self.LOG_ACC_BUFF_SIZE +
self.LOG_OUT_WIDTH -
self.LOG_ACC_WIDTH)
# data type width # data type width
self.INP_WIDTH = 1 << self.LOG_INP_WIDTH self.INP_WIDTH = 1 << self.LOG_INP_WIDTH
self.WGT_WIDTH = 1 << self.LOG_WGT_WIDTH self.WGT_WIDTH = 1 << self.LOG_WGT_WIDTH
...@@ -154,25 +148,15 @@ class Environment(object): ...@@ -154,25 +148,15 @@ class Environment(object):
self.WGT_ELEM_BYTES = self.WGT_ELEM_BITS // 8 self.WGT_ELEM_BYTES = self.WGT_ELEM_BITS // 8
self.ACC_ELEM_BYTES = self.ACC_ELEM_BITS // 8 self.ACC_ELEM_BYTES = self.ACC_ELEM_BITS // 8
self.OUT_ELEM_BYTES = self.OUT_ELEM_BITS // 8 self.OUT_ELEM_BYTES = self.OUT_ELEM_BITS // 8
# Configuration bitstream name
self.BITSTREAM = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}.bit".format(
(1 << cfg["LOG_BATCH"]),
(1 << cfg["LOG_BLOCK_IN"]),
(1 << cfg["LOG_BLOCK_OUT"]),
(1 << cfg["LOG_INP_WIDTH"]),
(1 << cfg["LOG_WGT_WIDTH"]),
cfg["LOG_UOP_BUFF_SIZE"],
cfg["LOG_INP_BUFF_SIZE"],
cfg["LOG_WGT_BUFF_SIZE"],
cfg["LOG_ACC_BUFF_SIZE"],
cfg["HW_FREQ"],
cfg["HW_CLK_TARGET"],
cfg["HW_VER"].replace('.', '_'))
# dtypes # dtypes
self.acc_dtype = "int%d" % self.ACC_WIDTH self.acc_dtype = "int%d" % self.ACC_WIDTH
self.inp_dtype = "int%d" % self.INP_WIDTH self.inp_dtype = "int%d" % self.INP_WIDTH
self.wgt_dtype = "int%d" % self.WGT_WIDTH self.wgt_dtype = "int%d" % self.WGT_WIDTH
self.out_dtype = "int%d" % self.OUT_WIDTH self.out_dtype = "int%d" % self.OUT_WIDTH
# bitstream name
self.BITSTREAM = self.pkg.bitstream
# model string
self.MODEL = self.TARGET + "_" + self.BITSTREAM
# lazy cached members # lazy cached members
self.mock_mode = False self.mock_mode = False
self._mock_env = None self._mock_env = None
...@@ -187,11 +171,15 @@ class Environment(object): ...@@ -187,11 +171,15 @@ class Environment(object):
def __exit__(self, ptype, value, trace): def __exit__(self, ptype, value, trace):
Environment.current = self._last_env Environment.current = self._last_env
def pkg_config(self): def pkg_config(self, cfg):
"""PkgConfig instance""" """PkgConfig instance"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
proj_root = os.path.abspath(os.path.join(curr_path, "../../")) proj_root = os.path.abspath(os.path.join(curr_path, "../../"))
return PkgConfig(self.__dict__, proj_root) return PkgConfig(cfg, proj_root)
@property
def cfg_dict(self):
return self.pkg.cfg_dict
@property @property
def dev(self): def dev(self):
...@@ -236,13 +224,15 @@ class Environment(object): ...@@ -236,13 +224,15 @@ class Environment(object):
@property @property
def target(self): def target(self):
return tvm.target.vta(model=self.TARGET) return tvm.target.vta(model=self.MODEL)
@property @property
def target_host(self): def target_host(self):
"""The target host""" """The target host"""
if self.TARGET == "pynq": if self.TARGET == "pynq":
return "llvm -target=armv7-none-linux-gnueabihf" return "llvm -target=armv7-none-linux-gnueabihf"
if self.TARGET == "ultra96":
return "llvm -target=aarch64-linux-gnu"
if self.TARGET == "sim" or self.TARGET == "tsim": if self.TARGET == "sim" or self.TARGET == "tsim":
return "llvm" return "llvm"
raise ValueError("Unknown target %s" % self.TARGET) raise ValueError("Unknown target %s" % self.TARGET)
...@@ -316,21 +306,18 @@ def coproc_dep_pop(op): ...@@ -316,21 +306,18 @@ def coproc_dep_pop(op):
def _init_env(): def _init_env():
"""Iniitalize the default global env""" """Initialize the default global env"""
curr_path = os.path.dirname( curr_path = os.path.dirname(
os.path.abspath(os.path.expanduser(__file__))) os.path.abspath(os.path.expanduser(__file__)))
proj_root = os.path.abspath(os.path.join(curr_path, "../../../")) proj_root = os.path.abspath(os.path.join(curr_path, "../../../"))
path_list = [ path_list = [
os.path.join(curr_path, "vta_config.json"),
os.path.join(proj_root, "build", "vta_config.json"),
os.path.join(proj_root, "vta_config.json"),
os.path.join(proj_root, "vta/config/vta_config.json") os.path.join(proj_root, "vta/config/vta_config.json")
] ]
path_list = [p for p in path_list if os.path.exists(p)] path_list = [p for p in path_list if os.path.exists(p)]
if not path_list: if not path_list:
raise RuntimeError( raise RuntimeError(
"Error: {} not found.make sure you have config.json in your vta root" "Error: vta_config.json not found.")
.format(filename)) cfg = json.load(open(path_list[0]))
return Environment(json.load(open(path_list[0]))) return Environment(cfg)
Environment.current = _init_env() Environment.current = _init_env()
...@@ -48,9 +48,12 @@ def pynq_bitstream_program(bitstream_path): ...@@ -48,9 +48,12 @@ def pynq_bitstream_program(bitstream_path):
bitstream.download() bitstream.download()
def bitstream_program(target, bitstream): def bitstream_program(target, bitstream):
if target == 'pynq': if target in ['pynq', 'ultra96']:
pynq_bitstream_program(bitstream) pynq_bitstream_program(bitstream)
elif target != 'sim': elif target in ['sim', 'tsim']:
# In simulation, bit stream programming is a no-op
return
else:
raise RuntimeError("Unknown target {}".format(target)) raise RuntimeError("Unknown target {}".format(target))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -30,7 +30,7 @@ def reconfig_runtime(remote): ...@@ -30,7 +30,7 @@ def reconfig_runtime(remote):
""" """
env = get_env() env = get_env()
freconfig = remote.get_function("tvm.contrib.vta.reconfig_runtime") freconfig = remote.get_function("tvm.contrib.vta.reconfig_runtime")
freconfig(env.pkg_config().cfg_json) freconfig(env.pkg.cfg_json)
def program_fpga(remote, bitstream=None): def program_fpga(remote, bitstream=None):
......
...@@ -33,7 +33,6 @@ def run(run_func): ...@@ -33,7 +33,6 @@ def run(run_func):
env = get_env() env = get_env()
if env.TARGET in ["sim", "tsim"]: if env.TARGET in ["sim", "tsim"]:
# Talk to local RPC if necessary to debug RPC server. # Talk to local RPC if necessary to debug RPC server.
# Compile vta on your host with make at the root. # Compile vta on your host with make at the root.
# Make sure TARGET is set to "sim" in the config.json file. # Make sure TARGET is set to "sim" in the config.json file.
...@@ -53,21 +52,20 @@ def run(run_func): ...@@ -53,21 +52,20 @@ def run(run_func):
assert simulator.enabled() assert simulator.enabled()
run_func(env, rpc.LocalSession()) run_func(env, rpc.LocalSession())
elif env.TARGET == "pynq": elif env.TARGET in ["pynq", "ultra96"]:
# The environment variables below should be set if we are using # The environment variables below should be set if we are using
# a tracker to obtain a remote for a test device # a tracker to obtain a remote for a test device
tracket_host = os.environ.get("TVM_TRACKER_HOST", None) tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
tracket_port = os.environ.get("TVM_TRACKER_PORT", None) tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
# Otherwise, we can set the variables below to directly # Otherwise, we can set the variables below to directly
# obtain a remote from a test device # obtain a remote from a test device
pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None) pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
pynq_port = os.environ.get("VTA_PYNQ_RPC_PORT", None) pynq_port = os.environ.get("VTA_PYNQ_RPC_PORT", None)
# Run device from fleet node if env variables are defined # Run device from fleet node if env variables are defined
if tracket_host and tracket_port: if tracker_host and tracker_port:
remote = autotvm.measure.request_remote(env.TARGET, remote = autotvm.measure.request_remote(env.TARGET,
tracket_host, tracker_host,
int(tracket_port), int(tracker_port),
timeout=10000) timeout=10000)
run_func(env, remote) run_func(env, remote)
else: else:
...@@ -78,3 +76,6 @@ def run(run_func): ...@@ -78,3 +76,6 @@ def run(run_func):
else: else:
raise RuntimeError( raise RuntimeError(
"Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables") "Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables")
else:
raise RuntimeError("Unknown target %s" % env.TARGET)
...@@ -15,12 +15,9 @@ ...@@ -15,12 +15,9 @@
* KIND, either express or implied. See the License for the * KIND, either express or implied. See the License for the
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ *
/*!
* Copyright (c) 2018 by Contributors
* \file pynq_driver.c * \file pynq_driver.c
* \brief VTA driver for Pynq board. * \brief VTA driver for Zynq SoC boards with Pynq support (see pynq.io).
*/ */
#include <vta/driver.h> #include <vta/driver.h>
...@@ -53,19 +50,19 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) { ...@@ -53,19 +50,19 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
memcpy(dst, src, size); memcpy(dst, src, size);
} }
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
// Call the xlnkFlushCache on the CMA buffer // Call the cma_flush_cache on the CMA buffer
// so that the FPGA can read the buffer data. // so that the FPGA can read the buffer data.
xlnkFlushCache(reinterpret_cast<void*>(buf), size); cma_flush_cache(vir_addr, phy_addr, size);
} }
void VTAInvalidateCache(vta_phy_addr_t buf, int size) { void VTAInvalidateCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
// Call the xlnkInvalidateCache on the CMA buffer // Call the cma_invalidate_cache on the CMA buffer
// so that the host needs to read the buffer data. // so that the host needs to read the buffer data.
xlnkInvalidateCache(reinterpret_cast<void*>(buf), size); cma_invalidate_cache(vir_addr, phy_addr, size);
} }
void *VTAMapRegister(uint32_t addr, size_t length) { void *VTAMapRegister(uint32_t addr) {
// Align the base address with the pages // Align the base address with the pages
uint32_t virt_base = addr & ~(getpagesize() - 1); uint32_t virt_base = addr & ~(getpagesize() - 1);
// Calculate base address offset w.r.t the base address // Calculate base address offset w.r.t the base address
...@@ -73,16 +70,16 @@ void *VTAMapRegister(uint32_t addr, size_t length) { ...@@ -73,16 +70,16 @@ void *VTAMapRegister(uint32_t addr, size_t length) {
// Open file and mmap // Open file and mmap
uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC); uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC);
return mmap(NULL, return mmap(NULL,
(length+virt_offset), (VTA_IP_REG_MAP_RANGE + virt_offset),
PROT_READ|PROT_WRITE, PROT_READ|PROT_WRITE,
MAP_SHARED, MAP_SHARED,
mmap_file, mmap_file,
virt_base); virt_base);
} }
void VTAUnmapRegister(void *vta, size_t length) { void VTAUnmapRegister(void *vta) {
// Unmap memory // Unmap memory
int status = munmap(vta, length); int status = munmap(vta, VTA_IP_REG_MAP_RANGE);
assert(status == 0); assert(status == 0);
} }
...@@ -98,39 +95,30 @@ class VTADevice { ...@@ -98,39 +95,30 @@ class VTADevice {
public: public:
VTADevice() { VTADevice() {
// VTA stage handles // VTA stage handles
vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR, VTA_RANGE); vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR);
vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR, VTA_RANGE); vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR);
vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR, VTA_RANGE); vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR);
vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR, VTA_RANGE); vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR);
} }
~VTADevice() { ~VTADevice() {
// Close VTA stage handle // Close VTA stage handle
VTAUnmapRegister(vta_fetch_handle_, VTA_RANGE); VTAUnmapRegister(vta_fetch_handle_);
VTAUnmapRegister(vta_load_handle_, VTA_RANGE); VTAUnmapRegister(vta_load_handle_);
VTAUnmapRegister(vta_compute_handle_, VTA_RANGE); VTAUnmapRegister(vta_compute_handle_);
VTAUnmapRegister(vta_store_handle_, VTA_RANGE); VTAUnmapRegister(vta_store_handle_);
} }
int Run(vta_phy_addr_t insn_phy_addr, int Run(vta_phy_addr_t insn_phy_addr,
uint32_t insn_count, uint32_t insn_count,
uint32_t wait_cycles) { uint32_t wait_cycles) {
// NOTE: Register address map is derived from the auto-generated VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_COUNT_OFFSET, insn_count);
// driver files available under hardware/build/vivado/<design>/export/driver VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy_addr);
// FETCH @ 0x10 : Data signal of insn_count_V VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_INP_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count); VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_WGT_ADDR_OFFSET, 0);
// FETCH @ 0x18 : Data signal of insns_V VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_UOP_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr); VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_BIAS_ADDR_OFFSET, 0);
// LOAD @ 0x10 : Data signal of inputs_V VTAWriteMappedReg(vta_store_handle_, VTA_STORE_OUT_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_load_handle_, 0x10, 0);
// LOAD @ 0x18 : Data signal of weight_V
VTAWriteMappedReg(vta_load_handle_, 0x18, 0);
// COMPUTE @ 0x20 : Data signal of uops_V
VTAWriteMappedReg(vta_compute_handle_, 0x20, 0);
// COMPUTE @ 0x28 : Data signal of biases_V
VTAWriteMappedReg(vta_compute_handle_, 0x28, 0);
// STORE @ 0x10 : Data signal of outputs_V
VTAWriteMappedReg(vta_store_handle_, 0x10, 0);
// VTA start // VTA start
VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START); VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
...@@ -141,7 +129,7 @@ class VTADevice { ...@@ -141,7 +129,7 @@ class VTADevice {
// Loop until the VTA is done // Loop until the VTA is done
unsigned t, flag = 0; unsigned t, flag = 0;
for (t = 0; t < wait_cycles; ++t) { for (t = 0; t < wait_cycles; ++t) {
flag = VTAReadMappedReg(vta_compute_handle_, 0x18); flag = VTAReadMappedReg(vta_compute_handle_, VTA_COMPUTE_DONE_RD_OFFSET);
if (flag == VTA_DONE) break; if (flag == VTA_DONE) break;
std::this_thread::yield(); std::this_thread::yield();
} }
......
...@@ -6,21 +6,18 @@ ...@@ -6,21 +6,18 @@
* to you under the Apache License, Version 2.0 (the * to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance * "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at * with the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, * Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an * software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the * KIND, either express or implied. See the License for the
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ *
* \file pynq_driver.h
/*! * \brief VTA driver for Zynq SoC boards with Pynq support (see pynq.io).
* Copyright (c) 2018 by Contributors
* \file vta_pynq_driver.h
* \brief VTA driver for Pynq board.
*/ */
#ifndef VTA_PYNQ_PYNQ_DRIVER_H_ #ifndef VTA_PYNQ_PYNQ_DRIVER_H_
...@@ -41,23 +38,21 @@ extern "C" { ...@@ -41,23 +38,21 @@ extern "C" {
#include <time.h> #include <time.h>
#include <unistd.h> #include <unistd.h>
#ifdef __arm__ #if defined(__arm__) || defined(__aarch64__)
#include <libxlnk_cma.h> #include <libxlnk_cma.h>
#else #else
void* cma_alloc(size_t size, int cached); void* cma_alloc(size_t size, int cached);
void cma_free(void* buf); void cma_free(void* buf);
uint32_t cma_get_phy_addr(void* buf); uint32_t cma_get_phy_addr(void* buf);
void cma_flush_cache(void* buf, unsigned int phys_addr, int size);
void cma_invalidate_cache(void* buf, unsigned int phys_addr, int size);
#endif #endif
void xlnkFlushCache(void* buf, int size);
void xlnkInvalidateCache(void* buf, int size);
void *VTAMapRegister(uint32_t addr, size_t length); void *VTAMapRegister(uint32_t addr);
void VTAUnmapRegister(void *vta, size_t length); void VTAUnmapRegister(void *vta);
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val); void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset); uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
/*! \brief VTA configuration register address range */
#define VTA_RANGE 0x100
/*! \brief VTA configuration register start value */ /*! \brief VTA configuration register start value */
#define VTA_START 0x1 #define VTA_START 0x1
/*! \brief VTA configuration register auto-restart value */ /*! \brief VTA configuration register auto-restart value */
...@@ -65,27 +60,6 @@ uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset); ...@@ -65,27 +60,6 @@ uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
/*! \brief VTA configuration register done value */ /*! \brief VTA configuration register done value */
#define VTA_DONE 0x1 #define VTA_DONE 0x1
/*! \brief VTA fetch stage configuration register address
* from auto-generated XPAR_FETCH_0_S_AXI_CONTROL_BUS_BASEADDR define
* in xparameters.h (under build/vivado/<design name>/export/bsp/ps7_cortexa9_0/include)
*/
#define VTA_FETCH_ADDR 0x43C00000
/*! \brief VTA compute stage configuration register address
* from auto-generated XPAR_COMPUTE_0_S_AXI_CONTROL_BUS_BASEADDR define
* in xparameters.h (under build/vivado/<design name>/export/bsp/ps7_cortexa9_0/include)
*/
#define VTA_COMPUTE_ADDR 0x43C10000
/*! \brief VTA compute stage configuration register address
* from auto-generated XPAR_LOAD_0_S_AXI_CONTROL_BUS_BASEADDR define
* in xparameters.h (under build/vivado/<design name>/export/bsp/ps7_cortexa9_0/include)
*/
#define VTA_LOAD_ADDR 0x43C20000
/*! \brief VTA store stage configuration register address
* from auto-generated XPAR_STORE_0_S_AXI_CONTROL_BUS_BASEADDR define
* in xparameters.h (under build/vivado/<design name>/export/bsp/ps7_cortexa9_0/include)
*/
#define VTA_STORE_ADDR 0x43C30000
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
......
...@@ -44,8 +44,10 @@ namespace vta { ...@@ -44,8 +44,10 @@ namespace vta {
static_assert(VTA_UOP_WIDTH == sizeof(VTAUop) * 8, static_assert(VTA_UOP_WIDTH == sizeof(VTAUop) * 8,
"VTA_UOP_WIDTH do not match VTAUop size"); "VTA_UOP_WIDTH do not match VTAUop size");
/*! \brief Enable coherent access between VTA and CPU (used on shared mem systems). */ /*! \brief Enable coherent access of data buffers between VTA and CPU */
static const bool kBufferCoherent = true; static const bool kBufferCoherent = VTA_COHERENT_ACCESSES;
/*! \brief Always cache buffers (otherwise, write back to DRAM from CPU) */
static const bool kAlwaysCache = true;
/*! /*!
* \brief Data buffer represents data on CMA. * \brief Data buffer represents data on CMA.
...@@ -65,8 +67,10 @@ struct DataBuffer { ...@@ -65,8 +67,10 @@ struct DataBuffer {
* \param size The size of the data. * \param size The size of the data.
*/ */
void InvalidateCache(size_t offset, size_t size) { void InvalidateCache(size_t offset, size_t size) {
if (!kBufferCoherent) { if (!kBufferCoherent && kAlwaysCache) {
VTAInvalidateCache(phy_addr_ + offset, size); VTAInvalidateCache(reinterpret_cast<char *>(data_) + offset,
phy_addr_ + offset,
size);
} }
} }
/*! /*!
...@@ -75,8 +79,10 @@ struct DataBuffer { ...@@ -75,8 +79,10 @@ struct DataBuffer {
* \param size The size of the data. * \param size The size of the data.
*/ */
void FlushCache(size_t offset, size_t size) { void FlushCache(size_t offset, size_t size) {
if (!kBufferCoherent) { if (!kBufferCoherent && kAlwaysCache) {
VTAFlushCache(phy_addr_ + offset, size); VTAFlushCache(reinterpret_cast<char *>(data_) + offset,
phy_addr_ + offset,
size);
} }
} }
/*! /*!
...@@ -102,7 +108,7 @@ struct DataBuffer { ...@@ -102,7 +108,7 @@ struct DataBuffer {
* \param size The size of the buffer. * \param size The size of the buffer.
*/ */
static DataBuffer* Alloc(size_t size) { static DataBuffer* Alloc(size_t size) {
void* data = VTAMemAlloc(size, 1); void* data = VTAMemAlloc(size, kAlwaysCache);
CHECK(data != nullptr); CHECK(data != nullptr);
DataBuffer* buffer = new DataBuffer(); DataBuffer* buffer = new DataBuffer();
buffer->data_ = data; buffer->data_ = data;
...@@ -469,7 +475,9 @@ class UopQueue : public BaseQueue<VTAUop> { ...@@ -469,7 +475,9 @@ class UopQueue : public BaseQueue<VTAUop> {
// Flush if we're using a shared memory system // Flush if we're using a shared memory system
// and if interface is non-coherent // and if interface is non-coherent
if (!coherent_ && always_cache_) { if (!coherent_ && always_cache_) {
VTAFlushCache(fpga_buff_phy_, offset); VTAFlushCache(fpga_buff_,
fpga_buff_phy_,
offset);
} }
} }
...@@ -860,7 +868,9 @@ class InsnQueue : public BaseQueue<VTAGenericInsn> { ...@@ -860,7 +868,9 @@ class InsnQueue : public BaseQueue<VTAGenericInsn> {
// Flush if we're using a shared memory system // Flush if we're using a shared memory system
// and if interface is non-coherent // and if interface is non-coherent
if (!coherent_ && always_cache_) { if (!coherent_ && always_cache_) {
VTAFlushCache(fpga_buff_phy_, buff_size); VTAFlushCache(fpga_buff_,
fpga_buff_phy_,
buff_size);
} }
} }
...@@ -1302,9 +1312,9 @@ class CommandQueue { ...@@ -1302,9 +1312,9 @@ class CommandQueue {
// The kernel we are currently recording // The kernel we are currently recording
UopKernel* record_kernel_{nullptr}; UopKernel* record_kernel_{nullptr};
// Micro op queue // Micro op queue
UopQueue<VTA_MAX_XFER, true, true> uop_queue_; UopQueue<VTA_MAX_XFER, kBufferCoherent, kAlwaysCache> uop_queue_;
// instruction queue // instruction queue
InsnQueue<VTA_MAX_XFER, true, true> insn_queue_; InsnQueue<VTA_MAX_XFER, kBufferCoherent, kAlwaysCache> insn_queue_;
// Device handle // Device handle
VTADeviceHandle device_{nullptr}; VTADeviceHandle device_{nullptr};
#ifdef USE_TSIM #ifdef USE_TSIM
......
...@@ -615,10 +615,10 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) { ...@@ -615,10 +615,10 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
memcpy(dst, src, size); memcpy(dst, src, size);
} }
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
} }
void VTAInvalidateCache(vta_phy_addr_t buf, int size) { void VTAInvalidateCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
} }
VTADeviceHandle VTADeviceAlloc() { VTADeviceHandle VTADeviceAlloc() {
......
...@@ -228,10 +228,10 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) { ...@@ -228,10 +228,10 @@ void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
memcpy(dst, src, size); memcpy(dst, src, size);
} }
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
} }
void VTAInvalidateCache(vta_phy_addr_t buf, int size) { void VTAInvalidateCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
} }
VTADeviceHandle VTADeviceAlloc() { VTADeviceHandle VTADeviceAlloc() {
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
*/ */
/*! /*!
* Copyright (c) 2018 by Contributors
* \file test_lib.cpp * \file test_lib.cpp
* \brief Test library for the VTA design simulation and driver tests. * \brief Test library for the VTA design simulation and driver tests.
*/ */
...@@ -40,7 +39,6 @@ ...@@ -40,7 +39,6 @@
#include "../../../src/pynq/pynq_driver.h" #include "../../../src/pynq/pynq_driver.h"
#endif // VTA_TARGET_PYNQ #endif // VTA_TARGET_PYNQ
typedef uint64_t axi_T;
typedef uint32_t uop_T; typedef uint32_t uop_T;
typedef int8_t wgt_T; typedef int8_t wgt_T;
typedef int8_t inp_T; typedef int8_t inp_T;
...@@ -95,15 +93,25 @@ template <typename T, int T_WIDTH> ...@@ -95,15 +93,25 @@ template <typename T, int T_WIDTH>
void unpackBuffer(T **dst, T *src, int y_size, int x_size, int y_block, int x_block); void unpackBuffer(T **dst, T *src, int y_size, int x_size, int y_block, int x_block);
/*! /*!
* \brief Allocates and initializes a 2D array in the heap. * \brief Allocates and randomly initializes a 2D array in the heap.
* \param rows Number of rows. * \param rows Number of rows.
* \param cols Number of columns. * \param cols Number of columns.
* \return Pointer to the 2D array. * \return Pointer to the 2D array.
*/ */
template <typename T, int T_WIDTH> template <typename T>
T ** allocInit2dArray(int rows, int cols); T ** allocInit2dArray(int rows, int cols);
/*! /*!
* \brief Allocates and initializes a 2D array to a set value in the heap.
* \param rows Number of rows.
* \param cols Number of columns.
* \param val Value to set the whole array to.
* \return Pointer to the 2D array.
*/
template <typename T>
T ** allocSet2dArray(int rows, int cols, int val);
/*!
* \brief Allocates a 2D array in the heap. * \brief Allocates a 2D array in the heap.
* \param rows Number of rows. * \param rows Number of rows.
* \param cols Number of columns. * \param cols Number of columns.
......
...@@ -24,7 +24,7 @@ def test_env(): ...@@ -24,7 +24,7 @@ def test_env():
def test_env_scope(): def test_env_scope():
env = vta.get_env() env = vta.get_env()
cfg = env.pkg_config().cfg_dict cfg = env.cfg_dict
cfg["TARGET"] = "xyz" cfg["TARGET"] = "xyz"
with vta.Environment(cfg): with vta.Environment(cfg):
assert vta.get_env().TARGET == "xyz" assert vta.get_env().TARGET == "xyz"
......
...@@ -100,9 +100,9 @@ if env.TARGET not in ["sim", "tsim"]: ...@@ -100,9 +100,9 @@ if env.TARGET not in ["sim", "tsim"]:
# the host, make sure you've set the variables below to the IP of # the host, make sure you've set the variables below to the IP of
# your board. # your board.
device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99") device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
device_port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091")) device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
if not tracker_host or not tracker_port: if not tracker_host or not tracker_port:
remote = rpc.connect(device_host, device_port) remote = rpc.connect(device_host, int(device_port))
else: else:
remote = autotvm.measure.request_remote(env.TARGET, tracker_host, int(tracker_port), timeout=10000) remote = autotvm.measure.request_remote(env.TARGET, tracker_host, int(tracker_port), timeout=10000)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment