Commit 7f25bf1d by Thierry Moreau Committed by Tianqi Chen

[BITSTREAM SERVER] Bitstream server integration (#38)

parent 8c9758b6
TVM Change Log
VTA Change Log
==============
This file records the changes in VTA stack in reverse chronological order.
......@@ -6,7 +6,8 @@ This file records the changes in VTA stack in reverse chronological order.
## Initial version
- Vivado based hardware
- Driver for PYNQ
- Vivado based hardware.
- Driver for PYNQ board.
- Runtime library.
- TVM compiler stack.
- Resnet-18 example.
......@@ -31,21 +31,10 @@ From there, clone the VTA repository:
git clone git@github.com:uwsaml/vta.git --recursive
```
Next, clone the TVM repository:
```bash
git clone git@github.com:dmlc/tvm.git --recursive
```
TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
```bash
git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
```
Now, ssh into your **Pynq board** to build the TVM runtime with the following commands:
```bash
ssh xilinx@192.168.2.99 # ssh if you haven't done so
cd ~/tvm
cd ~/vta/nnvm/tvm
cp make/config.mk .
echo USE_RPC=1 >> config.mk
make runtime -j2
......@@ -57,7 +46,6 @@ We're now ready to build the Pynq RPC server on the Pynq board.
```bash
ssh xilinx@192.168.2.99 # ssh if you haven't done so
cd ~/vta
export TVM_PATH=/home/xilinx/tvm
make
```
......
......@@ -4,6 +4,7 @@ In order to run this example you'll need to have:
* VTA installed
* TVM installed
* NNVM installed
* MxNet installed
* A Pynq-based RPC server running
## VTA installation
......@@ -26,9 +27,9 @@ git clone git@github.com:dmlc/tvm.git --recursive
```
TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
As of now, the TVM checkpoint `168f099155106d1188dbc54ac00acc02900a3c6f` is known to work with VTA.
```bash
git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
git checkout 168f099155106d1188dbc54ac00acc02900a3c6f
```
Before building TVM, copy the `make/config.mk` file into the root TVM directory:
......@@ -59,7 +60,7 @@ Clone the NNVM repository from `tqchen` in the directory of your choosing:
git clone git@github.com:tqchen/nnvm.git --recursive
```
To run this example, we rely on a special branch of NNVM: `qt`:
To run this example, we rely on a special branch of NNVM until these changes get merged back into the main repo: `qt`:
```bash
cd <nnvm root>
git checkout qt
......@@ -76,6 +77,10 @@ Finally update your `~/.bashrc` file to include the NNVM python libraries in you
export PYTHONPATH=<nnvm root>/python:${PYTHONPATH}
```
## MxNet Installation
Follow the [MxNet Installation Instructions](https://mxnet.incubator.apache.org)
## Pynq RPC Server Setup
Follow the [Pynq RPC Server Guide](https://github.com/uwsaml/vta/tree/master/apps/pynq_rpc/README.md)
......
......@@ -15,8 +15,6 @@ from tvm.contrib import graph_runtime, rpc, util
bfactor = 1
cfactor = 16
host = "pynq"
port = 9091
verbose = False
# only run fpga component, mark non-conv ops as nop
debug_fpga_only = False
......@@ -27,8 +25,7 @@ TEST_FILE = 'cat.jpg'
CATEG_FILE = 'synset.txt'
RESNET_GRAPH_FILE = 'quantize_graph.json'
RESNET_PARAMS_FILE = 'quantize_params.pkl'
BITSTREAM_FILE = 'vta.bit'
for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE, BITSTREAM_FILE]:
for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE]:
if not os.path.isfile(file):
print ("Downloading {}".format(file))
wget.download(url+file)
......@@ -43,7 +40,6 @@ target_host = "llvm -mtriple=armv7-none-linux-gnueabihf -mcpu=cortex-a9 -mattr=+
if vta.get_env().TARGET == "sim":
target_host = "llvm"
synset = eval(open(os.path.join(CATEG_FILE)).read())
image = Image.open(os.path.join(TEST_FILE)).resize((224, 224))
......@@ -138,8 +134,17 @@ if vta.get_env().TARGET == "sim":
remote = rpc.LocalSession()
print("local session")
else:
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
assert host
port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
port = int(port)
remote = rpc.connect(host, port)
# Program FPGA, and build runtime if necessary
# Overwrite bitstream with a path to your own if you built it yourself
vta.reconfig_runtime(remote)
vta.program_fpga(remote, bitstream=None)
remote.upload(temp.relpath("graphlib.o"))
lib = remote.load_module("graphlib.o")
ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)
......
......@@ -46,25 +46,20 @@ VTA_ACC_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-accbuffsize)
VTA_OUT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-outbuffsize)
#---------------------
# FPGA Parameters
#--------------------
VTA_CLOCK_FREQ = $(shell ${VTA_CONFIG} --get-fpgafreq)
VTA_TARGET_PER = $(shell ${VTA_CONFIG} --get-fpgaper)
#---------------------
# Compilation parameters
#--------------------
# Number of threads during compilation
VTA_HW_COMP_THREADS = 8
# Target Frequency
VTA_HW_COMP_CLOCK_FREQ = 100
# Timing closure compensation (0 for none, 3 for highest)
VTA_HW_COMP_TIMING_COMP = 0
# Derive clock target period
TARGET_PER = \
$(shell echo "$$(( (1000 + $(VTA_HW_COMP_CLOCK_FREQ) - 1) / $(VTA_HW_COMP_CLOCK_FREQ) - $(VTA_HW_COMP_TIMING_COMP)))" )
# Derive config name
CONF_ROOT = $(shell ${VTA_CONFIG} --cfg-str)
CONF = $(CONF_ROOT)_$(VTA_HW_COMP_CLOCK_FREQ)MHz_$(TARGET_PER)ns
CONF = $(shell ${VTA_CONFIG} --cfg-str)
IP_BUILD_PATH = $(BUILD_DIR)/hls/$(CONF)
HW_BUILD_PATH = $(BUILD_DIR)/vivado/$(CONF)
......@@ -90,7 +85,7 @@ $(IP_PATH): $(SRC_DIR)/*
cd $(IP_BUILD_PATH) && \
$(VIVADO_HLS) -f $(SCRIPT_DIR)/hls.tcl \
-tclargs $(SRC_DIR) $(SIM_DIR) $(TEST_DIR) $(INCLUDE_DIR) \
$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(TARGET_PER) \
$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(VTA_TARGET_PER) \
$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_ACC_WIDTH) $(VTA_OUT_WIDTH) \
$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
$(VTA_UOP_BUFF_SIZE) $(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) \
......@@ -104,7 +99,7 @@ $(BIT_PATH): $(IP_PATH)
mkdir -p $(HW_BUILD_PATH)
cd $(HW_BUILD_PATH) && \
$(VIVADO) -mode tcl -source $(SCRIPT_DIR)/vivado.tcl \
-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_HW_COMP_CLOCK_FREQ) \
-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_CLOCK_FREQ) \
$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_OUT_WIDTH) \
$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
$(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) $(VTA_OUT_BUFF_SIZE)
......
......@@ -67,7 +67,5 @@ make
The local `Makefile` contains several variables that can be tweaked by the user:
* `VTA_HW_COMP_THREADS`: determines the number of threads used for the Vivado compilation job (default 8 threads).
* `VTA_HW_COMP_CLOCK_FREQ`: determines the target frequency of the VTA design (default 100MHz). It can only be set to 100, 142, 167 or 200MHz.
* `VTA_HW_COMP_TIMING_COMP`: determines how much additional slack must be provided to close timing (default 0ns). Generally when utilization is high for an FPGA design, setting this parameter to 1, 2 or 3 can help close timing.
Once the compilation completes, the generated bitstream can be found under `<vta root>/build/hardware/xilinx/vivado/<design name>/export/vta.bit`.
\ No newline at end of file
{
"TARGET" : "pynq",
"HW_FREQ" : 100,
"HW_CLK_TARGET" : 8,
"HW_VER" : "0.0.0",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
......@@ -7,7 +10,7 @@
"LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_UOP_BUFF_SIZE" : 14,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15,
"LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17
......
{
"TARGET" : "sim",
"HW_FREQ" : 100,
"HW_CLK_TARGET" : 8,
"HW_VER" : "0.0.0",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
......@@ -7,7 +10,7 @@
"LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_UOP_BUFF_SIZE" : 14,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15,
"LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17
......
......@@ -54,6 +54,10 @@ def main():
help="returns log of accum buffer size in B")
parser.add_argument("--get-outbuffsize", action="store_true",
help="returns log of output buffer size in B")
parser.add_argument("--get-fpgafreq", action="store_true",
help="returns FPGA frequency")
parser.add_argument("--get-fpgaper", action="store_true",
help="returns HLS target clock period")
args = parser.parse_args()
if len(sys.argv) == 1:
......@@ -91,7 +95,8 @@ def main():
print(pkg.cfg_json)
if args.cfg_str:
cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}".format(
# Needs to match the BITSTREAM string in python/vta/environment.py
cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}".format(
(1 << cfg["LOG_BATCH"]),
(1 << cfg["LOG_BLOCK_IN"]),
(1 << cfg["LOG_BLOCK_OUT"]),
......@@ -100,8 +105,11 @@ def main():
cfg["LOG_UOP_BUFF_SIZE"],
cfg["LOG_INP_BUFF_SIZE"],
cfg["LOG_WGT_BUFF_SIZE"],
cfg["LOG_ACC_BUFF_SIZE"])
print cfg_str
cfg["LOG_ACC_BUFF_SIZE"],
cfg["HW_FREQ"],
cfg["HW_CLK_TARGET"],
cfg["HW_VER"].replace('.', '_'))
print(cfg_str)
if args.get_inpwidth:
print(cfg["LOG_INP_WIDTH"])
......@@ -139,5 +147,11 @@ def main():
if args.get_accbuffsize:
print(cfg["LOG_ACC_BUFF_SIZE"])
if args.get_fpgafreq:
print(cfg["HW_FREQ"])
if args.get_fpgaper:
print(cfg["HW_CLK_TARGET"])
if __name__ == "__main__":
main()
......@@ -8,11 +8,10 @@ from __future__ import absolute_import as _abs
__version__ = "0.1.0"
from .bitstream import get_bitstream_path, download_bitstream
from .environment import get_env, Environment
from .rpc_client import reconfig_runtime, program_fpga
try:
from . import top
from .build_module import build_config, lower, build
......
"""VTA specific bitstream management library."""
from __future__ import absolute_import as _abs
import os
import urllib
from .environment import get_env
# bitstream repo
BITSTREAM_URL = "https://github.com/uwsaml/vta-distro/raw/master/bitstreams/"
def get_bitstream_path():
    """Returns the path to the cached bitstream corresponding to the current config

    The cache root is read from the ``VTA_CACHE_PATH`` environment variable and
    defaults to ``~/.vta_cache/``. Bitstreams are grouped in one sub-directory
    per target (``env.TARGET``); that directory is created when missing. The
    bitstream file itself is neither created nor checked here.

    Returns
    -------
    bit_path: str
        Corresponding to the filepath of the bitstream

    Raises
    ------
    OSError
        If the cache directory cannot be created.
    """
    env = get_env()
    # Derive destination path. The default is only computed when the override
    # is absent, and goes through expanduser so that an unset HOME variable
    # does not end up as os.path.join(None, ...).
    cache_dir = os.getenv("VTA_CACHE_PATH")
    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser("~"), ".vta_cache/")
    cache_dir = os.path.join(cache_dir, env.TARGET)
    # Create the directory if it didn't exist
    if not os.path.isdir(cache_dir):
        try:
            os.makedirs(cache_dir)
        except OSError:
            # Another process may have created the directory between the
            # check and the call; only propagate genuine failures.
            if not os.path.isdir(cache_dir):
                raise
    return os.path.join(cache_dir, env.BITSTREAM)
def download_bitstream():
    """Downloads a cached bitstream corresponding to the current config

    The file is fetched from ``BITSTREAM_URL/<TARGET>/<HW_VER>/<BITSTREAM>``
    and written to the location returned by ``get_bitstream_path()``.

    Returns
    -------
    success: bool
        True once the bitstream has been downloaded.

    Raises
    ------
    RuntimeError
        If the server answers with an HTTP error, e.g. 404 when no bitstream
        has been built for this configuration.
    """
    # Imported locally because the urllib API is laid out differently in
    # Python 2 and Python 3.
    try:
        from urllib.request import urlopen, urlretrieve
        from urllib.error import HTTPError
    except ImportError:
        from urllib2 import urlopen, HTTPError
        from urllib import urlretrieve

    env = get_env()
    bit = get_bitstream_path()
    # URLs always use forward slashes, so join the components explicitly
    # instead of relying on the platform specific os.path.join.
    url = "/".join([BITSTREAM_URL.rstrip("/"), env.TARGET, env.HW_VER, env.BITSTREAM])

    # Check that the bitstream is accessible from the server
    try:
        response = urlopen(url)
    except HTTPError as err:
        if err.code == 404:
            # The solution when this happens is to build your own bitstream
            # and add it to your VTA_CACHE_PATH
            raise RuntimeError(
                "Error: {} is not available. It appears that this configuration has not been built."
                .format(url))
        # Any other HTTP error would otherwise be saved to disk as if it
        # were a bitstream.
        raise RuntimeError(
            "Error: could not download {} (HTTP status {}).".format(url, err.code))
    response.close()

    urlretrieve(url, bit)
    return True
......@@ -24,7 +24,7 @@ class DevContext(object):
Note
----
This class is introduced so we have a clear separation
of developer related stuffs and user facing attributes.
of developer-related and user-facing attributes.
"""
# Memory id for DMA
MEM_ID_UOP = 0
......@@ -62,7 +62,7 @@ class DevContext(object):
class Environment(object):
"""Hareware configuration object.
"""Hardware configuration object.
This object contains all the information
needed for compiling to a specific VTA backend.
......@@ -98,23 +98,24 @@ class Environment(object):
# initialization function
def __init__(self, cfg):
# Log of input/activation width in bits
self.__dict__.update(cfg)
for key in PkgConfig.cfg_keys:
if key not in cfg:
raise ValueError("Expect key %s in cfg" % key)
# derive output buffer size
self.LOG_OUT_BUFF_SIZE = (
self.LOG_ACC_BUFF_SIZE +
self.LOG_OUT_WIDTH -
self.LOG_ACC_WIDTH)
# width
# data type width
self.INP_WIDTH = 1 << self.LOG_INP_WIDTH
self.WGT_WIDTH = 1 << self.LOG_WGT_WIDTH
self.ACC_WIDTH = 1 << self.LOG_ACC_WIDTH
self.OUT_WIDTH = self.INP_WIDTH
# tensor intrinsic shape
self.BATCH = 1 << self.LOG_BATCH
self.BLOCK_IN = 1 << self.LOG_BLOCK_IN
self.BLOCK_OUT = 1 << self.LOG_BLOCK_OUT
self.OUT_WIDTH = self.INP_WIDTH
# buffer size
self.UOP_BUFF_SIZE = 1 << self.LOG_UOP_BUFF_SIZE
self.INP_BUFF_SIZE = 1 << self.LOG_INP_BUFF_SIZE
......@@ -138,6 +139,20 @@ class Environment(object):
self.WGT_ELEM_BYTES = self.WGT_ELEM_BITS // 8
self.ACC_ELEM_BYTES = self.ACC_ELEM_BITS // 8
self.OUT_ELEM_BYTES = self.OUT_ELEM_BITS // 8
# Configuration bitstream name
self.BITSTREAM = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}.bit".format(
(1 << cfg["LOG_BATCH"]),
(1 << cfg["LOG_BLOCK_IN"]),
(1 << cfg["LOG_BLOCK_OUT"]),
(1 << cfg["LOG_INP_WIDTH"]),
(1 << cfg["LOG_WGT_WIDTH"]),
cfg["LOG_UOP_BUFF_SIZE"],
cfg["LOG_INP_BUFF_SIZE"],
cfg["LOG_WGT_BUFF_SIZE"],
cfg["LOG_ACC_BUFF_SIZE"],
cfg["HW_FREQ"],
cfg["HW_CLK_TARGET"],
cfg["HW_VER"].replace('.', '_'))
# dtypes
self.acc_dtype = "int%d" % self.ACC_WIDTH
self.inp_dtype = "int%d" % self.INP_WIDTH
......
......@@ -22,6 +22,9 @@ class PkgConfig(object):
"""
cfg_keys = [
"TARGET",
"HW_FREQ",
"HW_CLK_TARGET",
"HW_VER",
"LOG_INP_WIDTH",
"LOG_WGT_WIDTH",
"LOG_ACC_WIDTH",
......
......@@ -2,6 +2,7 @@
import os
from .environment import get_env
from .bitstream import download_bitstream, get_bitstream_path
def reconfig_runtime(remote):
"""Reconfigure remote runtime based on current hardware spec.
......@@ -16,7 +17,7 @@ def reconfig_runtime(remote):
freconfig(env.pkg_config().cfg_json)
def program_fpga(remote, bitstream):
def program_fpga(remote, bitstream=None):
"""Upload and program bistream
Parameters
......@@ -24,9 +25,16 @@ def program_fpga(remote, bitstream):
remote : RPCSession
The TVM RPC session
bitstream : str
Path to a local bitstream file.
bitstream : str, optional
Path to a local bitstream file. If unset, tries to download from cache server.
"""
if bitstream:
assert os.path.isfile(bitstream)
else:
bitstream = get_bitstream_path()
if not os.path.isfile(bitstream):
download_bitstream()
fprogram = remote.get_function("tvm.contrib.vta.init")
remote.upload(bitstream)
fprogram(os.path.basename(bitstream))
......@@ -6,7 +6,6 @@ from tvm.contrib import rpc
from ..environment import get_env
from . import simulator
def run(run_func):
"""Run test function on all available env.
......@@ -29,10 +28,10 @@ def run(run_func):
run_func(env, rpc.LocalSession())
# Run on PYNQ if env variable exists
pynq_host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
if pynq_host:
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
if host:
env.TARGET = "pynq"
port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
port = int(port)
remote = rpc.connect(pynq_host, port)
remote = rpc.connect(host, port)
run_func(env, remote)
import tvm
import vta
import os
from tvm.contrib import rpc, util
env = vta.get_env()
host = "pynq"
port = 9091
target = "llvm -target=armv7-none-linux-gnueabihf"
bit = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_100MHz_10ns.bit".format(
env.BATCH, env.BLOCK_IN, env.BLOCK_OUT,
env.INP_WIDTH, env.WGT_WIDTH,
env.LOG_UOP_BUFF_SIZE, env.LOG_INP_BUFF_SIZE,
env.LOG_WGT_BUFF_SIZE, env.LOG_ACC_BUFF_SIZE)
import tvm
from tvm.contrib import rpc
from vta import get_bitstream_path, download_bitstream, program_fpga, reconfig_runtime
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
bitstream = os.path.join(curr_path, "../../../../vta_bitstreams/bitstreams/", bit)
def program_rpc_bitstream(path=None):
"""Program the FPGA on the RPC server
def test_program_rpc():
Parameters
----------
path : path to bitstream (optional)
"""
assert tvm.module.enabled("rpc")
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
if not host:
raise RuntimeError(
"Error: VTA_PYNQ_RPC_HOST environment variable not set.")
# If a path to a bitstream is passed, make sure that it points to a valid bitstream
port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
port = int(port)
remote = rpc.connect(host, port)
vta.program_fpga(remote, bit)
program_fpga(remote, path)
def test_reconfig_runtime():
def reconfig_rpc_runtime():
"""Reconfig the RPC server runtime
"""
assert tvm.module.enabled("rpc")
remote = rpc.connect(host, port)
vta.reconfig_runtime(remote)
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
if host:
port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
port = int(port)
remote = rpc.connect(host, port)
reconfig_runtime(remote)
test_program_rpc()
test_reconfig_runtime()
program_rpc_bitstream()
reconfig_rpc_runtime()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment