Commit 47c870a9 by Logan Weber Committed by Tianqi Chen

[µTVM] Enable AutoTVM for ARM STM32F746XX Boards (#4274)

parent 11af82c0
......@@ -75,6 +75,9 @@ def context(dev_type, dev_id=0):
assert tvm.context("cuda", 0) == tvm.gpu(0)
"""
if isinstance(dev_type, string_types):
if '-device=micro_dev' in dev_type:
dev_type = 'micro_dev'
else:
dev_type = dev_type.split()[0]
if dev_type not in TVMContext.STR2MASK:
raise ValueError("Unknown device type %s" % dev_type)
......
......@@ -19,9 +19,81 @@
import os
import subprocess
from . import util
from .._ffi.base import py_str
from ..api import register_func
# Linker-script template filled in with `str.format` by
# `tvm_callback_relocate_binary`. The named fields are `stack_pointer_init`,
# `text_start`, `rodata_start`, `data_start`, `bss_start` (all rendered as
# hexadecimal) and `word_size` (used as the ALIGN argument). Doubled braces
# (`{{` / `}}`) are `str.format` escapes that produce the literal braces of
# the linker script.
RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
SECTIONS
{{
. = 0x{text_start:x};
. = ALIGN({word_size});
.text :
{{
. = ALIGN({word_size});
KEEP(*(.text))
KEEP(*(.text*))
. = ALIGN({word_size});
}}
. = 0x{rodata_start:x};
. = ALIGN({word_size});
.rodata :
{{
. = ALIGN({word_size});
KEEP(*(.rodata))
KEEP(*(.rodata*))
. = ALIGN({word_size});
}}
. = 0x{data_start:x};
. = ALIGN({word_size});
.data :
{{
. = ALIGN({word_size});
KEEP(*(.data))
KEEP(*(.data*))
. = ALIGN({word_size});
}}
. = 0x{bss_start:x};
. = ALIGN({word_size});
.bss :
{{
. = ALIGN({word_size});
KEEP(*(.bss))
KEEP(*(.bss*))
. = ALIGN({word_size});
}}
}}
"""
def run_cmd(cmd):
    """Runs `cmd` in a subprocess and awaits its completion.

    Parameters
    ----------
    cmd : List[str]
        list of command-line arguments

    Returns
    -------
    output : str
        resulting stdout capture from the subprocess (stderr is merged into
        the same stream)

    Raises
    ------
    RuntimeError
        if the subprocess exits with a nonzero return code; the message
        contains the command line and the captured output
    """
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    (output, _) = proc.communicate()
    # Tool output is not guaranteed to be valid UTF-8 (e.g. localized
    # messages or raw bytes echoed by the tool). Replace undecodable bytes
    # instead of raising UnicodeDecodeError, which would hide the actual
    # command result from the caller.
    output = output.decode("utf-8", errors="replace")
    if proc.returncode != 0:
        cmd_str = " ".join(cmd)
        msg = f"error while running command \"{cmd_str}\":\n{output}"
        raise RuntimeError(msg)
    return output
@register_func("tvm_callback_get_section_size")
def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
......@@ -48,14 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
raise RuntimeError("no such file \"{}\"".format(binary_path))
# We use the "-A" flag here to get the ".rodata" section's size, which is
# not included by default.
size_proc = subprocess.Popen(
["{}size".format(toolchain_prefix), "-A", binary_path], stdout=subprocess.PIPE)
(size_output, _) = size_proc.communicate()
size_output = size_output.decode("utf-8")
if size_proc.returncode != 0:
msg = "error in finding section size:\n"
msg += py_str(size_output)
raise RuntimeError(msg)
size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
# TODO(weberlo): Refactor this method and `*relocate_binary` so they are
# both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
......@@ -74,13 +139,15 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
continue
entry_name = tokens[0]
entry_size = int(tokens[1])
if entry_name in sections_to_sum:
for section in sections_to_sum:
if entry_name.startswith(section):
section_size += entry_size
break
# NOTE: For some reason, the size of the BSS section on the RISC-V
# GCC is sometimes reported to be smaller than it is, so we need to adjust
# for this.
if "riscv" in toolchain_prefix and section_name == 'bss':
if "riscv" in toolchain_prefix and section_name == "bss":
# TODO(weberlo): Figure out why 32 is the minimum constant that works.
#
# The current hypothesis is that the last symbols in the ".bss" and
......@@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
@register_func("tvm_callback_relocate_binary")
def tvm_callback_relocate_binary(
binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix):
binary_path,
word_size,
text_start,
rodata_start,
data_start,
bss_start,
stack_end,
toolchain_prefix):
"""Relocates sections in the binary to new addresses
Parameters
......@@ -105,17 +179,23 @@ def tvm_callback_relocate_binary(
binary_path : str
path of the binary file
text_addr : str
text section absolute address
word_size : int
word size on the target machine
text_start : int
text section address
rodata_addr : str
rodata section absolute address
rodata_start : int
rodata section address
data_addr : str
data section absolute address
data_start : int
data section address
bss_addr : str
bss section absolute address
bss_start : int
bss section address
stack_end : int
stack section end address
toolchain_prefix : str
prefix for binary names in target compiler toolchain
......@@ -125,68 +205,29 @@ def tvm_callback_relocate_binary(
rel_bin : bytearray
the relocated binary
"""
tmp_dir = util.tempdir()
rel_obj_path = tmp_dir.relpath("relocated.o")
stack_pointer_init = stack_end - word_size
ld_script_contents = ""
# TODO(weberlo): There should be a better way to configure this for different archs.
if "riscv" in toolchain_prefix:
ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n"
# TODO(weberlo): Generate the script in a more procedural manner.
ld_script_contents += """
SECTIONS
{
. = %s;
. = ALIGN(8);
.text :
{
*(.text)
. = ALIGN(8);
*(.text*)
}
. = %s;
. = ALIGN(8);
.rodata :
{
*(.rodata)
. = ALIGN(8);
*(.rodata*)
}
. = %s;
. = ALIGN(8);
.data :
{
*(.data)
. = ALIGN(8);
*(.data*)
. = ALIGN(8);
*(.sdata)
}
. = %s;
. = ALIGN(8);
.bss :
{
*(.bss)
. = ALIGN(8);
*(.bss*)
. = ALIGN(8);
*(.sbss)
}
}
""" % (text_addr, rodata_addr, data_addr, bss_addr)
ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
word_size=word_size,
text_start=text_start,
rodata_start=rodata_start,
data_start=data_start,
bss_start=bss_start,
stack_pointer_init=stack_pointer_init)
tmp_dir = util.tempdir()
rel_obj_path = tmp_dir.relpath("relocated.obj")
rel_ld_script_path = tmp_dir.relpath("relocated.lds")
with open(rel_ld_script_path, "w") as f:
f.write(ld_script_contents)
ld_proc = subprocess.Popen(["{}ld".format(toolchain_prefix), binary_path,
run_cmd([
"{}ld".format(toolchain_prefix),
binary_path,
"-T", rel_ld_script_path,
"-o", rel_obj_path],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(out, _) = ld_proc.communicate()
if ld_proc.returncode != 0:
msg = "linking error using ld:\n"
msg += py_str(out)
raise RuntimeError(msg)
"-o", rel_obj_path])
with open(rel_obj_path, "rb") as f:
rel_bin = bytearray(f.read())
return rel_bin
......@@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
tmp_section = tmp_dir.relpath("tmp_section.bin")
with open(tmp_bin, "wb") as out_file:
out_file.write(bytes(binary))
objcopy_proc = subprocess.Popen(["{}objcopy".format(toolchain_prefix), "--dump-section",
run_cmd([
"{}objcopy".format(toolchain_prefix),
"--dump-section",
".{}={}".format(section, tmp_section),
tmp_bin],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(out, _) = objcopy_proc.communicate()
if objcopy_proc.returncode != 0:
msg = "error in using objcopy:\n"
msg += py_str(out)
raise RuntimeError(msg)
tmp_bin])
if os.path.isfile(tmp_section):
# Get section content if it exists.
with open(tmp_section, "rb") as f:
......@@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix):
tmp_obj = tmp_dir.relpath("tmp_obj.bin")
with open(tmp_obj, "wb") as out_file:
out_file.write(bytes(binary))
nm_proc = subprocess.Popen(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(nm_output, _) = nm_proc.communicate()
if nm_proc.returncode != 0:
msg = "error in using nm:\n"
msg += py_str(nm_output)
raise RuntimeError(msg)
nm_output = nm_output.decode("utf8").splitlines()
nm_output = run_cmd([
"{}nm".format(toolchain_prefix),
"-C",
"--defined-only",
tmp_obj])
nm_output = nm_output.splitlines()
map_str = ""
for line in nm_output:
line = line.split()
......
......@@ -19,14 +19,22 @@
from __future__ import absolute_import
import argparse
import ast
import multiprocessing
import sys
import logging
import tvm
from tvm import micro
from .. import rpc
def main(args):
"""Main function"""
"""Main function
Parameters
----------
args : argparse.Namespace
parsed args from command-line invocation
"""
if args.tracker:
url, port = args.tracker.rsplit(":", 1)
port = int(port)
......@@ -37,6 +45,9 @@ def main(args):
else:
tracker_addr = None
if args.utvm_dev_config or args.utvm_dev_id:
init_utvm(args)
server = rpc.Server(args.host,
args.port,
args.port_end,
......@@ -48,6 +59,38 @@ def main(args):
server.proc.join()
def init_utvm(args):
    """MicroTVM-specific RPC initialization

    Builds a device config (either loaded from the JSON file given by
    `--utvm-dev-config`, or generated by the `default_config` function
    registered for `--utvm-dev-id`) and overrides the RPC server start and
    shutdown hooks so a `micro.Session` is entered on start and exited on
    shutdown.

    Parameters
    ----------
    args : argparse.Namespace
        parsed args from command-line invocation

    Raises
    ------
    RuntimeError
        if both `--utvm-dev-config` and `--utvm-dev-id` are given
    """
    # Local import: `json` is only needed here and is not among the
    # module-level imports visible in this file.
    import json

    if args.utvm_dev_config and args.utvm_dev_id:
        raise RuntimeError('only one of --utvm-dev-config and --utvm-dev-id allowed')
    if not (args.utvm_dev_config or args.utvm_dev_id):
        # No MicroTVM device was requested, so there is nothing to set up.
        return

    if args.utvm_dev_config:
        with open(args.utvm_dev_config, 'r') as dev_conf_file:
            dev_config = json.load(dev_conf_file)
    else:
        # `--utvm-dev-config-args` is optional; some `default_config`
        # functions take no arguments, so fall back to an empty list.
        if args.utvm_dev_config_args:
            dev_config_args = ast.literal_eval(args.utvm_dev_config_args)
        else:
            dev_config_args = []
        default_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['default_config']
        dev_config = default_config_func(*dev_config_args)

    # Shared between the two hooks below: the start hook creates the session
    # and the shutdown hook must exit that same session.
    session = None

    # add MicroTVM overrides
    @tvm.register_func('tvm.rpc.server.start', override=True)
    def server_start():
        # pylint: disable=unused-variable
        nonlocal session
        session = micro.Session(dev_config)
        session._enter()

    @tvm.register_func('tvm.rpc.server.shutdown', override=True)
    def server_shutdown():
        # pylint: disable=unused-variable
        # Only exit a session that was actually started.
        if session is not None:
            session._exit()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--host', type=str, default="0.0.0.0",
......@@ -71,6 +114,13 @@ if __name__ == "__main__":
and ROCM compilers.")
parser.add_argument('--custom-addr', type=str,
help="Custom IP Address to Report to RPC Tracker")
parser.add_argument('--utvm-dev-config', type=str,
help='JSON config file for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-id', type=str,
help='Unique ID for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-config-args', type=str,
help=('Python list of literals required to generate a default'
' MicroTVM config (if --utvm-dev-id is specified)'))
parser.set_defaults(fork=True)
args = parser.parse_args()
......
......@@ -14,13 +14,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""uTVM module for bare-metal backends.
uTVM (or the micro backend) enables provides support for bare-metal devices.
Its targets currently include a host-emulated device which is used for testing,
and JTAG-based openocd device which allows actual interfacing with microdevices.
"""
"""MicroTVM module for bare-metal backends"""
from ..contrib import binutil
from .base import Session, cross_compiler, create_micro_lib
from .base import Session, create_micro_mod, cross_compiler
from .base import LibType, get_micro_host_driven_dir, get_micro_device_dir
from . import device
......@@ -14,71 +14,100 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base definitions for micro."""
"""Base definitions for MicroTVM"""
from __future__ import absolute_import
import logging
import os
import sys
from enum import Enum
import tvm
from tvm.contrib import util as _util
from tvm.contrib import cc as _cc
from .._ffi.function import _init_api
from .._ffi.libinfo import find_include_path
SUPPORTED_DEVICE_TYPES = ["host", "openocd"]
class LibType(Enum):
    """Kinds of library that can be compiled and loaded onto a device

    Members
    -------
    RUNTIME
        library to be used as a MicroTVM runtime
    OPERATOR
        library to be used as an operator
    """
    RUNTIME = 0
    OPERATOR = 1
class Session:
"""MicroTVM Device Session
Parameters
----------
device_type : str
type of low-level device
toolchain_prefix : str
toolchain prefix to be used. For example, a prefix of
"riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
the compiler and "riscv64-unknown-elf-ld" is used as the linker,
etc.
config : dict
configuration for this session (as generated by
`tvm.micro.device.host.default_config()`, for example)
Example
--------
.. code-block:: python
c_mod = ... # some module generated with "c" as the target
device_type = "openocd"
toolchain_prefix = "riscv64-unknown-elf-"
with tvm.micro.Session(device_type,
toolchain_prefix,
base_addr=0x10010000,
server_addr="127.0.0.1",
port=6666):
c_mod.export_library(lib_obj_path, fcompile=tvm.micro.cross_compiler(toolchain_prefix))
micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
dev_config = micro.device.arm.stm32f746xx.default_config("127.0.0.1", 6666)
with tvm.micro.Session(dev_config) as sess:
micro_mod = create_micro_mod(c_mod, dev_config)
"""
def __init__(self, device_type, toolchain_prefix, **kwargs):
if device_type not in SUPPORTED_DEVICE_TYPES:
raise RuntimeError("unknown micro device type \"{}\"".format(device_type))
def __init__(self, config):
self._check_system()
self._check_args(device_type, kwargs)
# TODO(weberlo): add config validation
# grab a binutil instance from the ID in the config
dev_funcs = tvm.micro.device.get_device_funcs(config["device_id"])
self.create_micro_lib = dev_funcs["create_micro_lib"]
self.toolchain_prefix = config["toolchain_prefix"]
self.mem_layout = config["mem_layout"]
self.word_size = config["word_size"]
self.thumb_mode = config["thumb_mode"]
self.comms_method = config["comms_method"]
# First, find and compile runtime library.
runtime_src_path = os.path.join(_get_micro_device_dir(), "utvm_runtime.c")
runtime_src_path = os.path.join(get_micro_host_driven_dir(), "utvm_runtime.c")
tmp_dir = _util.tempdir()
runtime_obj_path = tmp_dir.relpath("utvm_runtime.obj")
create_micro_lib(
runtime_obj_path, runtime_src_path, toolchain_prefix, include_dev_lib_header=False)
self.create_micro_lib(runtime_obj_path, runtime_src_path, LibType.RUNTIME)
#input(f"check {runtime_obj_path}: ")
comms_method = config["comms_method"]
if comms_method == "openocd":
server_addr = config["server_addr"]
server_port = config["server_port"]
elif comms_method == "host":
server_addr = ""
server_port = 0
else:
raise RuntimeError(f"unknown communication method: f{self.comms_method}")
base_addr = kwargs.get("base_addr", 0)
server_addr = kwargs.get("server_addr", "")
port = kwargs.get("port", 0)
self.module = _CreateSession(
device_type, runtime_obj_path, toolchain_prefix, base_addr, server_addr, port)
comms_method,
runtime_obj_path,
self.toolchain_prefix,
self.mem_layout["text"].get("start", 0),
self.mem_layout["text"]["size"],
self.mem_layout["rodata"].get("start", 0),
self.mem_layout["rodata"]["size"],
self.mem_layout["data"].get("start", 0),
self.mem_layout["data"]["size"],
self.mem_layout["bss"].get("start", 0),
self.mem_layout["bss"]["size"],
self.mem_layout["args"].get("start", 0),
self.mem_layout["args"]["size"],
self.mem_layout["heap"].get("start", 0),
self.mem_layout["heap"]["size"],
self.mem_layout["workspace"].get("start", 0),
self.mem_layout["workspace"]["size"],
self.mem_layout["stack"].get("start", 0),
self.mem_layout["stack"]["size"],
self.word_size,
self.thumb_mode,
server_addr,
server_port)
self._enter = self.module["enter"]
self._exit = self.module["exit"]
......@@ -88,55 +117,57 @@ class Session:
Raises error if not supported.
"""
if not sys.platform.startswith("linux"):
raise RuntimeError("microTVM is currently only supported on Linux")
raise RuntimeError("MicroTVM is currently only supported on Linux hosts")
# TODO(weberlo): Add 32-bit support.
# It's primarily the compilation pipeline that isn't compatible.
if sys.maxsize <= 2**32:
raise RuntimeError("microTVM is currently only supported on 64-bit platforms")
def _check_args(self, device_type, args):
"""Check if the given configuration is valid."""
if device_type == "host":
pass
elif device_type == "openocd":
assert "base_addr" in args
assert "server_addr" in args
assert "port" in args
raise RuntimeError("MicroTVM is currently only supported on 64-bit host platforms")
def __enter__(self):
self._enter()
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self._exit()
def _get_micro_device_dir():
"""Get directory path for uTVM runtime source files.
def create_micro_mod(c_mod, dev_config):
"""Produces a micro module from a given module.
Parameters
----------
c_mod : tvm.module.Module
module with "c" as its target backend
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
Return
------
micro_device_dir : str
directory path
micro_mod : tvm.module.Module
micro module for the target device
"""
micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
micro_device_dir = os.path.join(micro_dir, "..", "..", "..",
"src", "runtime", "micro", "device")
return micro_device_dir
temp_dir = _util.tempdir()
lib_obj_path = temp_dir.relpath("dev_lib.obj")
c_mod.export_library(
lib_obj_path,
fcompile=cross_compiler(dev_config, LibType.OPERATOR))
micro_mod = tvm.module.load(lib_obj_path)
return micro_mod
def cross_compiler(toolchain_prefix, include_dev_lib_header=True):
"""Creates a cross compile function that wraps `create_micro_lib`.
def cross_compiler(dev_config, lib_type):
"""Create a cross-compile function that wraps `create_lib` for a `Binutil` instance.
For use in `tvm.module.Module.export_library`.
Parameters
----------
toolchain_prefix : str
toolchain prefix to be used
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
include_dev_lib_header : Optional[bool]
whether to include the device library header containing definitions of
library functions.
lib_type : micro.LibType
whether to compile a MicroTVM runtime or operator library
Return
------
......@@ -149,78 +180,46 @@ def cross_compiler(toolchain_prefix, include_dev_lib_header=True):
.. code-block:: python
c_mod = ... # some module generated with "c" as the target
fcompile = tvm.micro.cross_compiler(toolchain_prefix="")
fcompile = tvm.micro.cross_compiler(dev_config, LibType.OPERATOR)
c_mod.export_library("dev_lib.obj", fcompile=fcompile)
"""
dev_funcs = tvm.micro.device.get_device_funcs(dev_config['device_id'])
create_micro_lib = dev_funcs['create_micro_lib']
def compile_func(obj_path, src_path, **kwargs):
if isinstance(obj_path, list):
obj_path = obj_path[0]
if isinstance(src_path, list):
src_path = src_path[0]
create_micro_lib(obj_path, src_path, toolchain_prefix,
kwargs.get("options", None), include_dev_lib_header)
return _cc.cross_compiler(compile_func)
create_micro_lib(obj_path, src_path, lib_type, kwargs.get("options", None))
return _cc.cross_compiler(compile_func, output_format="obj")
def create_micro_lib(
obj_path, src_path, toolchain_prefix, options=None, include_dev_lib_header=True):
"""Compiles code into a binary for the target micro device.
def get_micro_host_driven_dir():
"""Get directory path for uTVM host-driven runtime source files.
Parameters
----------
obj_path : Optional[str]
path to generated object file (defaults to same directory as `src_path`)
Return
------
micro_device_dir : str
directory path
"""
micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
micro_host_driven_dir = os.path.join(micro_dir, "..", "..", "..",
"src", "runtime", "micro", "host_driven")
return micro_host_driven_dir
src_path : str
path to source file
toolchain_prefix : str
toolchain prefix to be used
def get_micro_device_dir():
"""Get directory path for parent directory of device-specific source files
include_dev_lib_header : bool
whether to include the device library header containing definitions of
library functions.
Return
------
micro_device_dir : str
directory path
"""
def replace_suffix(s, new_suffix):
if "." in os.path.basename(s):
# There already exists an extension.
return os.path.join(
os.path.dirname(s),
".".join(os.path.basename(s).split(".")[:-1] + [new_suffix]))
# No existing extension; we can just append.
return s + "." + new_suffix
# uTVM object files cannot have an ".o" suffix, because it triggers the
# code path for creating shared objects in `tvm.module.load`. So we replace
# ".o" suffixes with ".obj".
if obj_path.endswith(".o"):
logging.warning(
"\".o\" suffix in \"%s\" has been replaced with \".obj\"", obj_path)
obj_path = replace_suffix(obj_path, "obj")
options = ["-I" + path for path in find_include_path()]
options += ["-I{}".format(_get_micro_device_dir())]
options += ["-fno-stack-protector"]
# TODO(weberlo): Don't rely on the toolchain prefix to identify if this is the host
# device.
if toolchain_prefix == "" and sys.maxsize > 2**32 and sys.platform.startswith("linux"):
# Only add this option if the host is a 64-bit Linux.
options += ["-mcmodel=large"]
compile_cmd = "{}gcc".format(toolchain_prefix)
if include_dev_lib_header:
# Create a temporary copy of the source, so we can inject the dev lib
# header without modifying the original.
tmp_dir = _util.tempdir()
temp_src_path = tmp_dir.relpath("temp.c")
with open(src_path, "r") as f:
src_lines = f.read().splitlines()
src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
with open(temp_src_path, "w") as f:
f.write("\n".join(src_lines))
src_path = temp_src_path
_cc.create_shared(obj_path, src_path, options, compile_cmd)
micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
micro_device_dir = os.path.join(micro_dir, "..", "..", "..",
"src", "runtime", "micro", "device")
return micro_device_dir
_init_api("tvm.micro", "tvm.micro.base")
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Device-specific configuration for MicroTVM"""
from .base import register_device, get_device_funcs, create_micro_lib_base
from . import host
from . import arm
from . import riscv_spike
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base module for ARM device configurations"""
from . import stm32f746xx
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for ARM STM32F746XX devices"""
from .. import create_micro_lib_base, register_device
# Key under which this device's functions are registered (see the
# `register_device` call at the bottom of this module).
DEVICE_ID = "arm.stm32f746xx"
# Prefix passed to `create_micro_lib_base`, which prepends it to the
# toolchain binary names ("gcc", "ld").
TOOLCHAIN_PREFIX = "arm-none-eabi-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the given list is not modified
    """
    device_options = [
        "-mcpu=cortex-m7",
        "-mlittle-endian",
        "-mfloat-abi=hard",
        "-mfpu=fpv5-sp-d16",
        "-mthumb",
        "-gdwarf-5",
    ]
    # Build a fresh list instead of extending `options` in place, so a list
    # that the caller reuses across compilations does not accumulate
    # duplicate device flags.
    caller_options = [] if options is None else list(options)
    create_micro_lib_base(
        obj_path,
        src_path,
        TOOLCHAIN_PREFIX,
        DEVICE_ID,
        lib_type,
        options=caller_options + device_options)
def default_config(server_addr, server_port):
    """Build the default MicroTVM configuration for ARM STM32F746XX devices

    Parameters
    ----------
    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    #
    # [Device Memory Layout]
    #   RAM   (rwx) : START = 0x20000000, LENGTH = 320K
    #   FLASH (rx)  : START = 0x8000000,  LENGTH = 1024K
    #
    # Each row is (section name, start address, size in bytes). With these
    # values every section begins where the previous one ends.
    section_table = [
        ("text", 0x20000180, 20480),
        ("rodata", 0x20005180, 20480),
        ("data", 0x2000a180, 768),
        ("bss", 0x2000a480, 768),
        ("args", 0x2000a780, 1280),
        ("heap", 0x2000ac80, 262144),
        ("workspace", 0x2004ac80, 20480),
        ("stack", 0x2004fc80, 80),
    ]
    mem_layout = {
        section_name: {"start": section_start, "size": section_size}
        for (section_name, section_start, section_size) in section_table
    }
    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
    return config
# Register this module's compile and config functions under DEVICE_ID so they
# can be looked up through the device registry.
register_device(DEVICE_ID, {
    "create_micro_lib": create_micro_lib,
    "default_config": default_config,
})
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base definitions for MicroTVM config"""
import glob
import os
from pathlib import Path
from tvm.contrib import util as _util
from tvm.contrib.binutil import run_cmd
from tvm._ffi.libinfo import find_include_path
from tvm.micro import LibType, get_micro_host_driven_dir, get_micro_device_dir
# Maps a device ID to the dict of functions registered for that device.
_DEVICE_REGISTRY = {}


def register_device(device_id, device_funcs):
    """Register a device and associated compilation/config functions

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has already been registered
    """
    if device_id in _DEVICE_REGISTRY:
        raise RuntimeError(f"\"{device_id}\" already exists in the device registry")
    _DEVICE_REGISTRY[device_id] = device_funcs


def get_device_funcs(device_id):
    """Get compilation and config generation functions for device

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    Return
    ------
    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has not been registered
    """
    if device_id not in _DEVICE_REGISTRY:
        # The lookup is against the device registry; name it as such so the
        # message matches the one raised by `register_device`.
        raise RuntimeError(f"\"{device_id}\" does not exist in the device registry")
    return _DEVICE_REGISTRY[device_id]
def create_micro_lib_base(
        out_obj_path,
        in_src_path,
        toolchain_prefix,
        device_id,
        lib_type,
        options=None):
    """Compiles code into a binary for the target micro device.

    Every source file is compiled to its own object file, and the object
    files are then combined into `out_obj_path` with a relocatable link.

    Parameters
    ----------
    out_obj_path : str
        path to generated object file

    in_src_path : str
        path to source file

    toolchain_prefix : str
        toolchain prefix to be used. For example, a prefix of
        "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
        the compiler and "riscv64-unknown-elf-ld" is used as the linker,
        etc.

    device_id : str
        unique identifier for the target device

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : List[str]
        additional options to pass to GCC

    Raises
    ------
    RuntimeError
        if `lib_type` is not a known library type, or (from `run_cmd`) if a
        compile or link command exits with a nonzero return code
    """
    base_compile_cmd = [
        f"{toolchain_prefix}gcc",
        "-std=c11",
        "-Wall",
        "-Wextra",
        "--pedantic",
        "-c",
        "-O0",
        "-g",
        "-nostartfiles",
        "-nodefaultlibs",
        "-nostdlib",
        "-fdata-sections",
        "-ffunction-sections",
    ]
    if options is not None:
        base_compile_cmd += options

    src_paths = []
    include_paths = find_include_path() + [get_micro_host_driven_dir()]
    tmp_dir = _util.tempdir()
    # we might transform the src path in one of the branches below
    new_in_src_path = in_src_path
    if lib_type == LibType.RUNTIME:
        dev_dir = _get_device_source_dir(device_id)
        dev_src_paths = glob.glob(f"{dev_dir}/*.[csS]")
        # there needs to at least be a utvm_timer.c file
        assert dev_src_paths, f"no device sources found in \"{dev_dir}\""
        assert "utvm_timer.c" in map(os.path.basename, dev_src_paths), \
            f"missing utvm_timer.c in \"{dev_dir}\""
        src_paths += dev_src_paths
    elif lib_type == LibType.OPERATOR:
        # create a temporary copy of the source, so we can inject the dev lib
        # header without modifying the original.
        temp_src_path = tmp_dir.relpath("temp.c")
        with open(in_src_path, "r") as f:
            src_lines = f.read().splitlines()
        src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
        with open(temp_src_path, "w") as f:
            f.write("\n".join(src_lines))
        new_in_src_path = temp_src_path
        # NOTE: "-c" is already part of `base_compile_cmd`, so it is not
        # appended a second time here.
    else:
        raise RuntimeError("unknown lib type")
    src_paths += [new_in_src_path]

    for path in include_paths:
        base_compile_cmd += ["-I", path]

    prereq_obj_paths = []
    for src_path in src_paths:
        # Place intermediate objects in the temp dir rather than the current
        # working directory, so builds neither litter the caller's directory
        # nor overwrite each other's objects when run from the same place.
        curr_obj_path = tmp_dir.relpath(Path(src_path).with_suffix(".o").name)
        # two sources with the same base name would map to the same object
        assert curr_obj_path not in prereq_obj_paths, \
            f"duplicate object file name for \"{src_path}\""
        prereq_obj_paths.append(curr_obj_path)
        curr_compile_cmd = base_compile_cmd + [src_path, "-o", curr_obj_path]
        run_cmd(curr_compile_cmd)

    # combine all intermediate objects into a single relocatable object
    ld_cmd = [f"{toolchain_prefix}ld", "-relocatable"]
    ld_cmd += prereq_obj_paths
    ld_cmd += ["-o", out_obj_path]
    run_cmd(ld_cmd)
def _get_device_source_dir(device_id):
    """Return the directory holding the device-specific uTVM sources

    Each dot-separated component of `device_id` becomes one directory level
    beneath the directory returned by `get_micro_device_dir()`.
    """
    relative_dir = device_id.replace(".", "/")
    return f"{get_micro_device_dir()}/{relative_dir}"
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for the host emulated device"""
import sys
from . import create_micro_lib_base, register_device
# Key under which this device's functions are registered (see the
# `register_device` call at the bottom of this module).
DEVICE_ID = "host"
# Empty prefix: `create_micro_lib_base` prepends this to "gcc" and "ld", so
# the unprefixed binaries are invoked.
TOOLCHAIN_PREFIX = ""
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the given list is not modified
    """
    # Copy instead of extending `options` in place, so a list that the caller
    # reuses across compilations does not accumulate duplicate flags.
    all_options = [] if options is None else list(options)
    # only added when the host is a 64-bit Linux
    if sys.maxsize > 2**32 and sys.platform.startswith("linux"):
        all_options.append("-mcmodel=large")
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=all_options)
def default_config():
    """Build the default configuration for the host emulated device

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (section name, section size) pairs, in layout order
    section_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )
    # the word size follows the interpreter running this code: 8 when
    # `sys.maxsize` exceeds 2**32, otherwise 4
    host_is_64_bit = sys.maxsize > 2**32

    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
    }
    config["mem_layout"] = {name: {"size": size} for name, size in section_sizes}
    config["word_size"] = 8 if host_is_64_bit else 4
    config["thumb_mode"] = False
    config["comms_method"] = "host"
    return config
# Make the host device's library builder and config generator available
# under `DEVICE_ID`.
register_device(
    DEVICE_ID,
    dict(create_micro_lib=create_micro_lib, default_config=default_config))
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for Spike, a RISC-V functional ISA simulator"""
from collections import OrderedDict
from . import create_micro_lib_base, register_device
# Identifier this module registers itself under (see the `register_device`
# call at the bottom of the module) and reports in its config dict.
DEVICE_ID = "riscv_spike"
# Prefix of the cross-compilation toolchain, handed to `create_micro_lib_base`.
TOOLCHAIN_PREFIX = "riscv64-unknown-elf-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Compile a micro library for Spike via `create_micro_lib_base`

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC
    """
    # the only device-specific inputs are the toolchain prefix and device ID;
    # `options` is forwarded untouched
    base_args = (obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type)
    create_micro_lib_base(*base_args, options=options)
def default_config(base_addr, server_addr, server_port):
    """Build the default configuration for Spike

    Parameters
    ----------
    base_addr : int
        base address of the simulator (for calculating the memory layout)

    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (section name, section size) pairs, in layout order
    section_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )

    # place the sections back to back: each one starts where the previous
    # one ended, with the first one at `base_addr`
    mem_layout = OrderedDict()
    offset = 0
    for name, size in section_sizes:
        mem_layout[name] = {"size": size, "start": base_addr + offset}
        offset += size

    return {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        # NOTE(review): thumb mode is an ARM concept -- confirm that True is
        # intended for this RISC-V target
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
# Make the Spike library builder and config generator available under
# `DEVICE_ID`.
register_device(
    DEVICE_ID,
    dict(create_micro_lib=create_micro_lib, default_config=default_config))
......@@ -265,6 +265,9 @@ def load(path, fmt=""):
files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
_cc.create_shared(path + ".so", files)
path += ".so"
# TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
elif path.endswith(".obj"):
fmt = "micro_dev"
# Redirect to the load API
return _LoadFromFile(path, fmt)
......
......@@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name,
}
t->device_type = kDLCPU;
t->thread_warp_size = 1;
if (target_name == "c" || target_name == "llvm") {
if (target_name == "c" && t->device_name == "micro_dev") {
t->device_type = kDLMicroDev;
} else if (target_name == "c" || target_name == "llvm") {
t->keys_array.push_back(ir::StringImm::make("cpu"));
} else if (target_name == "cuda" || target_name == "nvptx") {
t->device_type = kDLGPU;
......
......@@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() {
module_name_ = GetUniqueName("__tvm_module_ctx");
}
void CodeGenCHost::Init(bool output_ssa) {
void CodeGenCHost::Init(bool output_ssa, bool emit_asserts) {
emit_asserts_ = emit_asserts;
decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n";
decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n";
decl_stream << "extern void* " << module_name_ << " = NULL;\n";
......@@ -237,6 +238,7 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*)
}
void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*)
if (emit_asserts_) {
std::string cond = PrintExpr(op->condition);
PrintIndent();
stream << "if (!(" << cond << ")) {\n";
......@@ -248,6 +250,7 @@ void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*)
this->EndScope(assert_if_scope);
PrintIndent();
stream << "}\n";
}
this->PrintStmt(op->body);
}
......@@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op,
runtime::Module BuildCHost(Array<LoweredFunc> funcs) {
using tvm::runtime::Registry;
bool output_ssa = false;
bool emit_asserts = false;
CodeGenCHost cg;
cg.Init(output_ssa);
cg.Init(output_ssa, emit_asserts);
for (LoweredFunc f : funcs) {
cg.AddFunction(f);
}
......
......@@ -35,7 +35,7 @@ namespace codegen {
class CodeGenCHost final : public CodeGenC {
public:
CodeGenCHost();
void Init(bool output_ssa);
void Init(bool output_ssa, bool emit_asserts);
void AddFunction(LoweredFunc f);
std::string Finish();
......@@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC {
private:
std::string module_name_;
/*! \brief whether to emit asserts in the resulting C code */
bool emit_asserts_;
void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name);
void PrintFuncCall(const std::string& packed_func_name, int num_args);
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.s
* \brief uTVM init definition for STM32F746XX-series boards
*/
.syntax unified
.cpu cortex-m7
.fpu softvfp
.thumb
.section .text.UTVMInit
.type UTVMInit, %function
/*
 * UTVMInit: device-specific entry stub. It enables the FPU, loads the stack
 * pointer from the linker-provided symbol `_utvm_stack_pointer_init`, and
 * then calls UTVMMain.
 */
UTVMInit:
/* enable fpu */
/* read-modify-write of the word at 0xE000ED88: OR in 0xF00000 (bits 20-23) and keep every other bit */
/* NOTE(review): 0xE000ED88 is presumably the Cortex-M CPACR, where bits 20-23 grant CP10/CP11 access -- confirm against the ARMv7-M reference */
ldr r0, =0xE000ED88
ldr r1, [r0]
ldr r2, =0xF00000
orr r1, r2
str r1, [r0]
/* data and instruction barriers after the register write, before any further instruction runs */
dsb
isb
/* set stack pointer */
ldr sp, =_utvm_stack_pointer_init
/* NOTE(review): nothing follows this call, so execution would run off the end of the function if UTVMMain returned; presumably the host halts the device first -- confirm */
bl UTVMMain
.size UTVMInit, .-UTVMInit
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API definitions for STM32F746XX-series boards
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "utvm_runtime.h"
// There are two implementations of cycle counters on the STM32F7X: SysTick and
// CYCCNT. SysTick is preferred, as it gives better error handling, but the
// counter is only 24 bits wide. If a larger timer is needed, use the CYCCNT
// implementation, which has a 32-bit counter.
#define USE_SYSTICK
#ifdef USE_SYSTICK
// Memory-mapped SysTick registers, each accessed as a volatile 32-bit word.
// SYST_CSR holds the control bits, SYST_RVR the reload value and SYST_CVR the
// current counter value; SYST_CALIB is defined but not used in this file.
#define SYST_CSR (*((volatile uint32_t *) 0xE000E010))
#define SYST_RVR (*((volatile uint32_t *) 0xE000E014))
#define SYST_CVR (*((volatile uint32_t *) 0xE000E018))
#define SYST_CALIB (*((volatile uint32_t *) 0xE000E01C))
// Bit positions inside the registers above, not masks: UTVMTimerStart builds
// its masks as (1 << SYST_CSR_ENABLE) and (1 << SYST_CSR_CLKSOURCE).
// NOTE(review): the remaining constants are presumably bit positions as well
// -- confirm against the SysTick register documentation.
#define SYST_CSR_ENABLE 0
#define SYST_CSR_TICKINT 1
#define SYST_CSR_CLKSOURCE 2
#define SYST_COUNTFLAG 16
#define SYST_CALIB_NOREF 31
#define SYST_CALIB_SKEW 30
// Counter values captured by UTVMTimerStart and UTVMTimerStop respectively.
uint32_t start_time = 0;
uint32_t stop_time = 0;
// Starts the SysTick counter and records its first non-zero value in
// `start_time`. Always returns 0.
int32_t UTVMTimerStart() {
// set the ENABLE and CLKSOURCE bits; every other control bit (including TICKINT) is written as 0
SYST_CSR = (1 << SYST_CSR_ENABLE) | (1 << SYST_CSR_CLKSOURCE);
// wait until timer starts
while (SYST_CVR == 0) {}
start_time = SYST_CVR;
return 0;
}
// Stops the SysTick counter and records its value in `stop_time`.
void UTVMTimerStop() {
// clear all control bits (ENABLE included) before sampling the counter
SYST_CSR = 0;
stop_time = SYST_CVR;
}
// Puts the timer into a known state: control bits cleared, reload register
// set to 0x00FFFFFF, and 0 written to the current-value register.
void UTVMTimerReset() {
SYST_CSR = 0;
// maximum reload value (24-bit)
SYST_RVR = (~((uint32_t) 0)) >> 8;
SYST_CVR = 0;
}
// Returns the tick count measured between UTVMTimerStart and UTVMTimerStop.
// If the counter's overflow flag is set, records an error through
// TVMAPISetLastError and returns (uint32_t) -1 instead.
uint32_t UTVMTimerRead() {
  // SYST_COUNTFLAG is a bit position (16), so it has to be shifted into a
  // mask; AND-ing with the raw value 16 would test bit 4 instead and the
  // overflow would never be noticed.
  if (SYST_CSR & ((uint32_t) 1 << SYST_COUNTFLAG)) {
    TVMAPISetLastError("timer overflowed");
    // error sentinel: all bits set
    return (uint32_t) -1;
  } else {
    // start minus stop: the counter value is expected to decrease while the
    // timer runs (see the SysTick documentation)
    return start_time - stop_time;
  }
}
#else // !USE_SYSTICK
// Memory-mapped registers used by the CYCCNT implementation, each accessed as
// a volatile 32-bit word: DWT_CTRL is the control register and DWT_CYCCNT the
// cycle counter itself.
#define DWT_CTRL (*((volatile uint32_t *) 0xE0001000))
#define DWT_CYCCNT (*((volatile uint32_t *) 0xE0001004))
// Bit positions inside DWT_CTRL, not masks: DWT_CTRL_CYCCNTENA is applied as
// (1 << DWT_CTRL_CYCCNTENA) where the counter is switched on and off.
// NOTE(review): DWT_CTRL_NOCYCCNT is presumably a bit position as well --
// confirm against the DWT register documentation.
#define DWT_CTRL_NOCYCCNT 25
#define DWT_CTRL_CYCCNTENA 0
// Counter values captured by UTVMTimerStart and UTVMTimerStop respectively.
uint32_t start_time = 0;
uint32_t stop_time = 0;
// Writes 0 to the cycle counter register.
void UTVMTimerReset() {
DWT_CYCCNT = 0;
}
// Records the current cycle count in `start_time` and enables the cycle
// counter. Returns 0 on success; returns -1 (after recording an error
// through TVMAPISetLastError) when the NOCYCCNT bit of DWT_CTRL is set.
int32_t UTVMTimerStart() {
  // DWT_CTRL_NOCYCCNT is a bit position (25), so it has to be shifted into a
  // mask; AND-ing with the raw value 25 would test bits 0, 3 and 4 instead.
  if (DWT_CTRL & ((uint32_t) 1 << DWT_CTRL_NOCYCCNT)) {
    TVMAPISetLastError("cycle counter not implemented on device");
    return -1;
  }
  start_time = DWT_CYCCNT;
  DWT_CTRL |= (1 << DWT_CTRL_CYCCNTENA);
  // report success explicitly; falling off the end of a non-void function
  // leaves the value seen by the caller undefined
  return 0;
}
// Records the current cycle count in `stop_time`, then clears the CYCCNTENA
// bit of DWT_CTRL.
void UTVMTimerStop() {
stop_time = DWT_CYCCNT;
DWT_CTRL &= ~(1 << DWT_CTRL_CYCCNTENA);
}
// Returns the number of cycles counted between UTVMTimerStart and
// UTVMTimerStop.
//
// The return type is uint32_t so that this definition agrees with the
// `extern uint32_t UTVMTimerRead();` declaration in utvm_runtime.h and with
// the SysTick implementation of the same function.
uint32_t UTVMTimerRead() {
  // Unsigned subtraction is performed modulo 2^32, so this one expression is
  // correct both when the counter did not wrap (stop_time >= start_time) and
  // when it wrapped once. The earlier two-branch version compared stop_time
  // with itself, so it always took the wrap-around branch, and that branch
  // was short by one cycle.
  return stop_time - start_time;
}
#endif // USE_SYSTICK
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.c
* \brief uTVM init definition for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
// Entry stub for the host emulated device: performs no setup of its own and
// hands control straight to UTVMMain.
void UTVMInit() {
// no init required for the host
UTVMMain();
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API stubs for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
// TODO(weberlo): use this? https://stackoverflow.com/questions/5141960/get-the-current-time-in-c
// Stub: the host device has no timer to start; always returns 0.
int32_t UTVMTimerStart() {
return 0;
}
// Stub: does nothing on the host device.
void UTVMTimerStop() { }
// Stub: does nothing on the host device.
void UTVMTimerReset() { }
// Stub: no time is measured on the host device; always reports 1.
uint32_t UTVMTimerRead() {
return 1;
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
......@@ -21,9 +21,9 @@
* \file utvm_runtime.cc
* \brief uTVM runtime
*
* All function calls go through `UTVMMain`, which reads from the current
* `UTVMTask` and calls the appropriate function with the arguments from the
* task.
* All function calls go through the externally defined `UTVMInit`, which
* performs device-specific setup, then calls `UTVMMain`. `UTVMMain` then
* calls the function in `utvm_task` with the arguments from the task.
*
* Additionally included in this file are definitions for some of the most
* common functions used in the C runtime API.
......@@ -35,10 +35,17 @@ extern "C" {
#include "utvm_runtime.h"
// Task pointers must be patched before calling a function.
UTVMTask task;
UTVMTask utvm_task = {
.func = NULL,
.arg_values = NULL,
.arg_type_codes = NULL,
.num_args = 0,
};
size_t utvm_word_size = 0; // NOLINT(*)
// These pointers are patched at load time to point to the workspace section.
char* utvm_workspace_begin = NULL; // NOLINT(*)
char* utvm_workspace_start = NULL; // NOLINT(*)
char* utvm_workspace_end = NULL; // NOLINT(*)
char* utvm_workspace_curr = NULL; // NOLINT(*)
// Keep track of how many active allocations there are on the workspace.
......@@ -47,24 +54,39 @@ size_t utvm_num_active_allocs = 0;
const char* utvm_last_error = NULL; // NOLINT(*)
int32_t utvm_return_code = 0; // NOLINT(*)
// We use a dummy function to signal execution is finished for device
// backends which require breakpoints.
void UTVMDone() { }
uint32_t utvm_task_time = 0;
// Gets called by UTVMInit, after device-specific initialization is finished.
void UTVMMain() {
utvm_workspace_curr = utvm_workspace_begin;
utvm_workspace_curr = utvm_workspace_start;
utvm_num_active_allocs = 0;
utvm_last_error = NULL; // NOLINT(*)
utvm_return_code = 0;
utvm_return_code = task.func((void*) task.arg_values, (void*) task.arg_type_codes, // NOLINT(*)
task.num_args);
utvm_task_time = 0;
UTVMTimerReset();
int32_t err = UTVMTimerStart();
if (err < 0) {
utvm_return_code = err;
UTVMDone();
}
utvm_return_code = utvm_task.func(
(void*) utvm_task.arg_values, // NOLINT(*)
(void*) utvm_task.arg_type_codes, // NOLINT(*)
utvm_task.num_args);
UTVMTimerStop();
utvm_task_time = UTVMTimerRead();
UTVMDone();
}
// We use a dummy function to signal execution is finished for device
// backends which require breakpoints.
void UTVMDone() { }
void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size,
int dtype_code_hint, int dtype_bits_hint) {
// Align up to the target word size (`utvm_word_size`).
utvm_workspace_curr += (8 - ((uintptr_t) utvm_workspace_curr % 8)) % 8; // NOLINT(*)
utvm_workspace_curr +=
(utvm_word_size - ((uintptr_t) utvm_workspace_curr % utvm_word_size)) % utvm_word_size; // NOLINT(*)
if (utvm_workspace_curr + size > utvm_workspace_end) {
// Out of space in workspace.
return NULL;
......@@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
TVMAPISetLastError("free called with no active workspace allocations");
// Reset allocations and workspace (for future task executions).
utvm_num_active_allocs = 0;
utvm_workspace_curr = utvm_workspace_begin;
utvm_workspace_curr = utvm_workspace_start;
return -1;
} else if (utvm_num_active_allocs == 0) {
// No more allocations. Reset workspace.
utvm_workspace_curr = utvm_workspace_begin;
utvm_workspace_curr = utvm_workspace_start;
return 0;
} else {
return 0;
......
......@@ -21,8 +21,8 @@
* \file utvm_runtime.h
* \brief uTVM runtime headers
*/
#ifndef TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_
#define TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_
#ifndef TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#define TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#ifdef __cplusplus
extern "C" {
......@@ -30,6 +30,7 @@ extern "C" {
#include <stdint.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/c_backend_api.h>
/*!
* \brief Task structure for uTVM
......@@ -45,8 +46,22 @@ typedef struct {
int32_t num_args;
} UTVMTask;
extern void UTVMInit();
extern void UTVMTimerReset();
extern int32_t UTVMTimerStart();
extern void UTVMTimerStop();
extern uint32_t UTVMTimerRead();
void UTVMMain();
void UTVMDone();
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
#endif // TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_
#endif // TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
......@@ -31,6 +31,9 @@
namespace tvm {
namespace runtime {
/*! \brief number of bytes in each page */
constexpr int kPageSize = 4096;
/*!
* \brief emulated low-level device on host machine
*/
......@@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice {
* \brief constructor to initialize on-host memory region to act as device
* \param num_bytes size of the emulated on-device memory region
*/
explicit HostLowLevelDevice(size_t num_bytes) : size_(num_bytes) {
explicit HostLowLevelDevice(size_t num_bytes, void** base_addr) : size_(num_bytes) {
size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize;
// TODO(weberlo): Set permissions per section (e.g., read-write perms for
// the heap, execute perms for text, etc.).
int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE;
base_addr_ = reinterpret_cast<std::uintptr_t>(
mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0));
base_addr_ = mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0);
*base_addr = base_addr_;
}
/*!
* \brief destructor to deallocate on-host device region
*/
virtual ~HostLowLevelDevice() {
munmap(reinterpret_cast<void*>(base_addr_), size_);
}
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>();
std::memcpy(buf, addr, num_bytes);
munmap(base_addr_, size_);
}
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>();
std::memcpy(addr, buf, num_bytes);
void Read(DevPtr addr, void* buf, size_t num_bytes) {
std::memcpy(buf, addr.cast_to<void*>(), num_bytes);
}
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) {
DevPtr func_addr = ToDevPtr(func_offset);
reinterpret_cast<void (*)(void)>(func_addr.value())();
void Write(DevPtr addr, const void* buf, size_t num_bytes) {
std::memcpy(addr.cast_to<void*>(), buf, num_bytes);
}
std::uintptr_t base_addr() const final {
return base_addr_;
void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
reinterpret_cast<void (*)(void)>(func_addr.value().val64)();
}
const char* device_type() const final {
......@@ -82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice {
private:
/*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_;
void* base_addr_;
/*! \brief size of memory region */
size_t size_;
};
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes) {
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr) {
std::shared_ptr<LowLevelDevice> lld =
std::make_shared<HostLowLevelDevice>(num_bytes);
std::make_shared<HostLowLevelDevice>(num_bytes, base_addr);
return lld;
}
......
......@@ -40,87 +40,52 @@ class LowLevelDevice {
virtual ~LowLevelDevice() {}
/*!
* \brief reads num_bytes from device memory at base_addr + offset into buffer
* \param offset on-device memory offset pointer to be read from
* \brief reads num_bytes from device memory at addr into buffer
* \param addr on-device memory address to read from
* \param buffer on-host buffer to be read into
* \param num_bytes number of bytes to be read
* \param num_bytes number of bytes to read
*/
virtual void Read(DevBaseOffset offset,
virtual void Read(DevPtr addr,
void* buffer,
size_t num_bytes) = 0;
/*!
* \brief writes num_bytes from buffer to device memory at base_addr + offset
* \param offset on-device memory offset pointer to be written to
* \param buffer on-host buffer to be written
* \param num_bytes number of bytes to be written
* \brief writes num_bytes from buffer to device memory at addr
* \param addr on-device memory address to write into
* \param buffer host buffer to write from
* \param num_bytes number of bytes to write
*/
virtual void Write(DevBaseOffset offset,
virtual void Write(DevPtr addr,
const void* buffer,
size_t num_bytes) = 0;
/*!
* \brief starts execution of device at offset
* \brief starts execution of device at func_addr
* \param func_addr address of the init stub function
* \param breakpoint breakpoint at which to stop function execution
* \param breakpoint_addr address at which to stop function execution
*/
virtual void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) = 0;
// TODO(weberlo): Should we just give the device the *entire* memory layout
// decided by the session?
/*!
* \brief sets the offset of the top of the stack section
* \param stack_top offset of the stack top
*/
virtual void SetStackTop(DevBaseOffset stack_top) {
LOG(FATAL) << "unimplemented";
}
/*!
* \brief convert from base offset to absolute address
* \param offset base offset
* \return absolute address
*/
DevPtr ToDevPtr(DevBaseOffset offset) {
return DevPtr(base_addr() + offset.value());
}
/*!
* \brief convert from absolute address to base offset
* \param ptr absolute address
* \return base offset
*/
DevBaseOffset ToDevOffset(DevPtr ptr) {
return DevBaseOffset(ptr.value() - base_addr());
}
virtual void Execute(DevPtr func_addr, DevPtr breakpoint_addr) = 0;
/*!
* \brief getter function for low-level device type
* \return string containing device type
*/
virtual const char* device_type() const = 0;
protected:
/*!
* \brief getter function for base_addr
* \return the base address of the device memory region
*/
virtual std::uintptr_t base_addr() const = 0;
};
/*!
* \brief create a host low-level device
* \param num_bytes size of the memory region
* \param base_addr pointer to write the host device's resulting base address into
*/
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes);
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr);
/*!
* \brief connect to OpenOCD and create an OpenOCD low-level device
* \param addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to
*/
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr,
const std::string& addr,
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& addr,
int port);
} // namespace runtime
......
......@@ -35,30 +35,6 @@
namespace tvm {
namespace runtime {
size_t GetDefaultSectionSize(SectionKind kind) {
switch (kind) {
case SectionKind::kText:
return 0xF000;
case SectionKind::kRodata:
return 0xF000;
case SectionKind::kData:
return 0xF00;
case SectionKind::kBss:
return 0xF00;
case SectionKind::kArgs:
return 0xF0000;
case SectionKind::kStack:
return 0xF000;
case SectionKind::kHeap:
return 0xF00000;
case SectionKind::kWorkspace:
return 0xF0000;
default:
LOG(FATAL) << "invalid section " << static_cast<size_t>(kind);
return 0;
}
}
const char* SectionToString(SectionKind section) {
switch (section) {
case SectionKind::kText: return "text";
......@@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) {
case SectionKind::kData: return "data";
case SectionKind::kBss: return "bss";
case SectionKind::kArgs: return "args";
case SectionKind::kStack: return "stack";
case SectionKind::kHeap: return "heap";
case SectionKind::kWorkspace: return "workspace";
case SectionKind::kStack: return "stack";
default: return "";
}
}
static std::string AddrToString(void* addr) {
std::stringstream stream;
if (addr != nullptr)
stream << addr;
else
stream << "0x0";
std::string string_addr = stream.str();
return string_addr;
}
std::string RelocateBinarySections(const std::string& binary_path,
DevPtr text,
DevPtr rodata,
DevPtr data,
DevPtr bss,
std::string RelocateBinarySections(
const std::string& binary_path,
size_t word_size,
DevPtr text_start,
DevPtr rodata_start,
DevPtr data_start,
DevPtr bss_start,
DevPtr stack_end,
const std::string& toolchain_prefix) {
const auto* f = Registry::Get("tvm_callback_relocate_binary");
CHECK(f != nullptr)
<< "Require tvm_callback_relocate_binary to exist in registry";
std::string relocated_bin = (*f)(binary_path,
AddrToString(text.cast_to<void*>()),
AddrToString(rodata.cast_to<void*>()),
AddrToString(data.cast_to<void*>()),
AddrToString(bss.cast_to<void*>()),
word_size,
text_start.cast_to<uint64_t>(),
rodata_start.cast_to<uint64_t>(),
data_start.cast_to<uint64_t>(),
bss_start.cast_to<uint64_t>(),
stack_end.cast_to<uint64_t>(),
toolchain_prefix);
return relocated_bin;
}
......
......@@ -46,122 +46,79 @@ enum class SectionKind : size_t {
kData,
kBss,
kArgs,
kStack,
kHeap,
kWorkspace,
kStack,
kNumKinds,
};
/*! \brief default size alignment */
constexpr int kDefaultSizeAlignment = 8;
/*! \brief union for storing values on varying target word sizes */
union TargetVal {
/*! \brief 32-bit pointer */
uint32_t val32;
/*! \brief 64-bit pointer */
uint64_t val64;
};
/*! \brief Base class for interfacing with device locations (pointers/offsets) */
class DeviceLocation {
/*! \brief absolute device address */
class DevPtr {
public:
/*! \brief construct a location with value `value` */
explicit DeviceLocation(std::uintptr_t value) : value_(value) {}
/*! \brief construct a device address with value `value` */
explicit DevPtr(std::uintptr_t value) : value_(TargetVal { .val64 = value }) {}
/*! \brief default constructor */
DeviceLocation() : value_(0) {}
DevPtr() : value_(TargetVal { .val64 = 0 }) {}
/*! \brief construct a null location */
explicit DeviceLocation(std::nullptr_t value) : value_(0) {}
/*! \brief construct a null address */
explicit DevPtr(std::nullptr_t value) : value_(TargetVal { .val64 = 0 }) {}
/*! \brief destructor */
virtual ~DeviceLocation() {}
~DevPtr() {}
/*!
* \brief get value of location
* \return value of location
* \brief get value of pointer
* \return value of pointer
*/
std::uintptr_t value() const { return value_; }
TargetVal value() const { return value_; }
/*!
* \brief cast location to type `T`
* \return casted result
*/
template <typename T>
T cast_to() const { return reinterpret_cast<T>(value_); }
T cast_to() const { return reinterpret_cast<T>(value_.val64); }
/*! \brief check if location is null */
bool operator==(std::nullptr_t) const { return value_ == 0; }
bool operator==(std::nullptr_t) const { return value_.val64 == 0; }
/*! \brief check if location is not null */
bool operator!=(std::nullptr_t) const { return value_ != 0; }
protected:
/*! \brief raw value storing the location */
std::uintptr_t value_;
};
/*! \brief absolute device address */
class DevPtr : public DeviceLocation {
public:
/*! \brief construct an absolute address with value `value` */
explicit DevPtr(std::uintptr_t val) : DeviceLocation(val) {}
/*! \brief default constructor */
DevPtr() : DeviceLocation() {}
/*! \brief construct a null absolute address */
explicit DevPtr(std::nullptr_t val) : DeviceLocation(val) {}
bool operator!=(std::nullptr_t) const { return value_.val64 != 0; }
/*! \brief add an integer to this absolute address to get a larger absolute address */
DevPtr operator+(size_t n) const {
return DevPtr(value_ + n);
return DevPtr(value_.val64 + n);
}
/*! \brief mutably add an integer to this absolute address */
DevPtr& operator+=(size_t n) {
value_ += n;
value_.val64 += n;
return *this;
}
/*! \brief subtract an integer from this absolute address to get a smaller absolute address */
DevPtr operator-(size_t n) const {
return DevPtr(value_ - n);
return DevPtr(value_.val64 - n);
}
/*! \brief mutably subtract an integer from this absolute address */
DevPtr& operator-=(size_t n) {
value_ -= n;
value_.val64 -= n;
return *this;
}
};
/*! \brief offset from device base address */
class DevBaseOffset : public DeviceLocation {
public:
/*! \brief construct a base offset with value `value` */
explicit DevBaseOffset(std::uintptr_t value) : DeviceLocation(value) {}
/*! \brief default constructor */
DevBaseOffset() : DeviceLocation() {}
/*! \brief construct a null base offset */
explicit DevBaseOffset(std::nullptr_t value) : DeviceLocation(value) {}
/*! \brief add an integer to this base offset to get a larger base offset */
DevBaseOffset operator+(size_t n) const {
return DevBaseOffset(value_ + n);
}
/*! \brief mutably add an integer to this base offset */
DevBaseOffset& operator+=(size_t n) {
value_ += n;
return *this;
}
/*! \brief subtract an integer from this base offset to get a smaller base offset */
DevBaseOffset operator-(size_t n) const {
return DevBaseOffset(value_ - n);
}
/*! \brief mutably subtract an integer from this base offset */
DevBaseOffset& operator-=(size_t n) {
value_ -= n;
return *this;
}
private:
/*! \brief raw value storing the pointer */
TargetVal value_;
};
/*!
......@@ -212,6 +169,10 @@ class SymbolMap {
return result->second;
}
bool HasSymbol(const std::string& name) const {
return map_.find(name) != map_.end();
}
private:
/*! \brief backing map */
std::unordered_map<std::string, DevPtr> map_;
......@@ -220,7 +181,7 @@ class SymbolMap {
/*! \brief struct containing start and size of a device memory region */
struct DevMemRegion {
/*! \brief section start address */
DevBaseOffset start;
DevPtr start;
/*! \brief size of section */
size_t size;
};
......@@ -239,16 +200,13 @@ struct BinaryInfo {
SymbolMap symbol_map;
};
// TODO(weberlo): should this be here?
/*! \brief number of bytes in each page */
constexpr int kPageSize = 4096;
const DevBaseOffset kDeviceStart = DevBaseOffset(64);
/*!
* \brief return default size of given section kind in bytes
*/
size_t GetDefaultSectionSize(SectionKind kind);
struct BinaryContents {
BinaryInfo binary_info;
std::string text_contents;
std::string rodata_contents;
std::string data_contents;
std::string bss_contents;
};
/*!
* \brief upper-aligns value according to specified alignment
......@@ -270,18 +228,23 @@ const char* SectionToString(SectionKind section);
/*!
* \brief links binary by repositioning section addresses
* \param binary_path input binary filename
* \param text new text section address
* \param rodata new rodata section address
* \param data new data section address
* \param bss new bss section address
* \param word_size word size on the target machine
* \param text_start text section address
* \param rodata_start rodata section address
* \param data_start data section address
* \param bss_start bss section address
* \param stack_end stack section end address
* \param toolchain_prefix prefix of compiler toolchain to use
* \return relocated binary file contents
*/
std::string RelocateBinarySections(const std::string& binary_name,
DevPtr text,
DevPtr rodata,
DevPtr data,
DevPtr bss,
std::string RelocateBinarySections(
const std::string& binary_path,
size_t word_size,
DevPtr text_start,
DevPtr rodata_start,
DevPtr data_start,
DevPtr bss_start,
DevPtr stack_end,
const std::string& toolchain_prefix);
/*!
......@@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary,
size_t GetSectionSize(const std::string& binary_name,
SectionKind section,
const std::string& toolchain_prefix,
size_t align = kDefaultSizeAlignment);
size_t align);
} // namespace runtime
} // namespace tvm
......
......@@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI {
void FreeDataSpace(TVMContext ctx, void* ptr) final {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(ptr);
dev_space->session->FreeInSection(
SectionKind::kHeap, DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data)));
SectionKind::kHeap, DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space;
}
......@@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset);
DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
std::vector<uint8_t> buffer(size);
lld->Read(from_dev_offset, static_cast<void*>(buffer.data()), size);
lld->Write(to_dev_offset, static_cast<void*>(buffer.data()), size);
lld->Read(from_dev_addr, static_cast<void*>(buffer.data()), size);
lld->Write(to_dev_addr, static_cast<void*>(buffer.data()), size);
} else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) {
// Reading from the device.
......@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset);
DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
void* to_host_ptr = GetHostLoc(to, to_offset);
lld->Read(from_dev_offset, to_host_ptr, size);
lld->Read(from_dev_addr, to_host_ptr, size);
} else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) {
// Writing to the device.
......@@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI {
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
void* from_host_ptr = GetHostLoc(from, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset);
lld->Write(to_dev_offset, from_host_ptr, size);
DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
lld->Write(to_dev_addr, from_host_ptr, size);
} else {
LOG(FATAL) << "Expect copy from/to micro device or between micro device\n";
}
......@@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(data);
ObjectPtr<MicroSession>& session = dev_space->session;
session->FreeInSection(SectionKind::kWorkspace,
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data)));
DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space;
}
......@@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI {
}
private:
DevBaseOffset GetDevLoc(MicroDevSpace* dev_space, size_t offset) {
DevBaseOffset dev_offset =
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
return dev_offset;
DevPtr GetDevLoc(MicroDevSpace* dev_space, size_t offset) {
return DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
}
void* GetHostLoc(const void* ptr, size_t offset) {
......
......@@ -55,62 +55,48 @@ class MicroModuleNode final : public ModuleNode {
*/
void InitMicroModule(const std::string& binary_path) {
session_ = MicroSession::Current();
binary_path_ = binary_path;
binary_info_ = session_->LoadBinary(binary_path_);
}
/*!
* \brief runs selected function on the micro device
* \param func_name name of the function to be run
* \param func_offset offset of the function to be run
* \param args type-erased arguments passed to the function
*/
void RunFunction(const std::string& func_name, DevBaseOffset func_offset, const TVMArgs& args) {
session_->PushToExecQueue(func_offset, args);
symbol_map_ = session_->LoadBinary(binary_path, true).symbol_map;
}
private:
/*! \brief module binary info */
BinaryInfo binary_info_;
/*! \brief path to module binary */
std::string binary_path_;
SymbolMap symbol_map_;
/*! \brief global session pointer */
ObjectPtr<MicroSession> session_;
};
/*!
 * \brief callable that is wrapped in a PackedFunc and forwards each invocation
 *        to the micro session it was created with
 */
class MicroWrappedFunc {
public:
MicroWrappedFunc(MicroModuleNode* m,
ObjectPtr<MicroSession> session,
const std::string& func_name,
DevBaseOffset func_offset) {
m_ = m;
MicroWrappedFunc(ObjectPtr<MicroSession> session,
DevPtr func_ptr) {
session_ = session;
func_name_ = func_name;
func_offset_ = func_offset;
func_ptr_ = func_ptr;
}
/*!
 * \brief hands the stored function and `args` to the session's execution queue
 * \param args type-erased arguments passed to the function
 * \param rv receives the value returned by `MicroSession::PushToExecQueue`
 */
void operator()(TVMArgs args, TVMRetValue* rv) const {
m_->RunFunction(func_name_, func_offset_, args);
*rv = session_->PushToExecQueue(func_ptr_, args);
}
private:
/*! \brief internal module */
MicroModuleNode* m_;
/*! \brief reference to the session for this function (to keep the session alive) */
ObjectPtr<MicroSession> session_;
/*! \brief name of the function */
std::string func_name_;
/*! \brief offset of the function to be called */
DevBaseOffset func_offset_;
/*! \brief device pointer of the function to be called, as looked up in the symbol map */
DevPtr func_ptr_;
};
PackedFunc MicroModuleNode::GetFunction(
const std::string& name,
const ObjectPtr<Object>& sptr_to_self) {
DevBaseOffset func_offset =
session_->low_level_device()->ToDevOffset(binary_info_.symbol_map[name]);
MicroWrappedFunc f(this, session_, name, func_offset);
DevPtr func_ptr;
// A request for the module main entry point resolves to the symbol of that
// name when the binary defines it, and to "default_function" otherwise.
if (name == tvm::runtime::symbol::tvm_module_main) {
if (symbol_map_.HasSymbol(tvm::runtime::symbol::tvm_module_main)) {
func_ptr = symbol_map_[tvm::runtime::symbol::tvm_module_main];
} else {
func_ptr = symbol_map_["default_function"];
}
} else {
// Any other name is looked up in the symbol map as-is.
func_ptr = symbol_map_[name];
}
// Bundle the resolved device pointer with the session and expose it as a PackedFunc.
MicroWrappedFunc f(session_, func_ptr);
return PackedFunc(f);
}
......
......@@ -38,11 +38,15 @@ class MicroSectionAllocator {
* \brief constructor that specifies section boundaries
* \param region location and size of the section on the device
*/
explicit MicroSectionAllocator(DevMemRegion region)
: start_offset_(region.start),
explicit MicroSectionAllocator(DevMemRegion region, size_t word_size)
: start_addr_(region.start),
size_(0),
capacity_(region.size) {
CHECK_EQ(start_offset_.value() % 8, 0) << "micro section not aligned to 8 bytes";
capacity_(region.size),
word_size_(word_size) {
CHECK_EQ(start_addr_.value().val64 % word_size, 0)
<< "micro section start not aligned to " << word_size << " bytes";
CHECK_EQ(capacity_ % word_size, 0)
<< "micro section end not aligned to " << word_size << " bytes";
}
/*!
......@@ -55,15 +59,15 @@ class MicroSectionAllocator {
* \param size size of allocated memory in bytes
* \return address of the allocated memory region in the section (running out of space is a fatal CHECK failure, not a null return)
*/
DevBaseOffset Allocate(size_t size) {
size_ = UpperAlignValue(size_, 8);
DevPtr Allocate(size_t size) {
// Round the end of the used region up to the target word size so that the
// new allocation starts on a word boundary.
size_ = UpperAlignValue(size_, word_size_);
// Running out of space is reported through CHECK rather than through the return value.
// NOTE(review): the strict `<` also rejects an allocation that would fill the
// section exactly -- confirm whether `<=` was intended.
CHECK(size_ + size < capacity_)
<< "cannot alloc " << size << " bytes in section with start_addr " <<
start_offset_.value();
DevBaseOffset alloc_ptr = start_offset_ + size_;
start_addr_.cast_to<void*>();
DevPtr alloc_addr = start_addr_ + size_;
size_ += size;
alloc_map_[alloc_ptr.value()] = size;
return alloc_ptr;
// Record the allocation's size under its address; Free looks the address up in this map.
alloc_map_[alloc_addr.value().val64] = size;
return alloc_addr;
}
/*!
......@@ -71,10 +75,10 @@ class MicroSectionAllocator {
* \param offs offset to allocated memory
* \note simple allocator scheme, more complex versions will be implemented later
*/
void Free(DevBaseOffset offs) {
std::uintptr_t ptr = offs.value();
CHECK(alloc_map_.find(ptr) != alloc_map_.end()) << "freed pointer was never allocated";
alloc_map_.erase(ptr);
void Free(DevPtr addr) {
CHECK(alloc_map_.find(addr.value().val64) != alloc_map_.end())
<< "freed pointer was never allocated";
alloc_map_.erase(addr.value().val64);
if (alloc_map_.empty()) {
size_ = 0;
}
......@@ -83,17 +87,17 @@ class MicroSectionAllocator {
/*!
* \brief start address of the memory region managed by this allocator
*/
DevBaseOffset start_offset() const { return start_offset_; }
DevPtr start_addr() const { return start_addr_; }
/*!
* \brief current end offset of the space being used in this memory region
* \brief current end addr of the space being used in this memory region
*/
DevBaseOffset curr_end_offset() const { return start_offset_ + size_; }
DevPtr curr_end_addr() const { return start_addr_ + size_; }
/*!
* \brief end offset of the memory region managed by this allocator
* \brief end addr of the memory region managed by this allocator
*/
DevBaseOffset max_end_offset() const { return start_offset_ + capacity_; }
DevPtr max_addr() const { return start_addr_ + capacity_; }
/*!
* \brief size of the section
......@@ -107,13 +111,15 @@ class MicroSectionAllocator {
private:
/*! \brief start address of the section */
DevBaseOffset start_offset_;
DevPtr start_addr_;
/*! \brief current size of the section */
size_t size_;
/*! \brief total storage capacity of the section */
size_t capacity_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
/*! \brief allocation map for allocation sizes */
std::unordered_map<std::uintptr_t, size_t> alloc_map_;
std::unordered_map<uint64_t, size_t> alloc_map_;
};
} // namespace runtime
......
......@@ -23,6 +23,7 @@
#include <dmlc/thread_local.h>
#include <tvm/runtime/registry.h>
#include <memory>
#include <stack>
#include <tuple>
#include <vector>
......@@ -56,99 +57,270 @@ void MicroSession::ExitWithScope() {
entry->session_stack.pop();
}
MicroSession::MicroSession() {
DevBaseOffset curr_start_offset = kDeviceStart;
for (size_t i = 0; i < static_cast<size_t>(SectionKind::kNumKinds); i++) {
size_t section_size = GetDefaultSectionSize(static_cast<SectionKind>(i));
section_allocators_[i] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_start_offset,
.size = section_size,
});
curr_start_offset += section_size;
}
memory_size_ = curr_start_offset.cast_to<size_t>();
}
MicroSession::~MicroSession() {
for (size_t i = 0; i < static_cast<size_t>(SectionKind::kNumKinds); i++) {
section_allocators_[i] = nullptr;
}
low_level_device_ = nullptr;
}
void MicroSession::CreateSession(const std::string& device_type,
MicroSession::MicroSession(
const std::string& comms_method,
const std::string& binary_path,
const std::string& toolchain_prefix,
std::uintptr_t base_addr,
uint64_t text_start,
size_t text_size,
uint64_t rodata_start,
size_t rodata_size,
uint64_t data_start,
size_t data_size,
uint64_t bss_start,
size_t bss_size,
uint64_t args_start,
size_t args_size,
uint64_t heap_start,
size_t heap_size,
uint64_t workspace_start,
size_t workspace_size,
uint64_t stack_start,
size_t stack_size,
size_t word_size,
bool thumb_mode,
const std::string& server_addr,
int port) {
// TODO(weberlo): make device type enum
toolchain_prefix_ = toolchain_prefix;
if (device_type == "host") {
low_level_device_ = HostLowLevelDeviceCreate(memory_size_);
} else if (device_type == "openocd") {
// TODO(weberlo): We need a better way of configuring devices.
low_level_device_ = OpenOCDLowLevelDeviceCreate(base_addr, server_addr, port);
int port)
: toolchain_prefix_(toolchain_prefix)
, word_size_(word_size)
, thumb_mode_(thumb_mode) {
CHECK(word_size_ == 4 || word_size_ == 8) << "unsupported word size " << word_size_;
if (comms_method == "host") {
// TODO(weberlo): move checks to python
CHECK(
text_start == 0 &&
rodata_start == 0 &&
data_start == 0 &&
bss_start == 0 &&
args_start == 0 &&
heap_start == 0 &&
workspace_start == 0 &&
stack_start == 0) << "unable to specify section addresses for host device";
size_t memory_size =
text_size + rodata_size + data_size + bss_size +
args_size + heap_size + workspace_size + stack_size;
void* base_addr;
low_level_device_ = HostLowLevelDeviceCreate(memory_size, &base_addr);
CHECK_EQ(reinterpret_cast<std::uintptr_t>(base_addr) % word_size_, 0)
<< "base address not aligned to " << word_size_ << " bytes";
DevPtr curr_addr = DevPtr(reinterpret_cast<std::uintptr_t>(base_addr));
section_allocators_[0] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = text_size,
}, word_size_);
curr_addr += text_size;
section_allocators_[1] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = rodata_size,
}, word_size_);
curr_addr += rodata_size;
section_allocators_[2] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = data_size,
}, word_size_);
curr_addr += data_size;
section_allocators_[3] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = bss_size,
}, word_size_);
curr_addr += bss_size;
section_allocators_[4] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = args_size,
}, word_size_);
curr_addr += args_size;
section_allocators_[5] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = heap_size,
}, word_size_);
curr_addr += heap_size;
section_allocators_[6] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = workspace_size,
}, word_size_);
curr_addr += workspace_size;
section_allocators_[7] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = stack_size,
}, word_size_);
curr_addr += stack_size;
} else if (comms_method == "openocd") {
low_level_device_ = OpenOCDLowLevelDeviceCreate(server_addr, port);
section_allocators_[0] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(text_start),
.size = text_size,
}, word_size_);
section_allocators_[1] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(rodata_start),
.size = rodata_size,
}, word_size_);
section_allocators_[2] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(data_start),
.size = data_size,
}, word_size_);
section_allocators_[3] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(bss_start),
.size = bss_size,
}, word_size_);
section_allocators_[4] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(args_start),
.size = args_size,
}, word_size_);
section_allocators_[5] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(heap_start),
.size = heap_size,
}, word_size_);
section_allocators_[6] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(workspace_start),
.size = workspace_size,
}, word_size_);
section_allocators_[7] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(stack_start),
.size = stack_size,
}, word_size_);
} else {
LOG(FATAL) << "unsupported micro low-level device";
}
SetRuntimeBinaryPath(binary_path);
CHECK(!runtime_binary_path_.empty()) << "uTVM runtime not initialized";
runtime_bin_info_ = LoadBinary(runtime_binary_path_, /* patch_dylib_pointers */ false);
utvm_main_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMMain"]);
utvm_done_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMDone"]);
if (device_type == "openocd") {
// Set OpenOCD device's stack pointer.
auto stack_section = GetAllocator(SectionKind::kStack);
low_level_device_->SetStackTop(stack_section->max_end_offset());
runtime_symbol_map_ = LoadBinary(binary_path, false).symbol_map;
// Patch pointers to define the bounds of the workspace section and the word
// size (for allocation alignment).
std::shared_ptr<MicroSectionAllocator> ws_allocator = GetAllocator(SectionKind::kWorkspace);
TargetVal ws_start = ws_allocator->start_addr().value();
TargetVal ws_end = ws_allocator->max_addr().value();
TargetVal target_word_size { .val64 = word_size_ };
if (word_size_ == 4) {
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val32);
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val32);
DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val32);
} else if (word_size_ == 8) {
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val64);
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val64);
DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val64);
}
}
// Patch workspace pointers to the start of the workspace section.
DevBaseOffset workspace_start_offset = GetAllocator(SectionKind::kWorkspace)->start_offset();
DevBaseOffset workspace_end_offset = GetAllocator(SectionKind::kWorkspace)->max_end_offset();
void* workspace_start_addr =
low_level_device_->ToDevPtr(workspace_start_offset).cast_to<void*>();
void* workspace_end_addr =
low_level_device_->ToDevPtr(workspace_end_offset).cast_to<void*>();
DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_begin", workspace_start_addr);
DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_end", workspace_end_addr);
/*!
 * \brief destructor: drops every section allocator, then the low-level device
 */
MicroSession::~MicroSession() {
  const size_t num_sections = static_cast<size_t>(SectionKind::kNumKinds);
  // Clear the per-section allocators first...
  for (size_t section_idx = 0; section_idx != num_sections; ++section_idx) {
    section_allocators_[section_idx] = nullptr;
  }
  // ...and release the device handle last.
  low_level_device_ = nullptr;
}
void MicroSession::PushToExecQueue(DevBaseOffset func, const TVMArgs& args) {
int32_t (*func_dev_addr)(void*, void*, int32_t) =
reinterpret_cast<int32_t (*)(void*, void*, int32_t)>(
low_level_device()->ToDevPtr(func).value());
// Encodes `args` into the args section, writes a task describing the call to
// the device, runs it, and returns the device-reported `utvm_task_time`.
double MicroSession::PushToExecQueue(DevPtr func_ptr, const TVMArgs& args) {
// On targets that require it, function addresses carry the thumb-mode bit.
if (thumb_mode_) {
func_ptr += 1;
}
// Create an encoder that lays data out starting right after the most recent
// allocation in the args section.
DevPtr args_addr =
low_level_device()->ToDevPtr(GetAllocator(SectionKind::kArgs)->curr_end_offset());
TargetDataLayoutEncoder encoder(args_addr);
DevPtr args_addr = GetAllocator(SectionKind::kArgs)->curr_end_addr();
TargetDataLayoutEncoder encoder(args_addr, word_size_);
std::tuple<DevPtr, DevPtr> arg_field_addrs = EncoderAppend(&encoder, args);
// Reserve space in the args section for the encoded buffer and flush the
// encoder's buffer to device memory.
DevBaseOffset stream_dev_offset =
DevPtr stream_dev_addr =
GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size());
low_level_device()->Write(stream_dev_offset,
low_level_device()->Write(stream_dev_addr,
reinterpret_cast<void*>(encoder.data()),
encoder.buf_size());
UTVMTask task = {
.func = func_dev_addr,
.arg_values = std::get<0>(arg_field_addrs).cast_to<TVMValue*>(),
.arg_type_codes = std::get<1>(arg_field_addrs).cast_to<int*>(),
TargetVal arg_values_dev_addr = std::get<0>(arg_field_addrs).value();
TargetVal arg_type_codes_dev_addr = std::get<1>(arg_field_addrs).value();
// The task struct written to the device depends on the target word size:
// its address fields are filled from the 32-bit or the 64-bit view of each value.
if (word_size_ == 4) {
UTVMTask32 task = {
.func = func_ptr.value().val32,
.arg_values = arg_values_dev_addr.val32,
.arg_type_codes = arg_type_codes_dev_addr.val32,
.num_args = args.num_args,
};
// Write the task.
DevSymbolWrite(runtime_symbol_map_, "utvm_task", task);
} else if (word_size_ == 8) {
UTVMTask64 task = {
.func = func_ptr.value().val64,
.arg_values = arg_values_dev_addr.val64,
.arg_type_codes = arg_type_codes_dev_addr.val64,
.num_args = args.num_args,
};
// Write the task.
DevSymbolWrite(runtime_symbol_map(), "task", task);
DevSymbolWrite(runtime_symbol_map_, "utvm_task", task);
}
DevPtr utvm_init_addr = runtime_symbol_map_["UTVMInit"];
DevPtr utvm_done_addr = runtime_symbol_map_["UTVMDone"];
// Only the UTVMInit address gets the thumb-mode bit.
// NOTE(review): UTVMDone is left unmodified, presumably because it is used as
// a stop address rather than being called -- confirm in LowLevelDevice::Execute.
if (thumb_mode_) {
utvm_init_addr += 1;
}
low_level_device()->Execute(utvm_main_symbol_, utvm_done_symbol_);
low_level_device()->Execute(utvm_init_addr, utvm_done_addr);
// Check if there was an error during execution. If so, log it.
CheckDeviceError();
// Read the task time back from the device before releasing the args buffer.
// NOTE(review): the unit of `utvm_task_time` is not visible here.
uint32_t task_time = DevSymbolRead<uint32_t>(runtime_symbol_map_, "utvm_task_time");
GetAllocator(SectionKind::kArgs)->Free(stream_dev_addr);
return static_cast<double>(task_time);
}
// Loads the binary at `binary_path` onto the device: sizes and allocates its
// text/rodata/data/bss sections, relocates the binary to the allocated
// addresses, writes each section to the device, and returns the section
// regions together with the relocated binary's symbol map.
BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) {
DevMemRegion text_section;
DevMemRegion rodata_section;
DevMemRegion data_section;
DevMemRegion bss_section;
// Section sizes are queried with the target word size as the alignment argument.
text_section.size = GetSectionSize(
binary_path, SectionKind::kText, toolchain_prefix_, word_size_);
rodata_section.size = GetSectionSize(
binary_path, SectionKind::kRodata, toolchain_prefix_, word_size_);
data_section.size = GetSectionSize(
binary_path, SectionKind::kData, toolchain_prefix_, word_size_);
bss_section.size = GetSectionSize(
binary_path, SectionKind::kBss, toolchain_prefix_, word_size_);
text_section.start = AllocateInSection(SectionKind::kText, text_section.size);
rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size);
data_section.start = AllocateInSection(SectionKind::kData, data_section.size);
bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size);
// NOTE(review): MicroSectionAllocator::Allocate CHECK-fails when a section is
// full, so this null check looks unreachable as a failure path -- confirm.
CHECK(text_section.start != nullptr && rodata_section.start != nullptr &&
data_section.start != nullptr && bss_section.start != nullptr)
<< "not enough space to load module on device";
GetAllocator(SectionKind::kArgs)->Free(stream_dev_offset);
// Relink the binary so its sections sit at the addresses just allocated; the
// end address of the stack section is passed along as `stack_end`.
std::string relocated_bin = RelocateBinarySections(
binary_path,
word_size_,
text_section.start,
rodata_section.start,
data_section.start,
bss_section.start,
GetAllocator(SectionKind::kStack)->max_addr(),
toolchain_prefix_);
std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_);
std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_);
std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_);
std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_);
// NOTE(review): each write length is the size reported by GetSectionSize, not
// the length of the string returned by ReadSection -- confirm that ReadSection
// always yields at least that many bytes.
low_level_device_->Write(text_section.start, &text_contents[0], text_section.size);
low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size);
low_level_device_->Write(data_section.start, &data_contents[0], data_section.size);
low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size);
SymbolMap symbol_map {relocated_bin, toolchain_prefix_};
if (patch_dylib_pointers) {
// Patch device lib pointers.
PatchImplHole(symbol_map, "TVMBackendAllocWorkspace");
PatchImplHole(symbol_map, "TVMBackendFreeWorkspace");
PatchImplHole(symbol_map, "TVMAPISetLastError");
}
return BinaryInfo {
.text_section = text_section,
.rodata_section = rodata_section,
.data_section = data_section,
.bss_section = bss_section,
.symbol_map = symbol_map,
};
}
std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
......@@ -171,7 +343,12 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
// Mutate the array to unwrap the `data` field.
base_arr_handle->data = reinterpret_cast<MicroDevSpace*>(old_data)->data;
// Now, encode the unwrapped version.
void* arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to<void*>();
void* arr_ptr = nullptr;
if (word_size_ == 4) {
arr_ptr = EncoderAppend<TVMArray32>(encoder, *base_arr_handle).cast_to<void*>();
} else if (word_size_ == 8) {
arr_ptr = EncoderAppend<TVMArray64>(encoder, *base_arr_handle).cast_to<void*>();
}
// And restore the original wrapped version.
base_arr_handle->data = old_data;
......@@ -190,54 +367,53 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
}
}
type_codes_slot.WriteArray(type_codes, num_args);
return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr());
}
template <typename T>
DevPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr) {
auto tvm_arr_slot = encoder->Alloc<TVMArray>();
auto tvm_arr_slot = encoder->Alloc<T>();
auto shape_slot = encoder->Alloc<int64_t>(arr.ndim);
// `shape` and `strides` are stored on the host, so we need to write them to
// the device first. The `data` field is already allocated on the device and
// is a device pointer, so we don't need to write it.
shape_slot.WriteArray(arr.shape, arr.ndim);
DevPtr shape_addr = shape_slot.start_addr();
DevPtr strides_addr = DevPtr(nullptr);
DevPtr shape_dev_addr = shape_slot.start_addr();
DevPtr strides_dev_addr = DevPtr(nullptr);
if (arr.strides != nullptr) {
auto stride_slot = encoder->Alloc<int64_t>(arr.ndim);
stride_slot.WriteArray(arr.strides, arr.ndim);
strides_addr = stride_slot.start_addr();
strides_dev_addr = stride_slot.start_addr();
}
// Copy `arr`, update the copy's pointers to be device pointers, then
// write the copy to `tvm_arr_slot`.
TVMArray dev_arr = arr;
// Update the device type to look like a host, because codegen generates
// checks that it is a host array.
T dev_arr(
TargetVal { .val64 = reinterpret_cast<uint64_t>(arr.data) },
arr.ctx,
arr.ndim,
arr.dtype,
shape_dev_addr.value(),
strides_dev_addr.value(),
TargetVal { .val64 = arr.byte_offset });
CHECK(dev_arr.ctx.device_type == static_cast<DLDeviceType>(kDLMicroDev))
<< "attempt to write TVMArray with non-micro device type";
// Update the device type to CPU, because from the microcontroller's
// perspective, it is.
dev_arr.ctx.device_type = DLDeviceType::kDLCPU;
// Add the base address of the device to the array's data's device offset to
// get a device address.
DevBaseOffset arr_offset(reinterpret_cast<std::uintptr_t>(arr.data));
dev_arr.data = low_level_device()->ToDevPtr(arr_offset).cast_to<void*>();
dev_arr.shape = shape_addr.cast_to<int64_t*>();
dev_arr.strides = strides_addr.cast_to<int64_t*>();
tvm_arr_slot.WriteValue(dev_arr);
return tvm_arr_slot.start_addr();
}
void MicroSession::CheckDeviceError() {
int32_t return_code = DevSymbolRead<int32_t>(runtime_symbol_map(), "utvm_return_code");
int32_t return_code = DevSymbolRead<int32_t>(runtime_symbol_map_, "utvm_return_code");
if (return_code) {
std::uintptr_t last_error =
DevSymbolRead<std::uintptr_t>(runtime_symbol_map(), "utvm_last_error");
DevSymbolRead<std::uintptr_t>(runtime_symbol_map_, "utvm_last_error");
std::string last_error_str;
if (last_error) {
DevBaseOffset last_err_offset = low_level_device()->ToDevOffset(DevPtr(last_error));
last_error_str = ReadString(last_err_offset);
DevPtr last_err_addr = DevPtr(last_error);
last_error_str = ReadString(last_err_addr);
}
LOG(FATAL) << "error during micro function execution:\n"
<< " return code: " << std::dec << return_code << "\n"
......@@ -246,100 +422,51 @@ void MicroSession::CheckDeviceError() {
}
}
BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) {
DevMemRegion text_section;
DevMemRegion rodata_section;
DevMemRegion data_section;
DevMemRegion bss_section;
text_section.size = GetSectionSize(binary_path, SectionKind::kText, toolchain_prefix_);
rodata_section.size = GetSectionSize(binary_path, SectionKind::kRodata, toolchain_prefix_);
data_section.size = GetSectionSize(binary_path, SectionKind::kData, toolchain_prefix_);
bss_section.size = GetSectionSize(binary_path, SectionKind::kBss, toolchain_prefix_);
text_section.start = AllocateInSection(SectionKind::kText, text_section.size);
rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size);
data_section.start = AllocateInSection(SectionKind::kData, data_section.size);
bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size);
CHECK(text_section.start != nullptr && rodata_section.start != nullptr &&
data_section.start != nullptr && bss_section.start != nullptr)
<< "not enough space to load module on device";
std::string relocated_bin = RelocateBinarySections(
binary_path,
low_level_device_->ToDevPtr(text_section.start),
low_level_device_->ToDevPtr(rodata_section.start),
low_level_device_->ToDevPtr(data_section.start),
low_level_device_->ToDevPtr(bss_section.start),
toolchain_prefix_);
std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_);
std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_);
std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_);
std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_);
low_level_device_->Write(text_section.start, &text_contents[0], text_section.size);
low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size);
low_level_device_->Write(data_section.start, &data_contents[0], data_section.size);
low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size);
SymbolMap symbol_map {relocated_bin, toolchain_prefix_};
if (patch_dylib_pointers) {
// Patch device lib pointers.
PatchImplHole(symbol_map, "TVMBackendAllocWorkspace");
PatchImplHole(symbol_map, "TVMBackendFreeWorkspace");
PatchImplHole(symbol_map, "TVMAPISetLastError");
}
return BinaryInfo {
.text_section = text_section,
.rodata_section = rodata_section,
.data_section = data_section,
.bss_section = bss_section,
.symbol_map = symbol_map,
};
}
// Writes the address of the runtime's implementation of `func_name` into the
// symbol named `func_name` + "_" of the binary described by `symbol_map`.
void MicroSession::PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name) {
void* runtime_impl_addr = runtime_symbol_map()[func_name].cast_to<void*>();
DevPtr runtime_impl_addr = runtime_symbol_map_[func_name];
// On targets that require it, function addresses carry the thumb-mode bit.
if (thumb_mode_) {
runtime_impl_addr += 1;
}
std::ostringstream func_name_underscore;
func_name_underscore << func_name << "_";
DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr);
}
void MicroSession::SetRuntimeBinaryPath(std::string path) {
runtime_binary_path_ = path;
// The address is written using the 32-bit or 64-bit view that matches the target word size.
if (word_size_ == 4) {
DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val32);
} else if (word_size_ == 8) {
DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val64);
}
}
// Reads a NUL-terminated string from the device, starting at the given location.
std::string MicroSession::ReadString(DevBaseOffset str_offset) {
std::string MicroSession::ReadString(DevPtr str_addr) {
std::ostringstream result;
// The string length is not known up front, so it is fetched in fixed-size chunks.
// NOTE(review): every read fetches a full chunk, which may extend past the
// terminator -- confirm that reading beyond the string is safe on the device.
const size_t buf_size = 256;
std::vector<char> buf(buf_size, 0);
// `i` counts the characters consumed from the latest chunk; i == buf_size
// means no terminator was found in it, so another chunk is read.
size_t i = buf_size;
while (i == buf_size) {
low_level_device()->Read(str_offset, buf.data(), buf_size);
low_level_device()->Read(str_addr, buf.data(), buf_size);
i = 0;
while (i < buf_size) {
// Stop at the terminator; it is not appended to the result.
if (buf[i] == 0) break;
result << buf[i];
i++;
}
// Advance past the characters just consumed.
str_offset = str_offset + i;
str_addr = str_addr + i;
}
return result.str();
}
DevBaseOffset MicroSession::AllocateInSection(SectionKind type, size_t size) {
DevPtr MicroSession::AllocateInSection(SectionKind type, size_t size) {
return GetAllocator(type)->Allocate(size);
}
void MicroSession::FreeInSection(SectionKind type, DevBaseOffset ptr) {
return GetAllocator(type)->Free(ptr);
void MicroSession::FreeInSection(SectionKind type, DevPtr addr) {
return GetAllocator(type)->Free(addr);
}
// Reads a value of type T from the device at the address of `symbol` in `symbol_map`.
// NOTE(review): the number of bytes read is the host's sizeof(T); callers need
// a T whose size matches the symbol's size on the target (e.g. a
// host-pointer-sized T may be wider than a pointer on a 4-byte-word target).
template <typename T>
T MicroSession::DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol) {
DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]);
DevPtr sym_addr = symbol_map[symbol];
T result;
low_level_device()->Read(sym_offset, &result, sizeof(T));
low_level_device()->Read(sym_addr, &result, sizeof(T));
return result;
}
......@@ -347,8 +474,8 @@ template <typename T>
void MicroSession::DevSymbolWrite(const SymbolMap& symbol_map,
const std::string& symbol,
const T& value) {
DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]);
low_level_device()->Write(sym_offset, &value, sizeof(T));
DevPtr sym_addr = symbol_map[symbol];
low_level_device()->Write(sym_addr, &value, sizeof(T));
}
PackedFunc MicroSession::GetFunction(
......@@ -370,15 +497,53 @@ PackedFunc MicroSession::GetFunction(
// create micro session and low-level device from Python frontend
TVM_REGISTER_GLOBAL("micro._CreateSession")
.set_body([](TVMArgs args, TVMRetValue* rv) {
const std::string& device_type = args[0];
const std::string& comms_method = args[0];
const std::string& binary_path = args[1];
const std::string& toolchain_prefix = args[2];
uint64_t base_addr = args[3];
const std::string& server_addr = args[4];
int port = args[5];
ObjectPtr<MicroSession> session = make_object<MicroSession>();
session->CreateSession(
device_type, binary_path, toolchain_prefix, base_addr, server_addr, port);
uint64_t text_start = args[3];
size_t text_size = args[4];
uint64_t rodata_start = args[5];
size_t rodata_size = args[6];
uint64_t data_start = args[7];
size_t data_size = args[8];
uint64_t bss_start = args[9];
size_t bss_size = args[10];
uint64_t args_start = args[11];
size_t args_size = args[12];
uint64_t heap_start = args[13];
size_t heap_size = args[14];
uint64_t workspace_start = args[15];
size_t workspace_size = args[16];
uint64_t stack_start = args[17];
size_t stack_size = args[18];
size_t word_size = args[19];
bool thumb_mode = args[20];
const std::string& server_addr = args[21];
int port = args[22];
ObjectPtr<MicroSession> session = make_object<MicroSession>(
comms_method,
binary_path,
toolchain_prefix,
text_start,
text_size,
rodata_start,
rodata_size,
data_start,
data_size,
bss_start,
bss_size,
args_start,
args_size,
heap_start,
heap_size,
workspace_start,
workspace_size,
stack_start,
stack_size,
word_size,
thumb_mode,
server_addr,
port);
*rv = Module(session);
});
......
......@@ -47,7 +47,6 @@
#include <tuple>
#include "low_level_device.h"
#include "device/utvm_runtime.h"
#include "target_data_layout_encoder.h"
namespace tvm {
......@@ -75,9 +74,55 @@ class MicroSession : public ModuleNode {
}
/*!
* \brief constructor
* \brief creates session by setting up a low-level device and initting allocators for it
* \param comms_method method of communication with the device (e.g., "openocd")
* \param binary_path file system path to the runtime binary
* \param toolchain_prefix GCC toolchain prefix
* \param text_start text section start address
* \param text_size text section size
* \param rodata_start rodata section start address
* \param rodata_size rodata section size
* \param data_start data section start address
* \param data_size data section size
* \param bss_start bss section start address
* \param bss_size bss section size
* \param args_start args section start address
* \param args_size args section size
* \param heap_start heap section start address
* \param heap_size heap section size
* \param workspace_start workspace section start address
* \param workspace_size workspace section size
* \param stack_start stack section start address
* \param stack_size stack section size
* \param word_size number of bytes in a word on the target device
* \param thumb_mode whether the target device requires a thumb-mode bit on function addresses
* \param server_addr address of the OpenOCD server to connect to (if `comms_method == "openocd"`)
* \param port port of the OpenOCD server to connect to (if `comms_method == "openocd"`)
*/
MicroSession();
MicroSession(
const std::string& comms_method,
const std::string& binary_path,
const std::string& toolchain_prefix,
uint64_t text_start,
size_t text_size,
uint64_t rodata_start,
size_t rodata_size,
uint64_t data_start,
size_t data_size,
uint64_t bss_start,
size_t bss_size,
uint64_t args_start,
size_t args_size,
uint64_t heap_start,
size_t heap_size,
uint64_t workspace_start,
size_t workspace_size,
uint64_t stack_start,
size_t stack_size,
size_t word_size,
bool thumb_mode,
const std::string& server_addr,
int port);
/*!
* \brief destructor
......@@ -87,20 +132,20 @@ class MicroSession : public ModuleNode {
static ObjectPtr<MicroSession>& Current();
/*!
* \brief creates session by setting up a low-level device and initting allocators for it
* \param args TVMArgs passed into the micro.init packedfunc
* \brief sets up runtime metadata for `func` and copies arguments for on-device execution
* \param func address of the function to be executed
* \param args args to the packed function
* \return elapsed time during function execution on the device
*/
void CreateSession(const std::string& device_type,
const std::string& binary_path,
const std::string& toolchain_prefix,
std::uintptr_t base_addr,
const std::string& server_addr,
int port);
double PushToExecQueue(DevPtr func, const TVMArgs& args);
/*!
* \brief ends the session by destructing the low-level device and its allocators
* \brief loads binary onto device
* \param binary_path path to binary object file
* \param patch_dylib_pointers whether to patch runtime API function pointers
* \return info about loaded binary
*/
void EndSession();
BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers);
/*!
* \brief allocate memory in section
......@@ -108,36 +153,21 @@ class MicroSession : public ModuleNode {
* \param size size of allocated memory in bytes
* \return pointer to allocated memory region in section, nullptr if out of space
*/
DevBaseOffset AllocateInSection(SectionKind type, size_t size);
DevPtr AllocateInSection(SectionKind type, size_t size);
/*!
* \brief free prior allocation from section
* \param type type of section to allocate in
* \param ptr pointer to allocated memory
* \param addr device address of allocated memory
*/
void FreeInSection(SectionKind type, DevBaseOffset ptr);
void FreeInSection(SectionKind type, DevPtr addr);
/*!
* \brief read string from device to host
* \param str_offset device offset of first character of string
* \param str_addr device address of first character of string
* \return host copy of device string that was read
*/
std::string ReadString(DevBaseOffset str_offset);
/*!
* \brief sets up runtime metadata for `func` and copies arguments for on-device execution
* \param func address of the function to be executed
* \param args args to the packed function
*/
void PushToExecQueue(DevBaseOffset func, const TVMArgs& args);
/*!
* \brief loads binary onto device
* \param binary_path path to binary object file
* \param patch_dylib_pointers whether runtime API function pointer patching is needed
* \return info about loaded binary
*/
BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers = true);
std::string ReadString(DevPtr str_addr);
/*!
* \brief read value of symbol from device memory
......@@ -174,16 +204,17 @@ class MicroSession : public ModuleNode {
/*! \brief array of memory allocators for each on-device section */
std::shared_ptr<MicroSectionAllocator>
section_allocators_[static_cast<size_t>(SectionKind::kNumKinds)];
/*! \brief total number of bytes of usable device memory for this session */
size_t memory_size_;
/*! \brief uTVM runtime binary info */
BinaryInfo runtime_bin_info_;
/*! \brief path to uTVM runtime source code */
std::string runtime_binary_path_;
/*! \brief offset of the runtime entry function */
DevBaseOffset utvm_main_symbol_;
/*! \brief offset of the runtime exit breakpoint */
DevBaseOffset utvm_done_symbol_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
/*! \brief whether the target device requires a thumb-mode bit on function addresses
*
* ARM and other manufacturers use the lowest bit of a function address to determine
* whether it's a "thumb mode" function. The Thumb ISA is more restricted, but
* results in more compact binaries.
*/
bool thumb_mode_;
/*! \brief symbol map for the device runtime */
SymbolMap runtime_symbol_map_;
/*!
* \brief patches a function pointer in this module to an implementation
......@@ -192,12 +223,6 @@ class MicroSession : public ModuleNode {
void PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name);
/*!
* \brief sets the runtime binary path
* \param path to runtime binary
*/
void SetRuntimeBinaryPath(std::string path);
/*!
* \brief appends arguments to the host-side buffer of `encoder`
* \param encoder encoder being used to append `args`
* \param args args to be appended
......@@ -211,6 +236,7 @@ class MicroSession : public ModuleNode {
* \param arr TVMArray to be appended
* \return device address of the allocated `TVMArray`
*/
template <typename T>
DevPtr EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr);
/*!
......@@ -228,18 +254,11 @@ class MicroSession : public ModuleNode {
}
/*!
* \brief returns the symbol map for the uTVM runtime
* \return reference to symbol map
*/
const SymbolMap& runtime_symbol_map() {
return runtime_bin_info_.symbol_map;
}
/*!
* \brief Push a new session context onto the thread-local stack.
* The session on top of the stack is used as the current global session.
*/
static void EnterWithScope(ObjectPtr<MicroSession> session);
/*!
* \brief Pop a session off the thread-local context stack,
* restoring the previous session as the current context.
......@@ -260,6 +279,118 @@ struct MicroDevSpace {
ObjectPtr<MicroSession> session;
};
// TODO(weberlo): maybe templatize serialization to reduce redundancy
/*!
 * \brief TVM array for serialization to 32-bit devices
 *
 * The `data`, `shape`, `strides`, and `byte_offset` fields are stored as
 * `uint32_t`; the constructor reads them from the `val32` member of the
 * corresponding `TargetVal` argument. The explicit `pad*` fields are always
 * written as zero.
 *
 * NOTE(review): the field order and padding presumably mirror the array
 * struct expected by the device-side runtime -- confirm against the device
 * header before reordering or resizing any member.
 */
struct TVMArray32 {
/*!
 * \brief constructs the 32-bit representation of a TVM array
 * \param data target value whose `val32` member becomes the `data` field
 * \param ctx device context, copied as-is
 * \param ndim number of dimensions, copied as-is
 * \param dtype data type, copied as-is
 * \param shape target value whose `val32` member becomes the `shape` field
 * \param strides target value whose `val32` member becomes the `strides` field
 * \param byte_offset target value whose `val32` member becomes the `byte_offset` field
 */
TVMArray32(
TargetVal data,
DLContext ctx,
int32_t ndim,
DLDataType dtype,
TargetVal shape,
TargetVal strides,
TargetVal byte_offset)
// Parameters shadow the members of the same name; inside each initializer
// the name refers to the constructor parameter.
: data(data.val32),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.val32),
strides(strides.val32),
pad1(0),
byte_offset(byte_offset.val32),
pad2(0) { }
/*! \brief opaque pointer to the allocated data */
uint32_t data;
/*! \brief The device context of the tensor */
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
uint32_t shape;
/*!
* \brief strides of the tensor,
* can be NULL, indicating tensor is compact.
*/
uint32_t strides;
/*! \brief Padding to enforce struct alignment */
uint32_t pad1;
/*! \brief The offset in bytes to the beginning pointer to data */
uint32_t byte_offset;
/*! \brief Padding to enforce struct alignment */
uint32_t pad2;
};
/*!
 * \brief TVM array for serialization to 64-bit devices
 *
 * The `data`, `shape`, `strides`, and `byte_offset` fields are stored as
 * `uint64_t`; the constructor reads them from the `val64` member of the
 * corresponding `TargetVal` argument. The explicit `pad0` field is always
 * written as zero.
 *
 * NOTE(review): the field order and padding presumably mirror the array
 * struct expected by the device-side runtime -- confirm against the device
 * header before reordering or resizing any member.
 */
struct TVMArray64 {
/*!
 * \brief constructs the 64-bit representation of a TVM array
 * \param data target value whose `val64` member becomes the `data` field
 * \param ctx device context, copied as-is
 * \param ndim number of dimensions, copied as-is
 * \param dtype data type, copied as-is
 * \param shape target value whose `val64` member becomes the `shape` field
 * \param strides target value whose `val64` member becomes the `strides` field
 * \param byte_offset target value whose `val64` member becomes the `byte_offset` field
 */
TVMArray64(
TargetVal data,
DLContext ctx,
int32_t ndim,
DLDataType dtype,
TargetVal shape,
TargetVal strides,
TargetVal byte_offset)
// Parameters shadow the members of the same name; inside each initializer
// the name refers to the constructor parameter.
: data(data.val64),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.val64),
strides(strides.val64),
byte_offset(byte_offset.val64) { }
/*! \brief opaque pointer to the allocated data */
uint64_t data;
/*! \brief The device context of the tensor */
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
uint64_t shape;
/*!
* \brief strides of the tensor,
* can be NULL, indicating tensor is compact.
*/
uint64_t strides;
/*! \brief The offset in bytes to the beginning pointer to data */
uint64_t byte_offset;
};
/*!
 * \brief MicroTVM task for serialization to 32-bit devices
 *
 * The function pointer and both array pointers are held as `uint32_t`
 * values rather than host pointers.
 *
 * NOTE(review): presumably this must match the task struct layout used by
 * the device-side runtime -- confirm before changing field order or types.
 */
typedef struct StructUTVMTask32 {
/*! \brief Pointer to function to call for this task */
uint32_t func;
/*! \brief Array of argument values */
uint32_t arg_values;
/*! \brief Array of type codes for each argument value */
uint32_t arg_type_codes;
/*! \brief Number of arguments */
int32_t num_args;
} UTVMTask32;
/*!
 * \brief MicroTVM task for serialization to 64-bit devices
 *
 * The function pointer and both array pointers are held as `uint64_t`
 * values rather than host pointers; `num_args` stays 32 bits wide.
 *
 * NOTE(review): presumably this must match the task struct layout used by
 * the device-side runtime -- confirm before changing field order or types.
 */
typedef struct StructUTVMTask64 {
/*! \brief Pointer to function to call for this task */
uint64_t func;
/*! \brief Array of argument values */
uint64_t arg_values;
/*! \brief Array of type codes for each argument value */
uint64_t arg_type_codes;
/*! \brief Number of arguments */
int32_t num_args;
} UTVMTask64;
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_
......@@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
public:
/*!
* \brief constructor to initialize connection to openocd device
* \param base_addr base address of the device
* \param server_addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to
*/
explicit OpenOCDLowLevelDevice(std::uintptr_t base_addr,
const std::string& server_addr,
explicit OpenOCDLowLevelDevice(const std::string& server_addr,
int port) : socket_() {
socket_.Connect(tvm::common::SockAddr(server_addr.c_str(), port));
socket_.cmd_builder() << "reset halt";
server_addr_ = server_addr;
port_ = port;
socket_.Connect(tvm::common::SockAddr(server_addr_.c_str(), port_));
socket_.cmd_builder() << "halt 0";
socket_.SendCommand();
base_addr_ = base_addr;
CHECK(base_addr_ % 8 == 0) << "base address not aligned to 8 bytes";
}
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) {
void Read(DevPtr addr, void* buf, size_t num_bytes) {
if (num_bytes == 0) {
return;
}
......@@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
// TODO(weberlo): Refactor between read and write.
// Check if we need to chunk this read request.
if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
char* curr_buf_ptr = reinterpret_cast<char*>(buf);
while (num_bytes != 0) {
size_t amount_to_read;
......@@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else {
amount_to_read = num_bytes;
}
Read(offset, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read);
offset += amount_to_read;
Read(addr, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read);
addr += amount_to_read;
curr_buf_ptr += amount_to_read;
num_bytes -= amount_to_read;
}
......@@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.cmd_builder() << "array unset output";
socket_.SendCommand();
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder()
<< "mem2array output"
<< " " << std::dec << kWordSize
......@@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
}
}
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) {
void Write(DevPtr addr, const void* buf, size_t num_bytes) {
if (num_bytes == 0) {
return;
}
// Check if we need to chunk this write request.
if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
const char* curr_buf_ptr = reinterpret_cast<const char*>(buf);
while (num_bytes != 0) {
size_t amount_to_write;
......@@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else {
amount_to_write = num_bytes;
}
Write(offset, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write);
offset += amount_to_write;
Write(addr, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write);
addr += amount_to_write;
curr_buf_ptr += amount_to_write;
num_bytes -= amount_to_write;
}
......@@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand();
}
{
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder()
<< "array2mem input"
<< " " << std::dec << kWordSize
......@@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
}
}
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) {
void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
socket_.cmd_builder() << "halt 0";
socket_.SendCommand();
// Set up the stack pointer.
DevPtr stack_end = stack_top() - 8;
socket_.cmd_builder() << "reg sp " << stack_end.cast_to<void*>();
socket_.SendCommand();
// Set a breakpoint at the beginning of `UTVMDone`.
socket_.cmd_builder() << "bp " << ToDevPtr(breakpoint).cast_to<void*>() << " 2";
socket_.cmd_builder() << "bp " << breakpoint_addr.cast_to<void*>() << " 2";
socket_.SendCommand();
DevPtr func_addr = DevPtr(base_addr_ + func_offset.value());
socket_.cmd_builder() << "resume " << func_addr.cast_to<void*>();
socket_.SendCommand();
......@@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand();
// Remove the breakpoint.
socket_.cmd_builder() << "rbp " << ToDevPtr(breakpoint).cast_to<void*>();
socket_.cmd_builder() << "rbp " << breakpoint_addr.cast_to<void*>();
socket_.SendCommand();
}
void SetStackTop(DevBaseOffset stack_top) {
stack_top_ = DevPtr(base_addr_ + stack_top.value());
}
std::uintptr_t base_addr() const final {
return base_addr_;
}
DevPtr stack_top() const {
CHECK(stack_top_ != nullptr) << "stack top was never initialized";
return stack_top_;
}
const char* device_type() const final {
return "openocd";
}
private:
/*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_;
/*! \brief top of the stack section */
DevPtr stack_top_;
/*! \brief socket used to communicate with the device through Tcl */
TclSocket socket_;
/*! \brief address of OpenOCD server */
std::string server_addr_;
/*! \brief port of OpenOCD server */
int port_;
/*! \brief number of bytes in a word on the target device (64-bit) */
static const constexpr ssize_t kWordSize = 8;
......@@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
static const constexpr int kWaitTime = 10000;
};
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr,
const std::string& server_addr,
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& server_addr,
int port) {
std::shared_ptr<LowLevelDevice> lld =
std::make_shared<OpenOCDLowLevelDevice>(base_addr, server_addr, port);
std::make_shared<OpenOCDLowLevelDevice>(server_addr, port);
return lld;
}
......
......@@ -25,7 +25,7 @@
#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_
#include <vector>
#include "device/utvm_runtime.h"
#include "host_driven/utvm_runtime.h"
namespace tvm {
namespace runtime {
......@@ -96,9 +96,9 @@ class TargetDataLayoutEncoder {
* \brief constructor
* \param start_addr start address of the encoder in device memory
*/
explicit TargetDataLayoutEncoder(DevPtr start_addr)
: buf_(std::vector<uint8_t>()), curr_offset_(0) {
start_addr_ = DevPtr(UpperAlignValue(start_addr.value(), 8));
explicit TargetDataLayoutEncoder(DevPtr start_addr, size_t word_size)
: buf_(std::vector<uint8_t>()), curr_offset_(0), word_size_(word_size) {
start_addr_ = DevPtr(UpperAlignValue(start_addr.value().val64, word_size_));
}
/*!
......@@ -108,7 +108,7 @@ class TargetDataLayoutEncoder {
*/
template <typename T>
Slot<T> Alloc(size_t num_elems = 1) {
curr_offset_ = UpperAlignValue(curr_offset_, 8);
curr_offset_ = UpperAlignValue(curr_offset_, word_size_);
size_t size = sizeof(T) * num_elems;
if (curr_offset_ + size > buf_.size()) {
buf_.resize(curr_offset_ + size);
......@@ -141,6 +141,8 @@ class TargetDataLayoutEncoder {
size_t curr_offset_;
/*! \brief start address of the encoder in device memory */
DevPtr start_addr_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
};
template <typename T>
......
......@@ -21,6 +21,7 @@
* \file rpc_session.cc
* \brief RPC session for remote function call.
*/
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/registry.h>
......@@ -40,6 +41,7 @@
namespace tvm {
namespace runtime {
// Temp buffer for data array
struct RPCByteArrayBuffer {
TVMByteArray arr;
......@@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() {
CHECK_EQ(state_, kRecvCode);
}
/*!
 * \brief builds a timing wrapper around `pf` for micro devices
 *
 * The returned function invokes `pf` once as a warm-up, then performs `repeat`
 * rounds of `number` invocations each. After every invocation the value left
 * in the return slot is read as a double (presumably the elapsed time reported
 * by the device -- confirm against the micro module's packed functions) and
 * divided by `number`; the per-round sums are written out as raw doubles and
 * returned to the caller as a byte array.
 *
 * \param pf packed function to be timed
 * \param ctx context used for stream synchronization after each call
 * \param number number of invocations averaged within one round
 * \param repeat number of rounds (one double is emitted per round)
 * \return packed function that performs the measurement when called
 */
PackedFunc MicroTimeEvaluator(PackedFunc pf, TVMContext ctx, int number, int repeat) {
  return PackedFunc(
      [pf, ctx, number, repeat](TVMArgs args, TVMRetValue* rv) mutable {
        TVMRetValue call_result;
        std::ostringstream timing_bytes;

        // skip first time call, to activate lazy compilation components.
        pf.CallPacked(args, &call_result);
        DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr);

        int rounds_left = repeat;
        while (rounds_left > 0) {
          double round_mean = 0.0;
          int calls_left = number;
          while (calls_left > 0) {
            pf.CallPacked(args, &call_result);
            DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr);
            // Accumulate term-by-term (not sum-then-divide) to keep the exact
            // floating-point result of the running average.
            round_mean += (call_result.operator double()) / number;
            --calls_left;
          }
          timing_bytes.write(reinterpret_cast<const char*>(&round_mean), sizeof(round_mean));
          --rounds_left;
        }

        // return the time.
        std::string packed = timing_bytes.str();
        TVMByteArray out;
        out.data = packed.data();
        out.size = packed.length();
        *rv = out;
      });
}
PackedFunc WrapTimeEvaluator(PackedFunc pf,
TVMContext ctx,
int number,
int repeat,
int min_repeat_ms) {
if (static_cast<int>(ctx.device_type) == static_cast<int>(kDLMicroDev)) {
return MicroTimeEvaluator(pf, ctx, number, repeat);
}
auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable {
TVMRetValue temp;
std::ostringstream os;
......
......@@ -25,6 +25,7 @@ ALLOW_EXTENSION = {
"cc",
"c",
"h",
"s",
"rs",
"m",
"mm",
......
......@@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary():
with open(tmp_bin, "wb") as f:
f.write(binary)
def verify():
text_loc_str = "0x0"
rodata_loc_str = "0x10000"
data_loc_str = "0x20000"
bss_loc_str = "0x30000"
word_size = 8
text_loc = 0x0
rodata_loc = 0x10000
data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX)
tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
print("Relocated binary section sizes")
test_tvm_callback_get_section_size(binary=rel_bin)
relf = tmp_dir.relpath("rel.bin")
......@@ -88,10 +97,6 @@ def test_tvm_callback_relocate_binary():
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(out, _) = nm_proc.communicate()
# Ensure the relocated symbols are within the ranges we specified.
text_loc = int(text_loc_str, 16)
data_loc = int(data_loc_str, 16)
bss_loc = int(bss_loc_str, 16)
symbol_entries = out.decode("utf-8").split("\n")
for entry in symbol_entries:
if len(entry) == 0:
......@@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map():
with open(tmp_bin, "wb") as f:
f.write(binary)
def verify():
text_loc_str = "0x0"
rodata_loc_str = "0x10000"
data_loc_str = "0x20000"
bss_loc_str = "0x30000"
word_size = 8
text_loc = 0x0
rodata_loc = 0x10000
data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX)
tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
symbols = set()
for i, line in enumerate(symbol_map.split('\n')):
......
......@@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import os
import numpy as np
......@@ -22,38 +21,13 @@ import tvm
from tvm.contrib import graph_runtime, util
from tvm import relay
import tvm.micro as micro
from tvm.micro import create_micro_mod
from tvm.relay.testing import resnet
# Use the host emulated micro device.
DEVICE_TYPE = "host"
TOOLCHAIN_PREFIX = ""
def create_micro_mod(c_mod, toolchain_prefix):
"""Produces a micro module from a given module.
Parameters
----------
c_mod : tvm.module.Module
module with "c" as its target backend
toolchain_prefix : str
toolchain prefix to be used (see `tvm.micro.Session` docs)
Return
------
micro_mod : tvm.module.Module
micro module for the target device
"""
temp_dir = util.tempdir()
lib_obj_path = temp_dir.relpath("dev_lib.obj")
c_mod.export_library(
lib_obj_path,
fcompile=tvm.micro.cross_compiler(toolchain_prefix=toolchain_prefix))
micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
return micro_mod
DEV_CONFIG = micro.device.host.default_config()
def relay_micro_build(func, toolchain_prefix, params=None):
def relay_micro_build(func, dev_config, params=None):
"""Create a graph runtime module with a micro device context from a Relay function.
Parameters
......@@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None):
func : relay.Function
function to compile
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
params : dict
input parameters that do not change during inference
......@@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None):
"""
with tvm.build_config(disable_vectorize=True):
graph, c_mod, params = relay.build(func, target="c", params=params)
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX)
micro_mod = create_micro_mod(c_mod, dev_config)
ctx = tvm.micro_dev(0)
mod = graph_runtime.create(graph, micro_mod, ctx)
mod.set_input(**params)
return mod
# TODO(weberlo): Add example program to test scalar double/int TVMValue serialization.
# TODO(weberlo): How can we test the OpenOCD device? The CI would need to have OpenOCD
# and Spike installed.
def test_alloc():
"""Test tensor allocation on the device."""
if not tvm.module.enabled("micro_dev"):
return
shape = (1024,)
dtype = "float32"
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
with micro.Session(DEV_CONFIG):
ctx = tvm.micro_dev(0)
np_tensor = np.random.uniform(size=shape).astype(dtype)
micro_tensor = tvm.nd.array(np_tensor, ctx)
......@@ -112,15 +85,14 @@ def test_add():
func_name = "fadd"
c_mod = tvm.build(s, [A, B, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX)
with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
micro_func(a, b, c)
tvm.testing.assert_allclose(
c.asnumpy(), a.asnumpy() + b.asnumpy())
......@@ -143,8 +115,8 @@ def test_workspace_add():
func_name = "fadd_two_workspace"
c_mod = tvm.build(s, [A, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX)
with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
......@@ -168,8 +140,8 @@ def test_graph_runtime():
z = relay.add(xx, relay.const(1.0))
func = relay.Function([x], z)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
mod = relay_micro_build(func, TOOLCHAIN_PREFIX)
with micro.Session(DEV_CONFIG):
mod = relay_micro_build(func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype)
mod.run(x=x_in)
......@@ -195,9 +167,9 @@ def test_multiple_modules():
ret = relay.subtract(x, relay.const(1.0))
sub_const_func = relay.Function([x], ret)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX):
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
sub_const_mod = relay_micro_build(sub_const_func, TOOLCHAIN_PREFIX)
with micro.Session(DEV_CONFIG):
add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
sub_const_mod = relay_micro_build(sub_const_func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype)
add_const_mod.run(x=x_in)
......@@ -223,8 +195,8 @@ def test_interleave_sessions():
ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEV_CONFIG)
with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
......@@ -232,13 +204,13 @@ def test_interleave_sessions():
np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
with sess_a:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose(
add_result, np_tensor_a + 1.0)
with sess_b:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_b)
add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose(
......@@ -257,15 +229,15 @@ def test_nested_sessions():
ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEV_CONFIG)
with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
with sess_b:
np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose(
......@@ -284,12 +256,12 @@ def test_inactive_session_use():
ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX)
sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEV_CONFIG)
with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX)
add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
with sess_b:
# These objects belong to `sess_a`.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment