Commit 47c870a9 by Logan Weber Committed by Tianqi Chen

[µTVM] Enable AutoTVM for ARM STM32F746XX Boards (#4274)

parent 11af82c0
...@@ -75,10 +75,13 @@ def context(dev_type, dev_id=0): ...@@ -75,10 +75,13 @@ def context(dev_type, dev_id=0):
assert tvm.context("cuda", 0) == tvm.gpu(0) assert tvm.context("cuda", 0) == tvm.gpu(0)
""" """
if isinstance(dev_type, string_types): if isinstance(dev_type, string_types):
dev_type = dev_type.split()[0] if '-device=micro_dev' in dev_type:
if dev_type not in TVMContext.STR2MASK: dev_type = 'micro_dev'
raise ValueError("Unknown device type %s" % dev_type) else:
dev_type = TVMContext.STR2MASK[dev_type] dev_type = dev_type.split()[0]
if dev_type not in TVMContext.STR2MASK:
raise ValueError("Unknown device type %s" % dev_type)
dev_type = TVMContext.STR2MASK[dev_type]
return TVMContext(dev_type, dev_id) return TVMContext(dev_type, dev_id)
......
...@@ -19,9 +19,81 @@ ...@@ -19,9 +19,81 @@
import os import os
import subprocess import subprocess
from . import util from . import util
from .._ffi.base import py_str
from ..api import register_func from ..api import register_func
# Linker-script template that is filled in with `str.format` by
# `tvm_callback_relocate_binary`. The named placeholders are:
#   - `stack_pointer_init`, `text_start`, `rodata_start`, `data_start`,
#     `bss_start`: integers, rendered as hexadecimal addresses
#   - `word_size`: alignment passed to every `ALIGN(...)` directive
# Literal braces of the linker script are doubled (`{{` / `}}`) so that
# `str.format` emits them as single braces.
RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
SECTIONS
{{
. = 0x{text_start:x};
. = ALIGN({word_size});
.text :
{{
. = ALIGN({word_size});
KEEP(*(.text))
KEEP(*(.text*))
. = ALIGN({word_size});
}}
. = 0x{rodata_start:x};
. = ALIGN({word_size});
.rodata :
{{
. = ALIGN({word_size});
KEEP(*(.rodata))
KEEP(*(.rodata*))
. = ALIGN({word_size});
}}
. = 0x{data_start:x};
. = ALIGN({word_size});
.data :
{{
. = ALIGN({word_size});
KEEP(*(.data))
KEEP(*(.data*))
. = ALIGN({word_size});
}}
. = 0x{bss_start:x};
. = ALIGN({word_size});
.bss :
{{
. = ALIGN({word_size});
KEEP(*(.bss))
KEEP(*(.bss*))
. = ALIGN({word_size});
}}
}}
"""
def run_cmd(cmd):
    """Runs `cmd` in a subprocess and awaits its completion.

    stderr is redirected into stdout, so the returned text (and the text
    embedded in the error message on failure) contains both streams.

    Parameters
    ----------
    cmd : List[str]
        list of command-line arguments

    Returns
    -------
    output : str
        resulting stdout (with stderr merged in) capture from the subprocess

    Raises
    ------
    RuntimeError
        if the subprocess exits with a non-zero return code
    """
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT)
    (raw_output, _) = proc.communicate()
    # Tool output is not guaranteed to be valid UTF-8. Replace undecodable
    # bytes instead of raising `UnicodeDecodeError`, which would otherwise
    # hide the command's real failure message.
    output = raw_output.decode("utf-8", errors="replace")
    if proc.returncode != 0:
        cmd_str = " ".join(cmd)
        msg = f"error while running command \"{cmd_str}\":\n{output}"
        raise RuntimeError(msg)
    return output
@register_func("tvm_callback_get_section_size") @register_func("tvm_callback_get_section_size")
def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
...@@ -48,14 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -48,14 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
raise RuntimeError("no such file \"{}\"".format(binary_path)) raise RuntimeError("no such file \"{}\"".format(binary_path))
# We use the "-A" flag here to get the ".rodata" section's size, which is # We use the "-A" flag here to get the ".rodata" section's size, which is
# not included by default. # not included by default.
size_proc = subprocess.Popen( size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
["{}size".format(toolchain_prefix), "-A", binary_path], stdout=subprocess.PIPE)
(size_output, _) = size_proc.communicate()
size_output = size_output.decode("utf-8")
if size_proc.returncode != 0:
msg = "error in finding section size:\n"
msg += py_str(size_output)
raise RuntimeError(msg)
# TODO(weberlo): Refactor this method and `*relocate_binary` so they are # TODO(weberlo): Refactor this method and `*relocate_binary` so they are
# both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss". # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
...@@ -74,13 +139,15 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -74,13 +139,15 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
continue continue
entry_name = tokens[0] entry_name = tokens[0]
entry_size = int(tokens[1]) entry_size = int(tokens[1])
if entry_name in sections_to_sum: for section in sections_to_sum:
section_size += entry_size if entry_name.startswith(section):
section_size += entry_size
break
# NOTE: For some reason, the size of the BSS section on the RISC-V # NOTE: For some reason, the size of the BSS section on the RISC-V
# GCC is sometimes reported to be smaller than it is, so we need to adjust # GCC is sometimes reported to be smaller than it is, so we need to adjust
# for this. # for this.
if "riscv" in toolchain_prefix and section_name == 'bss': if "riscv" in toolchain_prefix and section_name == "bss":
# TODO(weberlo): Figure out why 32 is the minimum constant that works. # TODO(weberlo): Figure out why 32 is the minimum constant that works.
# #
# The current hypothesis is that the last symbols in the ".bss" and # The current hypothesis is that the last symbols in the ".bss" and
...@@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
@register_func("tvm_callback_relocate_binary") @register_func("tvm_callback_relocate_binary")
def tvm_callback_relocate_binary( def tvm_callback_relocate_binary(
binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix): binary_path,
word_size,
text_start,
rodata_start,
data_start,
bss_start,
stack_end,
toolchain_prefix):
"""Relocates sections in the binary to new addresses """Relocates sections in the binary to new addresses
Parameters Parameters
...@@ -105,17 +179,23 @@ def tvm_callback_relocate_binary( ...@@ -105,17 +179,23 @@ def tvm_callback_relocate_binary(
binary_path : str binary_path : str
path of the binary file path of the binary file
text_addr : str word_size : int
text section absolute address word size on the target machine
text_start : int
text section address
rodata_start : int
rodata section address
rodata_addr : str data_start : int
rodata section absolute address data section address
data_addr : str bss_start : int
data section absolute address bss section address
bss_addr : str stack_end : int
bss section absolute address stack section end address
toolchain_prefix : str toolchain_prefix : str
prefix for binary names in target compiler toolchain prefix for binary names in target compiler toolchain
...@@ -125,68 +205,29 @@ def tvm_callback_relocate_binary( ...@@ -125,68 +205,29 @@ def tvm_callback_relocate_binary(
rel_bin : bytearray rel_bin : bytearray
the relocated binary the relocated binary
""" """
tmp_dir = util.tempdir() stack_pointer_init = stack_end - word_size
rel_obj_path = tmp_dir.relpath("relocated.o")
ld_script_contents = "" ld_script_contents = ""
# TODO(weberlo): There should be a better way to configure this for different archs. # TODO(weberlo): There should be a better way to configure this for different archs.
if "riscv" in toolchain_prefix: if "riscv" in toolchain_prefix:
ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n" ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n"
# TODO(weberlo): Generate the script in a more procedural manner. ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
ld_script_contents += """ word_size=word_size,
SECTIONS text_start=text_start,
{ rodata_start=rodata_start,
. = %s; data_start=data_start,
. = ALIGN(8); bss_start=bss_start,
.text : stack_pointer_init=stack_pointer_init)
{
*(.text) tmp_dir = util.tempdir()
. = ALIGN(8); rel_obj_path = tmp_dir.relpath("relocated.obj")
*(.text*)
}
. = %s;
. = ALIGN(8);
.rodata :
{
*(.rodata)
. = ALIGN(8);
*(.rodata*)
}
. = %s;
. = ALIGN(8);
.data :
{
*(.data)
. = ALIGN(8);
*(.data*)
. = ALIGN(8);
*(.sdata)
}
. = %s;
. = ALIGN(8);
.bss :
{
*(.bss)
. = ALIGN(8);
*(.bss*)
. = ALIGN(8);
*(.sbss)
}
}
""" % (text_addr, rodata_addr, data_addr, bss_addr)
rel_ld_script_path = tmp_dir.relpath("relocated.lds") rel_ld_script_path = tmp_dir.relpath("relocated.lds")
with open(rel_ld_script_path, "w") as f: with open(rel_ld_script_path, "w") as f:
f.write(ld_script_contents) f.write(ld_script_contents)
ld_proc = subprocess.Popen(["{}ld".format(toolchain_prefix), binary_path, run_cmd([
"-T", rel_ld_script_path, "{}ld".format(toolchain_prefix),
"-o", rel_obj_path], binary_path,
stdout=subprocess.PIPE, "-T", rel_ld_script_path,
stderr=subprocess.STDOUT) "-o", rel_obj_path])
(out, _) = ld_proc.communicate()
if ld_proc.returncode != 0:
msg = "linking error using ld:\n"
msg += py_str(out)
raise RuntimeError(msg)
with open(rel_obj_path, "rb") as f: with open(rel_obj_path, "rb") as f:
rel_bin = bytearray(f.read()) rel_bin = bytearray(f.read())
return rel_bin return rel_bin
...@@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix): ...@@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
tmp_section = tmp_dir.relpath("tmp_section.bin") tmp_section = tmp_dir.relpath("tmp_section.bin")
with open(tmp_bin, "wb") as out_file: with open(tmp_bin, "wb") as out_file:
out_file.write(bytes(binary)) out_file.write(bytes(binary))
objcopy_proc = subprocess.Popen(["{}objcopy".format(toolchain_prefix), "--dump-section", run_cmd([
".{}={}".format(section, tmp_section), "{}objcopy".format(toolchain_prefix),
tmp_bin], "--dump-section",
stdout=subprocess.PIPE, ".{}={}".format(section, tmp_section),
stderr=subprocess.STDOUT) tmp_bin])
(out, _) = objcopy_proc.communicate()
if objcopy_proc.returncode != 0:
msg = "error in using objcopy:\n"
msg += py_str(out)
raise RuntimeError(msg)
if os.path.isfile(tmp_section): if os.path.isfile(tmp_section):
# Get section content if it exists. # Get section content if it exists.
with open(tmp_section, "rb") as f: with open(tmp_section, "rb") as f:
...@@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix): ...@@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix):
tmp_obj = tmp_dir.relpath("tmp_obj.bin") tmp_obj = tmp_dir.relpath("tmp_obj.bin")
with open(tmp_obj, "wb") as out_file: with open(tmp_obj, "wb") as out_file:
out_file.write(bytes(binary)) out_file.write(bytes(binary))
nm_proc = subprocess.Popen(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj], nm_output = run_cmd([
stdout=subprocess.PIPE, "{}nm".format(toolchain_prefix),
stderr=subprocess.STDOUT) "-C",
(nm_output, _) = nm_proc.communicate() "--defined-only",
if nm_proc.returncode != 0: tmp_obj])
msg = "error in using nm:\n" nm_output = nm_output.splitlines()
msg += py_str(nm_output)
raise RuntimeError(msg)
nm_output = nm_output.decode("utf8").splitlines()
map_str = "" map_str = ""
for line in nm_output: for line in nm_output:
line = line.split() line = line.split()
......
...@@ -19,14 +19,22 @@ ...@@ -19,14 +19,22 @@
from __future__ import absolute_import from __future__ import absolute_import
import argparse import argparse
import ast
import multiprocessing import multiprocessing
import sys import sys
import logging import logging
import tvm
from tvm import micro
from .. import rpc from .. import rpc
def main(args): def main(args):
"""Main function""" """Main function
Parameters
----------
args : argparse.Namespace
parsed args from command-line invocation
"""
if args.tracker: if args.tracker:
url, port = args.tracker.rsplit(":", 1) url, port = args.tracker.rsplit(":", 1)
port = int(port) port = int(port)
...@@ -37,6 +45,9 @@ def main(args): ...@@ -37,6 +45,9 @@ def main(args):
else: else:
tracker_addr = None tracker_addr = None
if args.utvm_dev_config or args.utvm_dev_id:
init_utvm(args)
server = rpc.Server(args.host, server = rpc.Server(args.host,
args.port, args.port,
args.port_end, args.port_end,
...@@ -48,6 +59,38 @@ def main(args): ...@@ -48,6 +59,38 @@ def main(args):
server.proc.join() server.proc.join()
def init_utvm(args):
    """MicroTVM-specific RPC initialization

    Builds a device config, either by loading the JSON file given with
    `--utvm-dev-config` or by calling the `default_config` function registered
    for `--utvm-dev-id`, and then overrides the `tvm.rpc.server.start` global
    function so that a `micro.Session` for that config is entered when it runs.

    Parameters
    ----------
    args : argparse.Namespace
        parsed args from command-line invocation

    Raises
    ------
    RuntimeError
        if both `--utvm-dev-config` and `--utvm-dev-id` are specified
    """
    # Imported here because this function is the only user of `json` and the
    # module-level import block does not provide it.
    import json

    if args.utvm_dev_config and args.utvm_dev_id:
        raise RuntimeError('only one of --utvm-dev-config and --utvm-dev-id allowed')

    if args.utvm_dev_config:
        with open(args.utvm_dev_config, 'r') as dev_conf_file:
            dev_config = json.load(dev_conf_file)
    else:
        # `--utvm-dev-config-args` has no default, so it is `None` when the
        # flag is omitted; treat that as "no arguments" rather than handing
        # `None` to `ast.literal_eval`.
        raw_config_args = args.utvm_dev_config_args
        if raw_config_args is None:
            dev_config_args = []
        else:
            dev_config_args = ast.literal_eval(raw_config_args)
        default_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['default_config']
        dev_config = default_config_func(*dev_config_args)

    if args.utvm_dev_config or args.utvm_dev_id:
        # add MicroTVM overrides
        @tvm.register_func('tvm.rpc.server.start', override=True)
        def server_start():
            # pylint: disable=unused-variable
            session = micro.Session(dev_config)
            session._enter()

            # Registered from inside `server_start` so that the shutdown hook
            # closes over the session that was just entered.
            @tvm.register_func('tvm.rpc.server.shutdown', override=True)
            def server_shutdown():
                session._exit()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--host', type=str, default="0.0.0.0", parser.add_argument('--host', type=str, default="0.0.0.0",
...@@ -71,6 +114,13 @@ if __name__ == "__main__": ...@@ -71,6 +114,13 @@ if __name__ == "__main__":
and ROCM compilers.") and ROCM compilers.")
parser.add_argument('--custom-addr', type=str, parser.add_argument('--custom-addr', type=str,
help="Custom IP Address to Report to RPC Tracker") help="Custom IP Address to Report to RPC Tracker")
parser.add_argument('--utvm-dev-config', type=str,
help='JSON config file for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-id', type=str,
help='Unique ID for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-config-args', type=str,
help=('Python list of literals required to generate a default'
' MicroTVM config (if --utvm-dev-id is specified)'))
parser.set_defaults(fork=True) parser.set_defaults(fork=True)
args = parser.parse_args() args = parser.parse_args()
......
...@@ -14,13 +14,9 @@ ...@@ -14,13 +14,9 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
"""MicroTVM module for bare-metal backends"""
"""uTVM module for bare-metal backends.
uTVM (or the micro backend) enables provides support for bare-metal devices.
Its targets currently include a host-emulated device which is used for testing,
and JTAG-based openocd device which allows actual interfacing with microdevices.
"""
from ..contrib import binutil from ..contrib import binutil
from .base import Session, cross_compiler, create_micro_lib from .base import Session, create_micro_mod, cross_compiler
from .base import LibType, get_micro_host_driven_dir, get_micro_device_dir
from . import device
...@@ -14,71 +14,100 @@ ...@@ -14,71 +14,100 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
"""Base definitions for MicroTVM"""
"""Base definitions for micro."""
from __future__ import absolute_import from __future__ import absolute_import
import logging
import os import os
import sys import sys
from enum import Enum
import tvm
from tvm.contrib import util as _util from tvm.contrib import util as _util
from tvm.contrib import cc as _cc from tvm.contrib import cc as _cc
from .._ffi.function import _init_api from .._ffi.function import _init_api
from .._ffi.libinfo import find_include_path
SUPPORTED_DEVICE_TYPES = ["host", "openocd"] class LibType(Enum):
"""Enumeration of library types that can be compiled and loaded onto a device"""
# library to be used as a MicroTVM runtime
RUNTIME = 0
# library to be used as an operator
OPERATOR = 1
class Session: class Session:
"""MicroTVM Device Session """MicroTVM Device Session
Parameters Parameters
---------- ----------
device_type : str config : dict
type of low-level device configuration for this session (as generated by
`tvm.micro.device.host.default_config()`, for example)
toolchain_prefix : str
toolchain prefix to be used. For example, a prefix of
"riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
the compiler and "riscv64-unknown-elf-ld" is used as the linker,
etc.
Example Example
-------- --------
.. code-block:: python .. code-block:: python
c_mod = ... # some module generated with "c" as the target c_mod = ... # some module generated with "c" as the target
device_type = "openocd" dev_config = micro.device.arm.stm32f746xx.default_config("127.0.0.1", 6666)
toolchain_prefix = "riscv64-unknown-elf-" with tvm.micro.Session(dev_config) as sess:
with tvm.micro.Session(device_type, micro_mod = create_micro_mod(c_mod, dev_config)
toolchain_prefix,
base_addr=0x10010000,
server_addr="127.0.0.1",
port=6666):
c_mod.export_library(lib_obj_path, fcompile=tvm.micro.cross_compiler(toolchain_prefix))
micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
""" """
def __init__(self, device_type, toolchain_prefix, **kwargs): def __init__(self, config):
if device_type not in SUPPORTED_DEVICE_TYPES:
raise RuntimeError("unknown micro device type \"{}\"".format(device_type))
self._check_system() self._check_system()
self._check_args(device_type, kwargs) # TODO(weberlo): add config validation
# grab a binutil instance from the ID in the config
dev_funcs = tvm.micro.device.get_device_funcs(config["device_id"])
self.create_micro_lib = dev_funcs["create_micro_lib"]
self.toolchain_prefix = config["toolchain_prefix"]
self.mem_layout = config["mem_layout"]
self.word_size = config["word_size"]
self.thumb_mode = config["thumb_mode"]
self.comms_method = config["comms_method"]
# First, find and compile runtime library. # First, find and compile runtime library.
runtime_src_path = os.path.join(_get_micro_device_dir(), "utvm_runtime.c") runtime_src_path = os.path.join(get_micro_host_driven_dir(), "utvm_runtime.c")
tmp_dir = _util.tempdir() tmp_dir = _util.tempdir()
runtime_obj_path = tmp_dir.relpath("utvm_runtime.obj") runtime_obj_path = tmp_dir.relpath("utvm_runtime.obj")
create_micro_lib( self.create_micro_lib(runtime_obj_path, runtime_src_path, LibType.RUNTIME)
runtime_obj_path, runtime_src_path, toolchain_prefix, include_dev_lib_header=False) #input(f"check {runtime_obj_path}: ")
comms_method = config["comms_method"]
if comms_method == "openocd":
server_addr = config["server_addr"]
server_port = config["server_port"]
elif comms_method == "host":
server_addr = ""
server_port = 0
else:
raise RuntimeError(f"unknown communication method: f{self.comms_method}")
base_addr = kwargs.get("base_addr", 0)
server_addr = kwargs.get("server_addr", "")
port = kwargs.get("port", 0)
self.module = _CreateSession( self.module = _CreateSession(
device_type, runtime_obj_path, toolchain_prefix, base_addr, server_addr, port) comms_method,
runtime_obj_path,
self.toolchain_prefix,
self.mem_layout["text"].get("start", 0),
self.mem_layout["text"]["size"],
self.mem_layout["rodata"].get("start", 0),
self.mem_layout["rodata"]["size"],
self.mem_layout["data"].get("start", 0),
self.mem_layout["data"]["size"],
self.mem_layout["bss"].get("start", 0),
self.mem_layout["bss"]["size"],
self.mem_layout["args"].get("start", 0),
self.mem_layout["args"]["size"],
self.mem_layout["heap"].get("start", 0),
self.mem_layout["heap"]["size"],
self.mem_layout["workspace"].get("start", 0),
self.mem_layout["workspace"]["size"],
self.mem_layout["stack"].get("start", 0),
self.mem_layout["stack"]["size"],
self.word_size,
self.thumb_mode,
server_addr,
server_port)
self._enter = self.module["enter"] self._enter = self.module["enter"]
self._exit = self.module["exit"] self._exit = self.module["exit"]
...@@ -88,55 +117,57 @@ class Session: ...@@ -88,55 +117,57 @@ class Session:
Raises error if not supported. Raises error if not supported.
""" """
if not sys.platform.startswith("linux"): if not sys.platform.startswith("linux"):
raise RuntimeError("microTVM is currently only supported on Linux") raise RuntimeError("MicroTVM is currently only supported on Linux hosts")
# TODO(weberlo): Add 32-bit support. # TODO(weberlo): Add 32-bit support.
# It's primarily the compilation pipeline that isn't compatible. # It's primarily the compilation pipeline that isn't compatible.
if sys.maxsize <= 2**32: if sys.maxsize <= 2**32:
raise RuntimeError("microTVM is currently only supported on 64-bit platforms") raise RuntimeError("MicroTVM is currently only supported on 64-bit host platforms")
def _check_args(self, device_type, args):
"""Check if the given configuration is valid."""
if device_type == "host":
pass
elif device_type == "openocd":
assert "base_addr" in args
assert "server_addr" in args
assert "port" in args
def __enter__(self): def __enter__(self):
self._enter() self._enter()
return self
def __exit__(self, exc_type, exc_value, exc_traceback): def __exit__(self, exc_type, exc_value, exc_traceback):
self._exit() self._exit()
def _get_micro_device_dir(): def create_micro_mod(c_mod, dev_config):
"""Get directory path for uTVM runtime source files. """Produces a micro module from a given module.
Parameters
----------
c_mod : tvm.module.Module
module with "c" as its target backend
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
Return Return
------ ------
micro_device_dir : str micro_mod : tvm.module.Module
directory path micro module for the target device
""" """
micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__))) temp_dir = _util.tempdir()
micro_device_dir = os.path.join(micro_dir, "..", "..", "..", lib_obj_path = temp_dir.relpath("dev_lib.obj")
"src", "runtime", "micro", "device") c_mod.export_library(
return micro_device_dir lib_obj_path,
fcompile=cross_compiler(dev_config, LibType.OPERATOR))
micro_mod = tvm.module.load(lib_obj_path)
return micro_mod
def cross_compiler(toolchain_prefix, include_dev_lib_header=True): def cross_compiler(dev_config, lib_type):
"""Creates a cross compile function that wraps `create_micro_lib`. """Create a cross-compile function that wraps `create_lib` for a `Binutil` instance.
For use in `tvm.module.Module.export_library`. For use in `tvm.module.Module.export_library`.
Parameters Parameters
---------- ----------
toolchain_prefix : str dev_config : Dict[str, Any]
toolchain prefix to be used MicroTVM config dict for the target device
include_dev_lib_header : Optional[bool] lib_type : micro.LibType
whether to include the device library header containing definitions of whether to compile a MicroTVM runtime or operator library
library functions.
Return Return
------ ------
...@@ -149,78 +180,46 @@ def cross_compiler(toolchain_prefix, include_dev_lib_header=True): ...@@ -149,78 +180,46 @@ def cross_compiler(toolchain_prefix, include_dev_lib_header=True):
.. code-block:: python .. code-block:: python
c_mod = ... # some module generated with "c" as the target c_mod = ... # some module generated with "c" as the target
fcompile = tvm.micro.cross_compiler(toolchain_prefix="") fcompile = tvm.micro.cross_compiler(dev_config, LibType.OPERATOR)
c_mod.export_library("dev_lib.obj", fcompile=fcompile) c_mod.export_library("dev_lib.obj", fcompile=fcompile)
""" """
dev_funcs = tvm.micro.device.get_device_funcs(dev_config['device_id'])
create_micro_lib = dev_funcs['create_micro_lib']
def compile_func(obj_path, src_path, **kwargs): def compile_func(obj_path, src_path, **kwargs):
if isinstance(obj_path, list): if isinstance(obj_path, list):
obj_path = obj_path[0] obj_path = obj_path[0]
if isinstance(src_path, list): if isinstance(src_path, list):
src_path = src_path[0] src_path = src_path[0]
create_micro_lib(obj_path, src_path, toolchain_prefix, create_micro_lib(obj_path, src_path, lib_type, kwargs.get("options", None))
kwargs.get("options", None), include_dev_lib_header) return _cc.cross_compiler(compile_func, output_format="obj")
return _cc.cross_compiler(compile_func)
def create_micro_lib( def get_micro_host_driven_dir():
obj_path, src_path, toolchain_prefix, options=None, include_dev_lib_header=True): """Get directory path for uTVM host-driven runtime source files.
"""Compiles code into a binary for the target micro device.
Parameters Return
---------- ------
obj_path : Optional[str] micro_device_dir : str
path to generated object file (defaults to same directory as `src_path`) directory path
"""
micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
micro_host_driven_dir = os.path.join(micro_dir, "..", "..", "..",
"src", "runtime", "micro", "host_driven")
return micro_host_driven_dir
src_path : str
path to source file
toolchain_prefix : str def get_micro_device_dir():
toolchain prefix to be used """Get directory path for parent directory of device-specific source files
include_dev_lib_header : bool Return
whether to include the device library header containing definitions of ------
library functions. micro_device_dir : str
directory path
""" """
def replace_suffix(s, new_suffix): micro_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
if "." in os.path.basename(s): micro_device_dir = os.path.join(micro_dir, "..", "..", "..",
# There already exists an extension. "src", "runtime", "micro", "device")
return os.path.join( return micro_device_dir
os.path.dirname(s),
".".join(os.path.basename(s).split(".")[:-1] + [new_suffix]))
# No existing extension; we can just append.
return s + "." + new_suffix
# uTVM object files cannot have an ".o" suffix, because it triggers the
# code path for creating shared objects in `tvm.module.load`. So we replace
# ".o" suffixes with ".obj".
if obj_path.endswith(".o"):
logging.warning(
"\".o\" suffix in \"%s\" has been replaced with \".obj\"", obj_path)
obj_path = replace_suffix(obj_path, "obj")
options = ["-I" + path for path in find_include_path()]
options += ["-I{}".format(_get_micro_device_dir())]
options += ["-fno-stack-protector"]
# TODO(weberlo): Don't rely on the toolchain prefix to identify if this is the host
# device.
if toolchain_prefix == "" and sys.maxsize > 2**32 and sys.platform.startswith("linux"):
# Only add this option if the host is a 64-bit Linux.
options += ["-mcmodel=large"]
compile_cmd = "{}gcc".format(toolchain_prefix)
if include_dev_lib_header:
# Create a temporary copy of the source, so we can inject the dev lib
# header without modifying the original.
tmp_dir = _util.tempdir()
temp_src_path = tmp_dir.relpath("temp.c")
with open(src_path, "r") as f:
src_lines = f.read().splitlines()
src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
with open(temp_src_path, "w") as f:
f.write("\n".join(src_lines))
src_path = temp_src_path
_cc.create_shared(obj_path, src_path, options, compile_cmd)
_init_api("tvm.micro", "tvm.micro.base") _init_api("tvm.micro", "tvm.micro.base")
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Device-specific configuration for MicroTVM"""
from .base import register_device, get_device_funcs, create_micro_lib_base
from . import host
from . import arm
from . import riscv_spike
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base module for ARM device configurations"""
from . import stm32f746xx
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for ARM STM32F746XX devices"""
from .. import create_micro_lib_base, register_device
# Identifier under which this module registers its functions with
# `register_device`; also reported as "device_id" by `default_config`.
DEVICE_ID = "arm.stm32f746xx"
# Toolchain prefix passed to `create_micro_lib_base` and reported as
# "toolchain_prefix" by `default_config`.
TOOLCHAIN_PREFIX = "arm-none-eabi-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    The device-specific GCC flags are appended after any caller-supplied
    options. The caller's `options` list is never modified.

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC
    """
    device_options = [
        "-mcpu=cortex-m7",
        "-mlittle-endian",
        "-mfloat-abi=hard",
        "-mfpu=fpv5-sp-d16",
        "-mthumb",
        "-gdwarf-5",
    ]
    # Build a fresh list instead of extending `options` in place, so that a
    # list reused by the caller does not accumulate these flags across calls.
    all_options = list(options or []) + device_options
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=all_options)
def default_config(server_addr, server_port):
    """Generates a default configuration for ARM STM32F746XX devices

    Parameters
    ----------
    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    #
    # [Device Memory Layout]
    #   RAM   (rwx) : START = 0x20000000, LENGTH = 320K
    #   FLASH (rx)  : START = 0x8000000,  LENGTH = 1024K
    #
    # The sections are laid out back to back, so every start address is the
    # previous section's start plus the previous section's size.
    section_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )
    mem_layout = {}
    next_start = 0x20000180
    for section_name, section_size in section_sizes:
        mem_layout[section_name] = {
            "start": next_start,
            "size": section_size,
        }
        next_start += section_size

    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
    return config
# Module-level registration: runs when this module is imported and makes the
# compile and default-config functions above available under DEVICE_ID.
register_device(DEVICE_ID, {
"create_micro_lib": create_micro_lib,
"default_config": default_config,
})
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base definitions for MicroTVM config"""
import glob
import os
from pathlib import Path
from tvm.contrib import util as _util
from tvm.contrib.binutil import run_cmd
from tvm._ffi.libinfo import find_include_path
from tvm.micro import LibType, get_micro_host_driven_dir, get_micro_device_dir
# Maps a device id to the dict of functions passed to `register_device` for it.
_DEVICE_REGISTRY = {}
def register_device(device_id, device_funcs):
    """Add a device and its compilation/config functions to the registry

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has already been registered
    """
    already_registered = device_id in _DEVICE_REGISTRY
    if not already_registered:
        _DEVICE_REGISTRY[device_id] = device_funcs
        return
    raise RuntimeError(f"\"{device_id}\" already exists in the device registry")
def get_device_funcs(device_id):
    """Get compilation and config generation functions for device

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    Return
    ------
    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if no device has been registered under `device_id`
    """
    try:
        return _DEVICE_REGISTRY[device_id]
    except KeyError:
        # The lookup goes through the *device* registry (the same one
        # `register_device` fills), so name it correctly in the message.
        raise RuntimeError(
            f"\"{device_id}\" does not exist in the device registry") from None
def create_micro_lib_base(
        out_obj_path,
        in_src_path,
        toolchain_prefix,
        device_id,
        lib_type,
        options=None):
    """Compiles code into a binary for the target micro device.

    Every source file is compiled to its own object file with
    `<toolchain_prefix>gcc`; the objects are then combined into one
    relocatable object at `out_obj_path` with `<toolchain_prefix>ld`.

    Parameters
    ----------
    out_obj_path : str
        path to generated object file

    in_src_path : str
        path to source file

    toolchain_prefix : str
        toolchain prefix to be used. For example, a prefix of
        "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
        the compiler and "riscv64-unknown-elf-ld" is used as the linker,
        etc.

    device_id : str
        unique identifier for the target device

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : List[str]
        additional options to pass to GCC

    Raises
    ------
    RuntimeError
        if `lib_type` is neither `LibType.RUNTIME` nor `LibType.OPERATOR`
    """
    base_compile_cmd = [
        f"{toolchain_prefix}gcc",
        "-std=c11",
        "-Wall",
        "-Wextra",
        "--pedantic",
        "-c",
        "-O0",
        "-g",
        "-nostartfiles",
        "-nodefaultlibs",
        "-nostdlib",
        "-fdata-sections",
        "-ffunction-sections",
    ]
    if options is not None:
        base_compile_cmd += options

    src_paths = []
    include_paths = find_include_path() + [get_micro_host_driven_dir()]
    tmp_dir = _util.tempdir()
    # we might transform the src path in one of the branches below
    new_in_src_path = in_src_path
    if lib_type == LibType.RUNTIME:
        dev_dir = _get_device_source_dir(device_id)
        dev_src_paths = glob.glob(f"{dev_dir}/*.[csS]")
        # there needs to at least be a utvm_timer.c file
        assert dev_src_paths, f"no device sources found in {dev_dir}"
        assert "utvm_timer.c" in map(os.path.basename, dev_src_paths), \
            f"utvm_timer.c is missing from {dev_dir}"
        src_paths += dev_src_paths
    elif lib_type == LibType.OPERATOR:
        # create a temporary copy of the source, so we can inject the dev lib
        # header without modifying the original.
        temp_src_path = tmp_dir.relpath("temp.c")
        with open(in_src_path, "r") as f:
            src_lines = f.read().splitlines()
        src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
        with open(temp_src_path, "w") as f:
            f.write("\n".join(src_lines))
        new_in_src_path = temp_src_path
        # "-c" is already part of `base_compile_cmd`, so it is not added again.
    else:
        raise RuntimeError("unknown lib type")
    src_paths += [new_in_src_path]

    for path in include_paths:
        base_compile_cmd += ["-I", path]

    prereq_obj_paths = []
    for src_path in src_paths:
        # Keep intermediate objects next to the temporary source copy instead
        # of dropping them into the current working directory.
        curr_obj_path = tmp_dir.relpath(Path(src_path).with_suffix(".o").name)
        # two sources with the same stem would overwrite each other's object
        assert curr_obj_path not in prereq_obj_paths, \
            f"duplicate object file name for {src_path}"
        prereq_obj_paths.append(curr_obj_path)
        curr_compile_cmd = base_compile_cmd + [src_path, "-o", curr_obj_path]
        run_cmd(curr_compile_cmd)

    ld_cmd = [f"{toolchain_prefix}ld", "-relocatable"]
    ld_cmd += prereq_obj_paths
    ld_cmd += ["-o", out_obj_path]
    run_cmd(ld_cmd)
def _get_device_source_dir(device_id):
    """Return the directory holding device-specific uTVM files for `device_id`

    Each "."-separated component of the id becomes one path component below
    the directory returned by `get_micro_device_dir()`.
    """
    relative_dir = device_id.replace(".", "/")
    return "/".join((get_micro_device_dir(), relative_dir))
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for the host emulated device"""
import sys
from . import create_micro_lib_base, register_device
# Identifier under which the host emulated device is registered.
DEVICE_ID = "host"
# Passed to `create_micro_lib_base` as the toolchain prefix; empty here, so no
# prefix is prepended to the tool names.
TOOLCHAIN_PREFIX = ""
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the list is not modified
    """
    # Work on a private copy: appending to the caller's list would make the
    # extra flag pile up across repeated calls that share one `options` list.
    options = [] if options is None else list(options)
    if sys.maxsize > 2**32 and sys.platform.startswith("linux"):
        # 64-bit Linux host: compile with the large code model
        options.append("-mcmodel=large")
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=options)
def default_config():
    """Build the default MicroTVM configuration for the host emulated device

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (section name, size in bytes); no start addresses are given for the host
    section_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )
    is_64_bit = sys.maxsize > 2**32
    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": {name: {"size": size} for name, size in section_sizes},
        # word size follows the interpreter's pointer width
        "word_size": 8 if is_64_bit else 4,
        "thumb_mode": False,
        "comms_method": "host",
    }
    return config
# Register the host device's compilation and config functions under DEVICE_ID.
register_device(
    DEVICE_ID,
    {"create_micro_lib": create_micro_lib, "default_config": default_config},
)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for Spike, a RISC-V functional ISA simulator"""
from collections import OrderedDict
from . import create_micro_lib_base, register_device
# Identifier under which the Spike simulator device is registered.
DEVICE_ID = "riscv_spike"
# Passed to `create_micro_lib_base` as the toolchain prefix.
TOOLCHAIN_PREFIX = "riscv64-unknown-elf-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Compile a library for Spike by delegating to `create_micro_lib_base`

    The Spike toolchain prefix and device id are filled in; everything else
    is forwarded unchanged.

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC
    """
    device_specific_args = (TOOLCHAIN_PREFIX, DEVICE_ID)
    create_micro_lib_base(
        obj_path, src_path, *device_specific_args, lib_type, options=options)
def default_config(base_addr, server_addr, server_port):
    """Build the default MicroTVM configuration for Spike

    Parameters
    ----------
    base_addr : int
        base address of the simulator (for calculating the memory layout)

    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Return
    ------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (section name, size in bytes), in the order the sections are laid out
    section_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )

    # Sections are packed back to back starting at `base_addr`: each start
    # address is the base plus the total size of all preceding sections.
    mem_layout = OrderedDict()
    offset = 0
    for name, size in section_sizes:
        mem_layout[name] = {"size": size, "start": base_addr + offset}
        offset += size

    return {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        # NOTE(review): "thumb mode" is an ARM notion; confirm that True is
        # really intended for this RISC-V target.
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
# Register the Spike device's compilation and config functions under DEVICE_ID.
register_device(
    DEVICE_ID,
    {"create_micro_lib": create_micro_lib, "default_config": default_config},
)
...@@ -265,6 +265,9 @@ def load(path, fmt=""): ...@@ -265,6 +265,9 @@ def load(path, fmt=""):
files = [tar_temp.relpath(x) for x in tar_temp.listdir()] files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
_cc.create_shared(path + ".so", files) _cc.create_shared(path + ".so", files)
path += ".so" path += ".so"
# TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
elif path.endswith(".obj"):
fmt = "micro_dev"
# Redirect to the load API # Redirect to the load API
return _LoadFromFile(path, fmt) return _LoadFromFile(path, fmt)
......
...@@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name, ...@@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name,
} }
t->device_type = kDLCPU; t->device_type = kDLCPU;
t->thread_warp_size = 1; t->thread_warp_size = 1;
if (target_name == "c" || target_name == "llvm") { if (target_name == "c" && t->device_name == "micro_dev") {
t->device_type = kDLMicroDev;
} else if (target_name == "c" || target_name == "llvm") {
t->keys_array.push_back(ir::StringImm::make("cpu")); t->keys_array.push_back(ir::StringImm::make("cpu"));
} else if (target_name == "cuda" || target_name == "nvptx") { } else if (target_name == "cuda" || target_name == "nvptx") {
t->device_type = kDLGPU; t->device_type = kDLGPU;
......
...@@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() { ...@@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() {
module_name_ = GetUniqueName("__tvm_module_ctx"); module_name_ = GetUniqueName("__tvm_module_ctx");
} }
void CodeGenCHost::Init(bool output_ssa) { void CodeGenCHost::Init(bool output_ssa, bool emit_asserts) {
emit_asserts_ = emit_asserts;
decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n";
decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n";
decl_stream << "extern void* " << module_name_ << " = NULL;\n"; decl_stream << "extern void* " << module_name_ << " = NULL;\n";
...@@ -237,17 +238,19 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) ...@@ -237,17 +238,19 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*)
} }
void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*) void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*)
std::string cond = PrintExpr(op->condition); if (emit_asserts_) {
PrintIndent(); std::string cond = PrintExpr(op->condition);
stream << "if (!(" << cond << ")) {\n"; PrintIndent();
int assert_if_scope = this->BeginScope(); stream << "if (!(" << cond << ")) {\n";
PrintIndent(); int assert_if_scope = this->BeginScope();
stream << "TVMAPISetLastError(\"" << op->message.as<StringImm>()->value << "\");\n"; PrintIndent();
PrintIndent(); stream << "TVMAPISetLastError(\"" << op->message.as<StringImm>()->value << "\");\n";
stream << "return -1;\n"; PrintIndent();
this->EndScope(assert_if_scope); stream << "return -1;\n";
PrintIndent(); this->EndScope(assert_if_scope);
stream << "}\n"; PrintIndent();
stream << "}\n";
}
this->PrintStmt(op->body); this->PrintStmt(op->body);
} }
...@@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op, ...@@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op,
runtime::Module BuildCHost(Array<LoweredFunc> funcs) { runtime::Module BuildCHost(Array<LoweredFunc> funcs) {
using tvm::runtime::Registry; using tvm::runtime::Registry;
bool output_ssa = false; bool output_ssa = false;
bool emit_asserts = false;
CodeGenCHost cg; CodeGenCHost cg;
cg.Init(output_ssa); cg.Init(output_ssa, emit_asserts);
for (LoweredFunc f : funcs) { for (LoweredFunc f : funcs) {
cg.AddFunction(f); cg.AddFunction(f);
} }
......
...@@ -35,7 +35,7 @@ namespace codegen { ...@@ -35,7 +35,7 @@ namespace codegen {
class CodeGenCHost final : public CodeGenC { class CodeGenCHost final : public CodeGenC {
public: public:
CodeGenCHost(); CodeGenCHost();
void Init(bool output_ssa); void Init(bool output_ssa, bool emit_asserts);
void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f);
std::string Finish(); std::string Finish();
...@@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC { ...@@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC {
private: private:
std::string module_name_; std::string module_name_;
/*! \brief whether to emit asserts in the resulting C code */
bool emit_asserts_;
void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name); void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name);
void PrintFuncCall(const std::string& packed_func_name, int num_args); void PrintFuncCall(const std::string& packed_func_name, int num_args);
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.s
* \brief uTVM init definition for STM32F746XX-series boards
*/
/* assemble as unified-syntax Thumb code targeting a Cortex-M7 */
.syntax unified
.cpu cortex-m7
.fpu softvfp
.thumb
/* place UTVMInit in its own section and mark the symbol as a function */
.section .text.UTVMInit
.type UTVMInit, %function
/* Entry point: performs the device setup below, then calls UTVMMain. */
UTVMInit:
/* enable fpu */
/* read-modify-write of the word at 0xE000ED88 (the Cortex-M coprocessor
 * access control register): OR in 0xF00000, i.e. set bits 20-23 */
ldr r0, =0xE000ED88
ldr r1, [r0]
ldr r2, =0xF00000
orr r1, r2
str r1, [r0]
/* barriers after the register write, before any later instruction runs */
dsb
isb
/* set stack pointer */
/* _utvm_stack_pointer_init is a symbol supplied by the linker script */
ldr sp, =_utvm_stack_pointer_init
/* NOTE(review): no instruction follows this call; presumably UTVMMain is not
 * expected to return here (execution is stopped at a breakpoint) - confirm */
bl UTVMMain
.size UTVMInit, .-UTVMInit
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API definitions for STM32F746XX-series boards
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "utvm_runtime.h"
// There are two implementations of cycle counters on the STM32F7X: SysTick and
// CYCCNT. SysTick is preferred, as it gives better error handling, but the
// counter is only 24 bits wide. If a larger timer is needed, use the CYCCNT
// implementation, which has a 32-bit counter.
#define USE_SYSTICK
#ifdef USE_SYSTICK
// Memory-mapped SysTick registers, accessed as volatile 32-bit words.
#define SYST_CSR (*((volatile uint32_t *) 0xE000E010))
#define SYST_RVR (*((volatile uint32_t *) 0xE000E014))
#define SYST_CVR (*((volatile uint32_t *) 0xE000E018))
#define SYST_CALIB (*((volatile uint32_t *) 0xE000E01C))
// The constants below are bit positions, not masks: shift with (1 << N)
// before combining them with a register value.
#define SYST_CSR_ENABLE 0
#define SYST_CSR_TICKINT 1
#define SYST_CSR_CLKSOURCE 2
#define SYST_COUNTFLAG 16
#define SYST_CALIB_NOREF 31
#define SYST_CALIB_SKEW 30
// Counter values captured by UTVMTimerStart and UTVMTimerStop respectively.
uint32_t start_time = 0;
uint32_t stop_time = 0;
/*!
 * \brief Start the SysTick counter and record its initial value in start_time.
 * \return always 0
 */
int32_t UTVMTimerStart() {
// set the ENABLE and CLKSOURCE bits; every other control bit is cleared
SYST_CSR = (1 << SYST_CSR_ENABLE) | (1 << SYST_CSR_CLKSOURCE);
// wait until timer starts
while (SYST_CVR == 0) {}
start_time = SYST_CVR;
return 0;
}
/*! \brief Stop the SysTick counter and record its final value in stop_time. */
void UTVMTimerStop() {
// writing 0 clears every control bit, including ENABLE
SYST_CSR = 0;
stop_time = SYST_CVR;
}
/*!
 * \brief Disable the SysTick counter, program the largest reload value and
 *        write 0 to the current-value register.
 */
void UTVMTimerReset() {
SYST_CSR = 0;
// maximum reload value (24-bit)
SYST_RVR = (~((uint32_t) 0)) >> 8;
SYST_CVR = 0;
}
/*!
 * \brief Report the number of SysTick ticks between UTVMTimerStart and
 *        UTVMTimerStop.
 * \return elapsed ticks; if the counter overflowed, the last error is set and
 *         -1 (converted to uint32_t) is returned instead
 */
uint32_t UTVMTimerRead() {
  // SYST_COUNTFLAG is a bit position, so it must be shifted into a mask
  // before it is tested against the control/status register.
  if (SYST_CSR & (((uint32_t) 1) << SYST_COUNTFLAG)) {
    TVMAPISetLastError("timer overflowed");
    return -1;
  } else {
    // SysTick counts down, so the start value is the larger of the two
    return start_time - stop_time;
  }
}
#else // !USE_SYSTICK
// Memory-mapped DWT control and cycle-count registers, accessed as volatile
// 32-bit words.
#define DWT_CTRL (*((volatile uint32_t *) 0xE0001000))
#define DWT_CYCCNT (*((volatile uint32_t *) 0xE0001004))
// The constants below are bit positions, not masks: shift with (1 << N)
// before combining them with a register value.
#define DWT_CTRL_NOCYCCNT 25
#define DWT_CTRL_CYCCNTENA 0
// Counter values captured by UTVMTimerStart and UTVMTimerStop respectively.
uint32_t start_time = 0;
uint32_t stop_time = 0;
/*! \brief Write 0 to the cycle-count register. */
void UTVMTimerReset() {
DWT_CYCCNT = 0;
}
/*!
 * \brief Record the current cycle count in start_time and enable the cycle
 *        counter.
 * \return 0 on success; -1 (with the last error set) if the NOCYCCNT bit
 *         reports that no cycle counter is available
 */
int32_t UTVMTimerStart() {
  // DWT_CTRL_NOCYCCNT is a bit position, so shift it into a mask first
  if (DWT_CTRL & (((uint32_t) 1) << DWT_CTRL_NOCYCCNT)) {
    TVMAPISetLastError("cycle counter not implemented on device");
    return -1;
  }
  start_time = DWT_CYCCNT;
  DWT_CTRL |= (1 << DWT_CTRL_CYCCNTENA);
  // the success path must return a value as well
  return 0;
}
/*! \brief Record the current cycle count in stop_time, then disable the cycle counter. */
void UTVMTimerStop() {
stop_time = DWT_CYCCNT;
// clear only the CYCCNTENA bit, leaving the other control bits untouched
DWT_CTRL &= ~(1 << DWT_CTRL_CYCCNTENA);
}
/*!
 * \brief Report the number of cycles between UTVMTimerStart and
 *        UTVMTimerStop.
 * \return elapsed cycles; the result is also correct when the 32-bit counter
 *         wrapped around once between the two readings
 */
uint32_t UTVMTimerRead() {
  // Both readings are uint32_t, so the subtraction is performed modulo 2^32
  // and needs no separate wrap-around branch.
  return stop_time - start_time;
}
#endif // USE_SYSTICK
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.c
* \brief uTVM init definition for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
/*! \brief Entry point for the host emulated device; calls UTVMMain directly. */
void UTVMInit() {
// no init required for the host
UTVMMain();
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API stubs for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
// TODO(weberlo): use this? https://stackoverflow.com/questions/5141960/get-the-current-time-in-c
/*! \brief Timer start stub for the host; does nothing and always returns 0. */
int32_t UTVMTimerStart() {
return 0;
}
/*! \brief Timer stop stub for the host; does nothing. */
void UTVMTimerStop() { }
/*! \brief Timer reset stub for the host; does nothing. */
void UTVMTimerReset() { }
/*! \brief Timer read stub for the host; always returns the constant 1 rather than a measured time. */
uint32_t UTVMTimerRead() {
return 1;
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
...@@ -21,9 +21,9 @@ ...@@ -21,9 +21,9 @@
* \file utvm_runtime.cc * \file utvm_runtime.cc
* \brief uTVM runtime * \brief uTVM runtime
* *
* All function calls go through `UTVMMain`, which reads from the current * All function calls go through the externally defined `UTVMInit`, which
* `UTVMTask` and calls the appropriate function with the arguments from the * performs device-specific setup, then calls `UTVMMain`. `UTVMMain` then
* task. * calls the function in `utvm_task` with the arguments from the task.
* *
* Additionally included in this file are definitions for some of the most * Additionally included in this file are definitions for some of the most
* common functions used in the C runtime API. * common functions used in the C runtime API.
...@@ -35,36 +35,58 @@ extern "C" { ...@@ -35,36 +35,58 @@ extern "C" {
#include "utvm_runtime.h" #include "utvm_runtime.h"
// Task pointers must be patched before calling a function. // Task pointers must be patched before calling a function.
UTVMTask task; UTVMTask utvm_task = {
.func = NULL,
.arg_values = NULL,
.arg_type_codes = NULL,
.num_args = 0,
};
size_t utvm_word_size = 0; // NOLINT(*)
// These pointers are patched at load time to point to the workspace section. // These pointers are patched at load time to point to the workspace section.
char* utvm_workspace_begin = NULL; // NOLINT(*) char* utvm_workspace_start = NULL; // NOLINT(*)
char* utvm_workspace_end = NULL; // NOLINT(*) char* utvm_workspace_end = NULL; // NOLINT(*)
char* utvm_workspace_curr = NULL; // NOLINT(*) char* utvm_workspace_curr = NULL; // NOLINT(*)
// Keep track of how many active allocations there are on the workspace. // Keep track of how many active allocations there are on the workspace.
size_t utvm_num_active_allocs = 0; size_t utvm_num_active_allocs = 0;
const char* utvm_last_error = NULL; // NOLINT(*) const char* utvm_last_error = NULL; // NOLINT(*)
int32_t utvm_return_code = 0; // NOLINT(*) int32_t utvm_return_code = 0; // NOLINT(*)
// We use a dummy function to signal execution is finished for device uint32_t utvm_task_time = 0;
// backends which require breakpoints.
void UTVMDone() { }
// Gets called by UTVMInit, after device-specific initialization is finished.
void UTVMMain() { void UTVMMain() {
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
utvm_num_active_allocs = 0; utvm_num_active_allocs = 0;
utvm_last_error = NULL; // NOLINT(*) utvm_last_error = NULL; // NOLINT(*)
utvm_return_code = 0; utvm_return_code = 0;
utvm_return_code = task.func((void*) task.arg_values, (void*) task.arg_type_codes, // NOLINT(*) utvm_task_time = 0;
task.num_args); UTVMTimerReset();
int32_t err = UTVMTimerStart();
if (err < 0) {
utvm_return_code = err;
UTVMDone();
}
utvm_return_code = utvm_task.func(
(void*) utvm_task.arg_values, // NOLINT(*)
(void*) utvm_task.arg_type_codes, // NOLINT(*)
utvm_task.num_args);
UTVMTimerStop();
utvm_task_time = UTVMTimerRead();
UTVMDone(); UTVMDone();
} }
// We use a dummy function to signal execution is finished for device
// backends which require breakpoints.
void UTVMDone() { }
void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size, void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size,
int dtype_code_hint, int dtype_bits_hint) { int dtype_code_hint, int dtype_bits_hint) {
// Align up to 8 bytes. // Align up to 8 bytes.
utvm_workspace_curr += (8 - ((uintptr_t) utvm_workspace_curr % 8)) % 8; // NOLINT(*) utvm_workspace_curr +=
(utvm_word_size - ((uintptr_t) utvm_workspace_curr % utvm_word_size)) % utvm_word_size; // NOLINT(*)
if (utvm_workspace_curr + size > utvm_workspace_end) { if (utvm_workspace_curr + size > utvm_workspace_end) {
// Out of space in workspace. // Out of space in workspace.
return NULL; return NULL;
...@@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { ...@@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
TVMAPISetLastError("free called with no active workspace allocations"); TVMAPISetLastError("free called with no active workspace allocations");
// Reset allocations and workspace (for future task executions). // Reset allocations and workspace (for future task executions).
utvm_num_active_allocs = 0; utvm_num_active_allocs = 0;
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
return -1; return -1;
} else if (utvm_num_active_allocs == 0) { } else if (utvm_num_active_allocs == 0) {
// No more allocations. Reset workspace. // No more allocations. Reset workspace.
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
return 0; return 0;
} else { } else {
return 0; return 0;
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
* \file utvm_runtime.h * \file utvm_runtime.h
* \brief uTVM runtime headers * \brief uTVM runtime headers
*/ */
#ifndef TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #ifndef TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#define TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #define TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
...@@ -30,6 +30,7 @@ extern "C" { ...@@ -30,6 +30,7 @@ extern "C" {
#include <stdint.h> #include <stdint.h>
#include <tvm/runtime/c_runtime_api.h> #include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/c_backend_api.h>
/*! /*!
* \brief Task structure for uTVM * \brief Task structure for uTVM
...@@ -45,8 +46,22 @@ typedef struct { ...@@ -45,8 +46,22 @@ typedef struct {
int32_t num_args; int32_t num_args;
} UTVMTask; } UTVMTask;
extern void UTVMInit();
extern void UTVMTimerReset();
extern int32_t UTVMTimerStart();
extern void UTVMTimerStop();
extern uint32_t UTVMTimerRead();
void UTVMMain();
void UTVMDone();
#ifdef __cplusplus #ifdef __cplusplus
} // TVM_EXTERN_C } // TVM_EXTERN_C
#endif #endif
#endif // TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #endif // TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
...@@ -31,6 +31,9 @@ ...@@ -31,6 +31,9 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
/*! \brief number of bytes in each page */
constexpr int kPageSize = 4096;
/*! /*!
* \brief emulated low-level device on host machine * \brief emulated low-level device on host machine
*/ */
...@@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice { ...@@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice {
* \brief constructor to initialize on-host memory region to act as device * \brief constructor to initialize on-host memory region to act as device
* \param num_bytes size of the emulated on-device memory region * \param num_bytes size of the emulated on-device memory region
*/ */
explicit HostLowLevelDevice(size_t num_bytes) : size_(num_bytes) { explicit HostLowLevelDevice(size_t num_bytes, void** base_addr) : size_(num_bytes) {
size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize; size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize;
// TODO(weberlo): Set permissions per section (e.g., read-write perms for // TODO(weberlo): Set permissions per section (e.g., read-write perms for
// the heap, execute perms for text, etc.). // the heap, execute perms for text, etc.).
int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC; int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE; int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE;
base_addr_ = reinterpret_cast<std::uintptr_t>( base_addr_ = mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0);
mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0)); *base_addr = base_addr_;
} }
/*! /*!
* \brief destructor to deallocate on-host device region * \brief destructor to deallocate on-host device region
*/ */
virtual ~HostLowLevelDevice() { virtual ~HostLowLevelDevice() {
munmap(reinterpret_cast<void*>(base_addr_), size_); munmap(base_addr_, size_);
}
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>();
std::memcpy(buf, addr, num_bytes);
} }
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { void Read(DevPtr addr, void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>(); std::memcpy(buf, addr.cast_to<void*>(), num_bytes);
std::memcpy(addr, buf, num_bytes);
} }
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { void Write(DevPtr addr, const void* buf, size_t num_bytes) {
DevPtr func_addr = ToDevPtr(func_offset); std::memcpy(addr.cast_to<void*>(), buf, num_bytes);
reinterpret_cast<void (*)(void)>(func_addr.value())();
} }
std::uintptr_t base_addr() const final { void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
return base_addr_; reinterpret_cast<void (*)(void)>(func_addr.value().val64)();
} }
const char* device_type() const final { const char* device_type() const final {
...@@ -82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice { ...@@ -82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice {
private: private:
/*! \brief base address of the micro device memory region */ /*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_; void* base_addr_;
/*! \brief size of memory region */ /*! \brief size of memory region */
size_t size_; size_t size_;
}; };
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes) { const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr) {
std::shared_ptr<LowLevelDevice> lld = std::shared_ptr<LowLevelDevice> lld =
std::make_shared<HostLowLevelDevice>(num_bytes); std::make_shared<HostLowLevelDevice>(num_bytes, base_addr);
return lld; return lld;
} }
......
...@@ -40,87 +40,52 @@ class LowLevelDevice { ...@@ -40,87 +40,52 @@ class LowLevelDevice {
virtual ~LowLevelDevice() {} virtual ~LowLevelDevice() {}
/*! /*!
* \brief reads num_bytes from device memory at base_addr + offset into buffer * \brief reads num_bytes from device memory at addr into buffer
* \param offset on-device memory offset pointer to be read from * \param addr on-device memory address to read from
* \param buffer on-host buffer to be read into * \param buffer on-host buffer to be read into
* \param num_bytes number of bytes to be read * \param num_bytes number of bytes to read
*/ */
virtual void Read(DevBaseOffset offset, virtual void Read(DevPtr addr,
void* buffer, void* buffer,
size_t num_bytes) = 0; size_t num_bytes) = 0;
/*! /*!
* \brief writes num_bytes from buffer to device memory at base_addr + offset * \brief writes num_bytes from buffer to device memory at addr
* \param offset on-device memory offset pointer to be written to * \param addr on-device memory address to write into
* \param buffer on-host buffer to be written * \param buffer host buffer to write from
* \param num_bytes number of bytes to be written * \param num_bytes number of bytes to write
*/ */
virtual void Write(DevBaseOffset offset, virtual void Write(DevPtr addr,
const void* buffer, const void* buffer,
size_t num_bytes) = 0; size_t num_bytes) = 0;
/*! /*!
* \brief starts execution of device at offset * \brief starts execution of device at func_addr
* \param func_addr offset of the init stub function * \param func_addr offset of the init stub function
* \param breakpoint breakpoint at which to stop function execution * \param breakpoint_addr address at which to stop function execution
*/ */
virtual void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) = 0; virtual void Execute(DevPtr func_addr, DevPtr breakpoint_addr) = 0;
// TODO(weberlo): Should we just give the device the *entire* memory layout
// decided by the session?
/*!
* \brief sets the offset of the top of the stack section
* \param stack_top offset of the stack top
*/
virtual void SetStackTop(DevBaseOffset stack_top) {
LOG(FATAL) << "unimplemented";
}
/*!
* \brief convert from base offset to absolute address
* \param offset base offset
* \return absolute address
*/
DevPtr ToDevPtr(DevBaseOffset offset) {
return DevPtr(base_addr() + offset.value());
}
/*!
* \brief convert from absolute address to base offset
* \param ptr absolute address
* \return base offset
*/
DevBaseOffset ToDevOffset(DevPtr ptr) {
return DevBaseOffset(ptr.value() - base_addr());
}
/*! /*!
* \brief getter function for low-level device type * \brief getter function for low-level device type
* \return string containing device type * \return string containing device type
*/ */
virtual const char* device_type() const = 0; virtual const char* device_type() const = 0;
protected:
/*!
* \brief getter function for base_addr
* \return the base address of the device memory region
*/
virtual std::uintptr_t base_addr() const = 0;
}; };
/*! /*!
* \brief create a host low-level device * \brief create a host low-level device
* \param num_bytes size of the memory region * \param num_bytes size of the memory region
* \param base_addr pointer to write the host device's resulting base address into
*/ */
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes); const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr);
/*! /*!
* \brief connect to OpenOCD and create an OpenOCD low-level device * \brief connect to OpenOCD and create an OpenOCD low-level device
* \param addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to
*/ */
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& addr,
const std::string& addr,
int port); int port);
} // namespace runtime } // namespace runtime
......
...@@ -35,30 +35,6 @@ ...@@ -35,30 +35,6 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
size_t GetDefaultSectionSize(SectionKind kind) {
switch (kind) {
case SectionKind::kText:
return 0xF000;
case SectionKind::kRodata:
return 0xF000;
case SectionKind::kData:
return 0xF00;
case SectionKind::kBss:
return 0xF00;
case SectionKind::kArgs:
return 0xF0000;
case SectionKind::kStack:
return 0xF000;
case SectionKind::kHeap:
return 0xF00000;
case SectionKind::kWorkspace:
return 0xF0000;
default:
LOG(FATAL) << "invalid section " << static_cast<size_t>(kind);
return 0;
}
}
const char* SectionToString(SectionKind section) { const char* SectionToString(SectionKind section) {
switch (section) { switch (section) {
case SectionKind::kText: return "text"; case SectionKind::kText: return "text";
...@@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) { ...@@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) {
case SectionKind::kData: return "data"; case SectionKind::kData: return "data";
case SectionKind::kBss: return "bss"; case SectionKind::kBss: return "bss";
case SectionKind::kArgs: return "args"; case SectionKind::kArgs: return "args";
case SectionKind::kStack: return "stack";
case SectionKind::kHeap: return "heap"; case SectionKind::kHeap: return "heap";
case SectionKind::kWorkspace: return "workspace"; case SectionKind::kWorkspace: return "workspace";
case SectionKind::kStack: return "stack";
default: return ""; default: return "";
} }
} }
static std::string AddrToString(void* addr) { std::string RelocateBinarySections(
std::stringstream stream; const std::string& binary_path,
if (addr != nullptr) size_t word_size,
stream << addr; DevPtr text_start,
else DevPtr rodata_start,
stream << "0x0"; DevPtr data_start,
std::string string_addr = stream.str(); DevPtr bss_start,
return string_addr; DevPtr stack_end,
} const std::string& toolchain_prefix) {
std::string RelocateBinarySections(const std::string& binary_path,
DevPtr text,
DevPtr rodata,
DevPtr data,
DevPtr bss,
const std::string& toolchain_prefix) {
const auto* f = Registry::Get("tvm_callback_relocate_binary"); const auto* f = Registry::Get("tvm_callback_relocate_binary");
CHECK(f != nullptr) CHECK(f != nullptr)
<< "Require tvm_callback_relocate_binary to exist in registry"; << "Require tvm_callback_relocate_binary to exist in registry";
std::string relocated_bin = (*f)(binary_path, std::string relocated_bin = (*f)(binary_path,
AddrToString(text.cast_to<void*>()), word_size,
AddrToString(rodata.cast_to<void*>()), text_start.cast_to<uint64_t>(),
AddrToString(data.cast_to<void*>()), rodata_start.cast_to<uint64_t>(),
AddrToString(bss.cast_to<void*>()), data_start.cast_to<uint64_t>(),
bss_start.cast_to<uint64_t>(),
stack_end.cast_to<uint64_t>(),
toolchain_prefix); toolchain_prefix);
return relocated_bin; return relocated_bin;
} }
......
...@@ -46,122 +46,79 @@ enum class SectionKind : size_t { ...@@ -46,122 +46,79 @@ enum class SectionKind : size_t {
kData, kData,
kBss, kBss,
kArgs, kArgs,
kStack,
kHeap, kHeap,
kWorkspace, kWorkspace,
kStack,
kNumKinds, kNumKinds,
}; };
/*! \brief default size alignment */ /*! \brief union for storing values on varying target word sizes */
constexpr int kDefaultSizeAlignment = 8; union TargetVal {
/*! \brief 32-bit pointer */
uint32_t val32;
/*! \brief 64-bit pointer */
uint64_t val64;
};
/*! \brief Base class for interfacing with device locations (pointers/offsets) */ /*! \brief absolute device address */
class DeviceLocation { class DevPtr {
public: public:
/*! \brief construct a location with value `value` */ /*! \brief construct a device address with value `value` */
explicit DeviceLocation(std::uintptr_t value) : value_(value) {} explicit DevPtr(std::uintptr_t value) : value_(TargetVal { .val64 = value }) {}
/*! \brief default constructor */ /*! \brief default constructor */
DeviceLocation() : value_(0) {} DevPtr() : value_(TargetVal { .val64 = 0 }) {}
/*! \brief construct a null location */ /*! \brief construct a null address */
explicit DeviceLocation(std::nullptr_t value) : value_(0) {} explicit DevPtr(std::nullptr_t value) : value_(TargetVal { .val64 = 0 }) {}
/*! \brief destructor */ /*! \brief destructor */
virtual ~DeviceLocation() {} ~DevPtr() {}
/*! /*!
* \brief get value of location * \brief get value of pointer
* \return value of location * \return value of pointer
*/ */
std::uintptr_t value() const { return value_; } TargetVal value() const { return value_; }
/*! /*!
* \brief cast location to type `T` * \brief cast location to type `T`
* \return casted result * \return casted result
*/ */
template <typename T> template <typename T>
T cast_to() const { return reinterpret_cast<T>(value_); } T cast_to() const { return reinterpret_cast<T>(value_.val64); }
/*! \brief check if location is null */ /*! \brief check if location is null */
bool operator==(std::nullptr_t) const { return value_ == 0; } bool operator==(std::nullptr_t) const { return value_.val64 == 0; }
/*! \brief check if location is not null */ /*! \brief check if location is not null */
bool operator!=(std::nullptr_t) const { return value_ != 0; } bool operator!=(std::nullptr_t) const { return value_.val64 != 0; }
protected:
/*! \brief raw value storing the location */
std::uintptr_t value_;
};
/*! \brief absolute device address */
class DevPtr : public DeviceLocation {
public:
/*! \brief construct an absolute address with value `value` */
explicit DevPtr(std::uintptr_t val) : DeviceLocation(val) {}
/*! \brief default constructor */
DevPtr() : DeviceLocation() {}
/*! \brief construct a null absolute address */
explicit DevPtr(std::nullptr_t val) : DeviceLocation(val) {}
/*! \brief add an integer to this absolute address to get a larger absolute address */ /*! \brief add an integer to this absolute address to get a larger absolute address */
DevPtr operator+(size_t n) const { DevPtr operator+(size_t n) const {
return DevPtr(value_ + n); return DevPtr(value_.val64 + n);
} }
/*! \brief mutably add an integer to this absolute address */ /*! \brief mutably add an integer to this absolute address */
DevPtr& operator+=(size_t n) { DevPtr& operator+=(size_t n) {
value_ += n; value_.val64 += n;
return *this; return *this;
} }
/*! \brief subtract an integer from this absolute address to get a smaller absolute address */ /*! \brief subtract an integer from this absolute address to get a smaller absolute address */
DevPtr operator-(size_t n) const { DevPtr operator-(size_t n) const {
return DevPtr(value_ - n); return DevPtr(value_.val64 - n);
} }
/*! \brief mutably subtract an integer from this absolute address */ /*! \brief mutably subtract an integer from this absolute address */
DevPtr& operator-=(size_t n) { DevPtr& operator-=(size_t n) {
value_ -= n; value_.val64 -= n;
return *this; return *this;
} }
};
/*! \brief offset from device base address */
class DevBaseOffset : public DeviceLocation {
public:
/*! \brief construct a base offset with value `value` */
explicit DevBaseOffset(std::uintptr_t value) : DeviceLocation(value) {}
/*! \brief default constructor */
DevBaseOffset() : DeviceLocation() {}
/*! \brief construct a null base offset */
explicit DevBaseOffset(std::nullptr_t value) : DeviceLocation(value) {}
/*! \brief add an integer to this base offset to get a larger base offset */ private:
DevBaseOffset operator+(size_t n) const { /*! \brief raw value storing the pointer */
return DevBaseOffset(value_ + n); TargetVal value_;
}
/*! \brief mutably add an integer to this base offset */
DevBaseOffset& operator+=(size_t n) {
value_ += n;
return *this;
}
/*! \brief subtract an integer from this base offset to get a smaller base offset */
DevBaseOffset operator-(size_t n) const {
return DevBaseOffset(value_ - n);
}
/*! \brief mutably subtract an integer from this base offset */
DevBaseOffset& operator-=(size_t n) {
value_ -= n;
return *this;
}
}; };
/*! /*!
...@@ -212,6 +169,10 @@ class SymbolMap { ...@@ -212,6 +169,10 @@ class SymbolMap {
return result->second; return result->second;
} }
bool HasSymbol(const std::string& name) const {
return map_.find(name) != map_.end();
}
private: private:
/*! \brief backing map */ /*! \brief backing map */
std::unordered_map<std::string, DevPtr> map_; std::unordered_map<std::string, DevPtr> map_;
...@@ -220,7 +181,7 @@ class SymbolMap { ...@@ -220,7 +181,7 @@ class SymbolMap {
/*! \brief struct containing start and size of a device memory region */ /*! \brief struct containing start and size of a device memory region */
struct DevMemRegion { struct DevMemRegion {
/*! \brief section start offset */ /*! \brief section start offset */
DevBaseOffset start; DevPtr start;
/*! \brief size of section */ /*! \brief size of section */
size_t size; size_t size;
}; };
...@@ -239,16 +200,13 @@ struct BinaryInfo { ...@@ -239,16 +200,13 @@ struct BinaryInfo {
SymbolMap symbol_map; SymbolMap symbol_map;
}; };
// TODO(weberlo): should this be here? struct BinaryContents {
/*! \brief number of bytes in each page */ BinaryInfo binary_info;
constexpr int kPageSize = 4096; std::string text_contents;
std::string rodata_contents;
const DevBaseOffset kDeviceStart = DevBaseOffset(64); std::string data_contents;
std::string bss_contents;
/*! };
* \brief return default size of given section kind in bytes
*/
size_t GetDefaultSectionSize(SectionKind kind);
/*! /*!
* \brief upper-aligns value according to specified alignment * \brief upper-aligns value according to specified alignment
...@@ -270,19 +228,24 @@ const char* SectionToString(SectionKind section); ...@@ -270,19 +228,24 @@ const char* SectionToString(SectionKind section);
/*! /*!
* \brief links binary by repositioning section addresses * \brief links binary by repositioning section addresses
* \param binary_name input binary filename * \param binary_name input binary filename
* \param text new text section address * \param word_size word size on the target machine
* \param rodata new rodata section address * \param text_start text section address
* \param data new data section address * \param rodata_start rodata section address
* \param bss new bss section address * \param data_start data section address
* \param bss_start bss section address
* \param stack_end stack section end address
* \param toolchain_prefix prefix of compiler toolchain to use * \param toolchain_prefix prefix of compiler toolchain to use
* \return relocated binary file contents * \return relocated binary file contents
*/ */
std::string RelocateBinarySections(const std::string& binary_name, std::string RelocateBinarySections(
DevPtr text, const std::string& binary_path,
DevPtr rodata, size_t word_size,
DevPtr data, DevPtr text_start,
DevPtr bss, DevPtr rodata_start,
const std::string& toolchain_prefix); DevPtr data_start,
DevPtr bss_start,
DevPtr stack_end,
const std::string& toolchain_prefix);
/*! /*!
* \brief reads section from binary * \brief reads section from binary
...@@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary, ...@@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary,
size_t GetSectionSize(const std::string& binary_name, size_t GetSectionSize(const std::string& binary_name,
SectionKind section, SectionKind section,
const std::string& toolchain_prefix, const std::string& toolchain_prefix,
size_t align = kDefaultSizeAlignment); size_t align);
} // namespace runtime } // namespace runtime
} // namespace tvm } // namespace tvm
......
...@@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI {
void FreeDataSpace(TVMContext ctx, void* ptr) final { void FreeDataSpace(TVMContext ctx, void* ptr) final {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(ptr); MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(ptr);
dev_space->session->FreeInSection( dev_space->session->FreeInSection(
SectionKind::kHeap, DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data))); SectionKind::kHeap, DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space; delete dev_space;
} }
...@@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session; ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
std::vector<uint8_t> buffer(size); std::vector<uint8_t> buffer(size);
lld->Read(from_dev_offset, static_cast<void*>(buffer.data()), size); lld->Read(from_dev_addr, static_cast<void*>(buffer.data()), size);
lld->Write(to_dev_offset, static_cast<void*>(buffer.data()), size); lld->Write(to_dev_addr, static_cast<void*>(buffer.data()), size);
} else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) { } else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) {
// Reading from the device. // Reading from the device.
...@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session; ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
void* to_host_ptr = GetHostLoc(to, to_offset); void* to_host_ptr = GetHostLoc(to, to_offset);
lld->Read(from_dev_offset, to_host_ptr, size); lld->Read(from_dev_addr, to_host_ptr, size);
} else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) { } else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) {
// Writing to the device. // Writing to the device.
...@@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI {
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
void* from_host_ptr = GetHostLoc(from, from_offset); void* from_host_ptr = GetHostLoc(from, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
lld->Write(to_dev_offset, from_host_ptr, size); lld->Write(to_dev_addr, from_host_ptr, size);
} else { } else {
LOG(FATAL) << "Expect copy from/to micro device or between micro device\n"; LOG(FATAL) << "Expect copy from/to micro device or between micro device\n";
} }
...@@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(data); MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(data);
ObjectPtr<MicroSession>& session = dev_space->session; ObjectPtr<MicroSession>& session = dev_space->session;
session->FreeInSection(SectionKind::kWorkspace, session->FreeInSection(SectionKind::kWorkspace,
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data))); DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space; delete dev_space;
} }
...@@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI {
} }
private: private:
DevBaseOffset GetDevLoc(MicroDevSpace* dev_space, size_t offset) { DevPtr GetDevLoc(MicroDevSpace* dev_space, size_t offset) {
DevBaseOffset dev_offset = return DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
return dev_offset;
} }
void* GetHostLoc(const void* ptr, size_t offset) { void* GetHostLoc(const void* ptr, size_t offset) {
......
...@@ -55,62 +55,48 @@ class MicroModuleNode final : public ModuleNode { ...@@ -55,62 +55,48 @@ class MicroModuleNode final : public ModuleNode {
*/ */
void InitMicroModule(const std::string& binary_path) { void InitMicroModule(const std::string& binary_path) {
session_ = MicroSession::Current(); session_ = MicroSession::Current();
binary_path_ = binary_path; symbol_map_ = session_->LoadBinary(binary_path, true).symbol_map;
binary_info_ = session_->LoadBinary(binary_path_);
}
/*!
* \brief runs selected function on the micro device
* \param func_name name of the function to be run
* \param func_offset offset of the function to be run
* \param args type-erased arguments passed to the function
*/
void RunFunction(const std::string& func_name, DevBaseOffset func_offset, const TVMArgs& args) {
session_->PushToExecQueue(func_offset, args);
} }
private: private:
/*! \brief module binary info */ SymbolMap symbol_map_;
BinaryInfo binary_info_;
/*! \brief path to module binary */
std::string binary_path_;
/*! \brief global session pointer */ /*! \brief global session pointer */
ObjectPtr<MicroSession> session_; ObjectPtr<MicroSession> session_;
}; };
class MicroWrappedFunc { class MicroWrappedFunc {
public: public:
MicroWrappedFunc(MicroModuleNode* m, MicroWrappedFunc(ObjectPtr<MicroSession> session,
ObjectPtr<MicroSession> session, DevPtr func_ptr) {
const std::string& func_name,
DevBaseOffset func_offset) {
m_ = m;
session_ = session; session_ = session;
func_name_ = func_name; func_ptr_ = func_ptr;
func_offset_ = func_offset;
} }
void operator()(TVMArgs args, TVMRetValue* rv) const { void operator()(TVMArgs args, TVMRetValue* rv) const {
m_->RunFunction(func_name_, func_offset_, args); *rv = session_->PushToExecQueue(func_ptr_, args);
} }
private: private:
/*! \brief internal module */
MicroModuleNode* m_;
/*! \brief reference to the session for this function (to keep the session alive) */ /*! \brief reference to the session for this function (to keep the session alive) */
ObjectPtr<MicroSession> session_; ObjectPtr<MicroSession> session_;
/*! \brief name of the function */
std::string func_name_;
/*! \brief offset of the function to be called */ /*! \brief offset of the function to be called */
DevBaseOffset func_offset_; DevPtr func_ptr_;
}; };
PackedFunc MicroModuleNode::GetFunction( PackedFunc MicroModuleNode::GetFunction(
const std::string& name, const std::string& name,
const ObjectPtr<Object>& sptr_to_self) { const ObjectPtr<Object>& sptr_to_self) {
DevBaseOffset func_offset = DevPtr func_ptr;
session_->low_level_device()->ToDevOffset(binary_info_.symbol_map[name]); if (name == tvm::runtime::symbol::tvm_module_main) {
MicroWrappedFunc f(this, session_, name, func_offset); if (symbol_map_.HasSymbol(tvm::runtime::symbol::tvm_module_main)) {
func_ptr = symbol_map_[tvm::runtime::symbol::tvm_module_main];
} else {
func_ptr = symbol_map_["default_function"];
}
} else {
func_ptr = symbol_map_[name];
}
MicroWrappedFunc f(session_, func_ptr);
return PackedFunc(f); return PackedFunc(f);
} }
......
...@@ -38,11 +38,15 @@ class MicroSectionAllocator { ...@@ -38,11 +38,15 @@ class MicroSectionAllocator {
* \brief constructor that specifies section boundaries * \brief constructor that specifies section boundaries
* \param region location and size of the section on the device * \param region location and size of the section on the device
*/ */
explicit MicroSectionAllocator(DevMemRegion region) explicit MicroSectionAllocator(DevMemRegion region, size_t word_size)
: start_offset_(region.start), : start_addr_(region.start),
size_(0), size_(0),
capacity_(region.size) { capacity_(region.size),
CHECK_EQ(start_offset_.value() % 8, 0) << "micro section not aligned to 8 bytes"; word_size_(word_size) {
CHECK_EQ(start_addr_.value().val64 % word_size, 0)
<< "micro section start not aligned to " << word_size << " bytes";
CHECK_EQ(capacity_ % word_size, 0)
<< "micro section end not aligned to " << word_size << " bytes";
} }
/*! /*!
...@@ -55,15 +59,15 @@ class MicroSectionAllocator { ...@@ -55,15 +59,15 @@ class MicroSectionAllocator {
* \param size size of allocated memory in bytes * \param size size of allocated memory in bytes
* \return pointer to allocated memory region in section, nullptr if out of space * \return pointer to allocated memory region in section, nullptr if out of space
*/ */
DevBaseOffset Allocate(size_t size) { DevPtr Allocate(size_t size) {
size_ = UpperAlignValue(size_, 8); size_ = UpperAlignValue(size_, word_size_);
CHECK(size_ + size < capacity_) CHECK(size_ + size < capacity_)
<< "cannot alloc " << size << " bytes in section with start_addr " << << "cannot alloc " << size << " bytes in section with start_addr " <<
start_offset_.value(); start_addr_.cast_to<void*>();
DevBaseOffset alloc_ptr = start_offset_ + size_; DevPtr alloc_addr = start_addr_ + size_;
size_ += size; size_ += size;
alloc_map_[alloc_ptr.value()] = size; alloc_map_[alloc_addr.value().val64] = size;
return alloc_ptr; return alloc_addr;
} }
/*! /*!
...@@ -71,10 +75,10 @@ class MicroSectionAllocator { ...@@ -71,10 +75,10 @@ class MicroSectionAllocator {
* \param offs offset to allocated memory * \param offs offset to allocated memory
* \note simple allocator scheme, more complex versions will be implemented later * \note simple allocator scheme, more complex versions will be implemented later
*/ */
void Free(DevBaseOffset offs) { void Free(DevPtr addr) {
std::uintptr_t ptr = offs.value(); CHECK(alloc_map_.find(addr.value().val64) != alloc_map_.end())
CHECK(alloc_map_.find(ptr) != alloc_map_.end()) << "freed pointer was never allocated"; << "freed pointer was never allocated";
alloc_map_.erase(ptr); alloc_map_.erase(addr.value().val64);
if (alloc_map_.empty()) { if (alloc_map_.empty()) {
size_ = 0; size_ = 0;
} }
...@@ -83,17 +87,17 @@ class MicroSectionAllocator { ...@@ -83,17 +87,17 @@ class MicroSectionAllocator {
/*! /*!
* \brief start offset of the memory region managed by this allocator * \brief start offset of the memory region managed by this allocator
*/ */
DevBaseOffset start_offset() const { return start_offset_; } DevPtr start_addr() const { return start_addr_; }
/*! /*!
* \brief current end offset of the space being used in this memory region * \brief current end addr of the space being used in this memory region
*/ */
DevBaseOffset curr_end_offset() const { return start_offset_ + size_; } DevPtr curr_end_addr() const { return start_addr_ + size_; }
/*! /*!
* \brief end offset of the memory region managed by this allocator * \brief end addr of the memory region managed by this allocator
*/ */
DevBaseOffset max_end_offset() const { return start_offset_ + capacity_; } DevPtr max_addr() const { return start_addr_ + capacity_; }
/*! /*!
* \brief size of the section * \brief size of the section
...@@ -107,13 +111,15 @@ class MicroSectionAllocator { ...@@ -107,13 +111,15 @@ class MicroSectionAllocator {
private: private:
/*! \brief start address of the section */ /*! \brief start address of the section */
DevBaseOffset start_offset_; DevPtr start_addr_;
/*! \brief current size of the section */ /*! \brief current size of the section */
size_t size_; size_t size_;
/*! \brief total storage capacity of the section */ /*! \brief total storage capacity of the section */
size_t capacity_; size_t capacity_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
/*! \brief allocation map for allocation sizes */ /*! \brief allocation map for allocation sizes */
std::unordered_map<std::uintptr_t, size_t> alloc_map_; std::unordered_map<uint64_t, size_t> alloc_map_;
}; };
} // namespace runtime } // namespace runtime
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <dmlc/thread_local.h> #include <dmlc/thread_local.h>
#include <tvm/runtime/registry.h> #include <tvm/runtime/registry.h>
#include <memory>
#include <stack> #include <stack>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
...@@ -56,17 +57,149 @@ void MicroSession::ExitWithScope() { ...@@ -56,17 +57,149 @@ void MicroSession::ExitWithScope() {
entry->session_stack.pop(); entry->session_stack.pop();
} }
MicroSession::MicroSession() { MicroSession::MicroSession(
DevBaseOffset curr_start_offset = kDeviceStart; const std::string& comms_method,
for (size_t i = 0; i < static_cast<size_t>(SectionKind::kNumKinds); i++) { const std::string& binary_path,
size_t section_size = GetDefaultSectionSize(static_cast<SectionKind>(i)); const std::string& toolchain_prefix,
section_allocators_[i] = std::make_shared<MicroSectionAllocator>(DevMemRegion { uint64_t text_start,
.start = curr_start_offset, size_t text_size,
.size = section_size, uint64_t rodata_start,
}); size_t rodata_size,
curr_start_offset += section_size; uint64_t data_start,
size_t data_size,
uint64_t bss_start,
size_t bss_size,
uint64_t args_start,
size_t args_size,
uint64_t heap_start,
size_t heap_size,
uint64_t workspace_start,
size_t workspace_size,
uint64_t stack_start,
size_t stack_size,
size_t word_size,
bool thumb_mode,
const std::string& server_addr,
int port)
: toolchain_prefix_(toolchain_prefix)
, word_size_(word_size)
, thumb_mode_(thumb_mode) {
CHECK(word_size_ == 4 || word_size_ == 8) << "unsupported word size " << word_size_;
if (comms_method == "host") {
// TODO(weberlo): move checks to python
CHECK(
text_start == 0 &&
rodata_start == 0 &&
data_start == 0 &&
bss_start == 0 &&
args_start == 0 &&
heap_start == 0 &&
workspace_start == 0 &&
stack_start == 0) << "unable to specify section addresses for host device";
size_t memory_size =
text_size + rodata_size + data_size + bss_size +
args_size + heap_size + workspace_size + stack_size;
void* base_addr;
low_level_device_ = HostLowLevelDeviceCreate(memory_size, &base_addr);
CHECK_EQ(reinterpret_cast<std::uintptr_t>(base_addr) % word_size_, 0)
<< "base address not aligned to " << word_size_ << " bytes";
DevPtr curr_addr = DevPtr(reinterpret_cast<std::uintptr_t>(base_addr));
section_allocators_[0] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = text_size,
}, word_size_);
curr_addr += text_size;
section_allocators_[1] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = rodata_size,
}, word_size_);
curr_addr += rodata_size;
section_allocators_[2] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = data_size,
}, word_size_);
curr_addr += data_size;
section_allocators_[3] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = bss_size,
}, word_size_);
curr_addr += bss_size;
section_allocators_[4] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = args_size,
}, word_size_);
curr_addr += args_size;
section_allocators_[5] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = heap_size,
}, word_size_);
curr_addr += heap_size;
section_allocators_[6] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = workspace_size,
}, word_size_);
curr_addr += workspace_size;
section_allocators_[7] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = curr_addr,
.size = stack_size,
}, word_size_);
curr_addr += stack_size;
} else if (comms_method == "openocd") {
low_level_device_ = OpenOCDLowLevelDeviceCreate(server_addr, port);
section_allocators_[0] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(text_start),
.size = text_size,
}, word_size_);
section_allocators_[1] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(rodata_start),
.size = rodata_size,
}, word_size_);
section_allocators_[2] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(data_start),
.size = data_size,
}, word_size_);
section_allocators_[3] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(bss_start),
.size = bss_size,
}, word_size_);
section_allocators_[4] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(args_start),
.size = args_size,
}, word_size_);
section_allocators_[5] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(heap_start),
.size = heap_size,
}, word_size_);
section_allocators_[6] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(workspace_start),
.size = workspace_size,
}, word_size_);
section_allocators_[7] = std::make_shared<MicroSectionAllocator>(DevMemRegion {
.start = DevPtr(stack_start),
.size = stack_size,
}, word_size_);
} else {
LOG(FATAL) << "unsupported micro low-level device";
}
runtime_symbol_map_ = LoadBinary(binary_path, false).symbol_map;
// Patch pointers to define the bounds of the workspace section and the word
// size (for allocation alignment).
std::shared_ptr<MicroSectionAllocator> ws_allocator = GetAllocator(SectionKind::kWorkspace);
TargetVal ws_start = ws_allocator->start_addr().value();
TargetVal ws_end = ws_allocator->max_addr().value();
TargetVal target_word_size { .val64 = word_size_ };
if (word_size_ == 4) {
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val32);
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val32);
DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val32);
} else if (word_size_ == 8) {
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_start", ws_start.val64);
DevSymbolWrite(runtime_symbol_map_, "utvm_workspace_end", ws_end.val64);
DevSymbolWrite(runtime_symbol_map_, "utvm_word_size", target_word_size.val64);
} }
memory_size_ = curr_start_offset.cast_to<size_t>();
} }
MicroSession::~MicroSession() { MicroSession::~MicroSession() {
...@@ -76,79 +209,118 @@ MicroSession::~MicroSession() { ...@@ -76,79 +209,118 @@ MicroSession::~MicroSession() {
low_level_device_ = nullptr; low_level_device_ = nullptr;
} }
void MicroSession::CreateSession(const std::string& device_type, double MicroSession::PushToExecQueue(DevPtr func_ptr, const TVMArgs& args) {
const std::string& binary_path, if (thumb_mode_) {
const std::string& toolchain_prefix, func_ptr += 1;
std::uintptr_t base_addr,
const std::string& server_addr,
int port) {
// TODO(weberlo): make device type enum
toolchain_prefix_ = toolchain_prefix;
if (device_type == "host") {
low_level_device_ = HostLowLevelDeviceCreate(memory_size_);
} else if (device_type == "openocd") {
// TODO(weberlo): We need a better way of configuring devices.
low_level_device_ = OpenOCDLowLevelDeviceCreate(base_addr, server_addr, port);
} else {
LOG(FATAL) << "unsupported micro low-level device";
} }
SetRuntimeBinaryPath(binary_path);
CHECK(!runtime_binary_path_.empty()) << "uTVM runtime not initialized";
runtime_bin_info_ = LoadBinary(runtime_binary_path_, /* patch_dylib_pointers */ false);
utvm_main_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMMain"]);
utvm_done_symbol_ = low_level_device()->ToDevOffset(runtime_symbol_map()["UTVMDone"]);
if (device_type == "openocd") {
// Set OpenOCD device's stack pointer.
auto stack_section = GetAllocator(SectionKind::kStack);
low_level_device_->SetStackTop(stack_section->max_end_offset());
}
// Patch workspace pointers to the start of the workspace section.
DevBaseOffset workspace_start_offset = GetAllocator(SectionKind::kWorkspace)->start_offset();
DevBaseOffset workspace_end_offset = GetAllocator(SectionKind::kWorkspace)->max_end_offset();
void* workspace_start_addr =
low_level_device_->ToDevPtr(workspace_start_offset).cast_to<void*>();
void* workspace_end_addr =
low_level_device_->ToDevPtr(workspace_end_offset).cast_to<void*>();
DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_begin", workspace_start_addr);
DevSymbolWrite(runtime_symbol_map(), "utvm_workspace_end", workspace_end_addr);
}
void MicroSession::PushToExecQueue(DevBaseOffset func, const TVMArgs& args) {
int32_t (*func_dev_addr)(void*, void*, int32_t) =
reinterpret_cast<int32_t (*)(void*, void*, int32_t)>(
low_level_device()->ToDevPtr(func).value());
// Create an allocator stream for the memory region after the most recent // Create an allocator stream for the memory region after the most recent
// allocation in the args section. // allocation in the args section.
DevPtr args_addr = DevPtr args_addr = GetAllocator(SectionKind::kArgs)->curr_end_addr();
low_level_device()->ToDevPtr(GetAllocator(SectionKind::kArgs)->curr_end_offset()); TargetDataLayoutEncoder encoder(args_addr, word_size_);
TargetDataLayoutEncoder encoder(args_addr);
std::tuple<DevPtr, DevPtr> arg_field_addrs = EncoderAppend(&encoder, args); std::tuple<DevPtr, DevPtr> arg_field_addrs = EncoderAppend(&encoder, args);
// Flush `stream` to device memory. // Flush `stream` to device memory.
DevBaseOffset stream_dev_offset = DevPtr stream_dev_addr =
GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size()); GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size());
low_level_device()->Write(stream_dev_offset, low_level_device()->Write(stream_dev_addr,
reinterpret_cast<void*>(encoder.data()), reinterpret_cast<void*>(encoder.data()),
encoder.buf_size()); encoder.buf_size());
UTVMTask task = { TargetVal arg_values_dev_addr = std::get<0>(arg_field_addrs).value();
.func = func_dev_addr, TargetVal arg_type_codes_dev_addr = std::get<1>(arg_field_addrs).value();
.arg_values = std::get<0>(arg_field_addrs).cast_to<TVMValue*>(), if (word_size_ == 4) {
.arg_type_codes = std::get<1>(arg_field_addrs).cast_to<int*>(), UTVMTask32 task = {
.func = func_ptr.value().val32,
.arg_values = arg_values_dev_addr.val32,
.arg_type_codes = arg_type_codes_dev_addr.val32,
.num_args = args.num_args, .num_args = args.num_args,
}; };
// Write the task. // Write the task.
DevSymbolWrite(runtime_symbol_map(), "task", task); DevSymbolWrite(runtime_symbol_map_, "utvm_task", task);
} else if (word_size_ == 8) {
UTVMTask64 task = {
.func = func_ptr.value().val64,
.arg_values = arg_values_dev_addr.val64,
.arg_type_codes = arg_type_codes_dev_addr.val64,
.num_args = args.num_args,
};
// Write the task.
DevSymbolWrite(runtime_symbol_map_, "utvm_task", task);
}
low_level_device()->Execute(utvm_main_symbol_, utvm_done_symbol_); DevPtr utvm_init_addr = runtime_symbol_map_["UTVMInit"];
DevPtr utvm_done_addr = runtime_symbol_map_["UTVMDone"];
if (thumb_mode_) {
utvm_init_addr += 1;
}
low_level_device()->Execute(utvm_init_addr, utvm_done_addr);
// Check if there was an error during execution. If so, log it. // Check if there was an error during execution. If so, log it.
CheckDeviceError(); CheckDeviceError();
uint32_t task_time = DevSymbolRead<uint32_t>(runtime_symbol_map_, "utvm_task_time");
GetAllocator(SectionKind::kArgs)->Free(stream_dev_addr);
return static_cast<double>(task_time);
}
BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) {
DevMemRegion text_section;
DevMemRegion rodata_section;
DevMemRegion data_section;
DevMemRegion bss_section;
text_section.size = GetSectionSize(
binary_path, SectionKind::kText, toolchain_prefix_, word_size_);
rodata_section.size = GetSectionSize(
binary_path, SectionKind::kRodata, toolchain_prefix_, word_size_);
data_section.size = GetSectionSize(
binary_path, SectionKind::kData, toolchain_prefix_, word_size_);
bss_section.size = GetSectionSize(
binary_path, SectionKind::kBss, toolchain_prefix_, word_size_);
text_section.start = AllocateInSection(SectionKind::kText, text_section.size);
rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size);
data_section.start = AllocateInSection(SectionKind::kData, data_section.size);
bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size);
CHECK(text_section.start != nullptr && rodata_section.start != nullptr &&
data_section.start != nullptr && bss_section.start != nullptr)
<< "not enough space to load module on device";
std::string relocated_bin = RelocateBinarySections(
binary_path,
word_size_,
text_section.start,
rodata_section.start,
data_section.start,
bss_section.start,
GetAllocator(SectionKind::kStack)->max_addr(),
toolchain_prefix_);
std::string text_contents = ReadSection(relocated_bin, SectionKind::kText, toolchain_prefix_);
std::string rodata_contents = ReadSection(relocated_bin, SectionKind::kRodata, toolchain_prefix_);
std::string data_contents = ReadSection(relocated_bin, SectionKind::kData, toolchain_prefix_);
std::string bss_contents = ReadSection(relocated_bin, SectionKind::kBss, toolchain_prefix_);
low_level_device_->Write(text_section.start, &text_contents[0], text_section.size);
low_level_device_->Write(rodata_section.start, &rodata_contents[0], rodata_section.size);
low_level_device_->Write(data_section.start, &data_contents[0], data_section.size);
low_level_device_->Write(bss_section.start, &bss_contents[0], bss_section.size);
SymbolMap symbol_map {relocated_bin, toolchain_prefix_};
if (patch_dylib_pointers) {
// Patch device lib pointers.
PatchImplHole(symbol_map, "TVMBackendAllocWorkspace");
PatchImplHole(symbol_map, "TVMBackendFreeWorkspace");
PatchImplHole(symbol_map, "TVMAPISetLastError");
}
GetAllocator(SectionKind::kArgs)->Free(stream_dev_offset); return BinaryInfo {
.text_section = text_section,
.rodata_section = rodata_section,
.data_section = data_section,
.bss_section = bss_section,
.symbol_map = symbol_map,
};
} }
std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend( std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
...@@ -171,7 +343,12 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend( ...@@ -171,7 +343,12 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
// Mutate the array to unwrap the `data` field. // Mutate the array to unwrap the `data` field.
base_arr_handle->data = reinterpret_cast<MicroDevSpace*>(old_data)->data; base_arr_handle->data = reinterpret_cast<MicroDevSpace*>(old_data)->data;
// Now, encode the unwrapped version. // Now, encode the unwrapped version.
void* arr_ptr = EncoderAppend(encoder, *base_arr_handle).cast_to<void*>(); void* arr_ptr = nullptr;
if (word_size_ == 4) {
arr_ptr = EncoderAppend<TVMArray32>(encoder, *base_arr_handle).cast_to<void*>();
} else if (word_size_ == 8) {
arr_ptr = EncoderAppend<TVMArray64>(encoder, *base_arr_handle).cast_to<void*>();
}
// And restore the original wrapped version. // And restore the original wrapped version.
base_arr_handle->data = old_data; base_arr_handle->data = old_data;
...@@ -190,54 +367,53 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend( ...@@ -190,54 +367,53 @@ std::tuple<DevPtr, DevPtr> MicroSession::EncoderAppend(
} }
} }
type_codes_slot.WriteArray(type_codes, num_args); type_codes_slot.WriteArray(type_codes, num_args);
return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr()); return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr());
} }
template <typename T>
DevPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr) { DevPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr) {
auto tvm_arr_slot = encoder->Alloc<TVMArray>(); auto tvm_arr_slot = encoder->Alloc<T>();
auto shape_slot = encoder->Alloc<int64_t>(arr.ndim); auto shape_slot = encoder->Alloc<int64_t>(arr.ndim);
// `shape` and `strides` are stored on the host, so we need to write them to // `shape` and `strides` are stored on the host, so we need to write them to
// the device first. The `data` field is already allocated on the device and // the device first. The `data` field is already allocated on the device and
// is a device pointer, so we don't need to write it. // is a device pointer, so we don't need to write it.
shape_slot.WriteArray(arr.shape, arr.ndim); shape_slot.WriteArray(arr.shape, arr.ndim);
DevPtr shape_addr = shape_slot.start_addr(); DevPtr shape_dev_addr = shape_slot.start_addr();
DevPtr strides_addr = DevPtr(nullptr); DevPtr strides_dev_addr = DevPtr(nullptr);
if (arr.strides != nullptr) { if (arr.strides != nullptr) {
auto stride_slot = encoder->Alloc<int64_t>(arr.ndim); auto stride_slot = encoder->Alloc<int64_t>(arr.ndim);
stride_slot.WriteArray(arr.strides, arr.ndim); stride_slot.WriteArray(arr.strides, arr.ndim);
strides_addr = stride_slot.start_addr(); strides_dev_addr = stride_slot.start_addr();
} }
// Copy `arr`, update the copy's pointers to be device pointers, then T dev_arr(
// write the copy to `tvm_arr_slot`. TargetVal { .val64 = reinterpret_cast<uint64_t>(arr.data) },
TVMArray dev_arr = arr; arr.ctx,
// Update the device type to look like a host, because codegen generates arr.ndim,
// checks that it is a host array. arr.dtype,
shape_dev_addr.value(),
strides_dev_addr.value(),
TargetVal { .val64 = arr.byte_offset });
CHECK(dev_arr.ctx.device_type == static_cast<DLDeviceType>(kDLMicroDev)) CHECK(dev_arr.ctx.device_type == static_cast<DLDeviceType>(kDLMicroDev))
<< "attempt to write TVMArray with non-micro device type"; << "attempt to write TVMArray with non-micro device type";
// Update the device type to CPU, because from the microcontroller's
// perspective, it is.
dev_arr.ctx.device_type = DLDeviceType::kDLCPU; dev_arr.ctx.device_type = DLDeviceType::kDLCPU;
// Add the base address of the device to the array's data's device offset to
// get a device address.
DevBaseOffset arr_offset(reinterpret_cast<std::uintptr_t>(arr.data));
dev_arr.data = low_level_device()->ToDevPtr(arr_offset).cast_to<void*>();
dev_arr.shape = shape_addr.cast_to<int64_t*>();
dev_arr.strides = strides_addr.cast_to<int64_t*>();
tvm_arr_slot.WriteValue(dev_arr); tvm_arr_slot.WriteValue(dev_arr);
return tvm_arr_slot.start_addr(); return tvm_arr_slot.start_addr();
} }
void MicroSession::CheckDeviceError() { void MicroSession::CheckDeviceError() {
int32_t return_code = DevSymbolRead<int32_t>(runtime_symbol_map(), "utvm_return_code"); int32_t return_code = DevSymbolRead<int32_t>(runtime_symbol_map_, "utvm_return_code");
if (return_code) { if (return_code) {
std::uintptr_t last_error = std::uintptr_t last_error =
DevSymbolRead<std::uintptr_t>(runtime_symbol_map(), "utvm_last_error"); DevSymbolRead<std::uintptr_t>(runtime_symbol_map_, "utvm_last_error");
std::string last_error_str; std::string last_error_str;
if (last_error) { if (last_error) {
DevBaseOffset last_err_offset = low_level_device()->ToDevOffset(DevPtr(last_error)); DevPtr last_err_addr = DevPtr(last_error);
last_error_str = ReadString(last_err_offset); last_error_str = ReadString(last_err_addr);
} }
LOG(FATAL) << "error during micro function execution:\n" LOG(FATAL) << "error during micro function execution:\n"
<< " return code: " << std::dec << return_code << "\n" << " return code: " << std::dec << return_code << "\n"
...@@ -246,100 +422,51 @@ void MicroSession::CheckDeviceError() { ...@@ -246,100 +422,51 @@ void MicroSession::CheckDeviceError() {
} }
} }
BinaryInfo MicroSession::LoadBinary(const std::string& binary_path, bool patch_dylib_pointers) {
  // Loads the object file at `binary_path` onto the device and returns the
  // device regions it occupies together with its symbol map.
  //
  // Steps, in order: query section sizes, allocate a region per section,
  // relocate the binary to the allocated addresses, copy the relocated
  // section contents to the device, then (optionally) patch the runtime API
  // function pointers via `PatchImplHole`.

  // Every size query targets the same binary and toolchain.
  auto section_size = [&](SectionKind kind) {
    return GetSectionSize(binary_path, kind, toolchain_prefix_);
  };

  DevMemRegion text_section;
  DevMemRegion rodata_section;
  DevMemRegion data_section;
  DevMemRegion bss_section;

  text_section.size = section_size(SectionKind::kText);
  rodata_section.size = section_size(SectionKind::kRodata);
  data_section.size = section_size(SectionKind::kData);
  bss_section.size = section_size(SectionKind::kBss);

  // Reserve device memory for each section; all four must succeed.
  text_section.start = AllocateInSection(SectionKind::kText, text_section.size);
  rodata_section.start = AllocateInSection(SectionKind::kRodata, rodata_section.size);
  data_section.start = AllocateInSection(SectionKind::kData, data_section.size);
  bss_section.start = AllocateInSection(SectionKind::kBss, bss_section.size);
  CHECK(text_section.start != nullptr && rodata_section.start != nullptr &&
        data_section.start != nullptr && bss_section.start != nullptr)
      << "not enough space to load module on device";

  // Relocation works on device pointers, while the allocators hand out
  // offsets, so translate each section start first.
  DevPtr text_addr = low_level_device_->ToDevPtr(text_section.start);
  DevPtr rodata_addr = low_level_device_->ToDevPtr(rodata_section.start);
  DevPtr data_addr = low_level_device_->ToDevPtr(data_section.start);
  DevPtr bss_addr = low_level_device_->ToDevPtr(bss_section.start);
  std::string relocated_path = RelocateBinarySections(
      binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix_);

  // Pull the contents of every section out of the relocated binary ...
  auto read_section = [&](SectionKind kind) {
    return ReadSection(relocated_path, kind, toolchain_prefix_);
  };
  std::string text_bytes = read_section(SectionKind::kText);
  std::string rodata_bytes = read_section(SectionKind::kRodata);
  std::string data_bytes = read_section(SectionKind::kData);
  std::string bss_bytes = read_section(SectionKind::kBss);

  // ... and copy them into the regions reserved above.
  low_level_device_->Write(text_section.start, &text_bytes[0], text_section.size);
  low_level_device_->Write(rodata_section.start, &rodata_bytes[0], rodata_section.size);
  low_level_device_->Write(data_section.start, &data_bytes[0], data_section.size);
  low_level_device_->Write(bss_section.start, &bss_bytes[0], bss_section.size);

  SymbolMap symbols {relocated_path, toolchain_prefix_};
  if (patch_dylib_pointers) {
    // Patch device lib pointers.
    PatchImplHole(symbols, "TVMBackendAllocWorkspace");
    PatchImplHole(symbols, "TVMBackendFreeWorkspace");
    PatchImplHole(symbols, "TVMAPISetLastError");
  }

  BinaryInfo loaded {
    .text_section = text_section,
    .rodata_section = rodata_section,
    .data_section = data_section,
    .bss_section = bss_section,
    .symbol_map = symbols,
  };
  return loaded;
}
void MicroSession::PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name) { void MicroSession::PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name) {
void* runtime_impl_addr = runtime_symbol_map()[func_name].cast_to<void*>(); DevPtr runtime_impl_addr = runtime_symbol_map_[func_name];
if (thumb_mode_) {
runtime_impl_addr += 1;
}
std::ostringstream func_name_underscore; std::ostringstream func_name_underscore;
func_name_underscore << func_name << "_"; func_name_underscore << func_name << "_";
DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr); if (word_size_ == 4) {
} DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val32);
} else if (word_size_ == 8) {
void MicroSession::SetRuntimeBinaryPath(std::string path) { DevSymbolWrite(symbol_map, func_name_underscore.str(), runtime_impl_addr.value().val64);
runtime_binary_path_ = path; }
} }
std::string MicroSession::ReadString(DevBaseOffset str_offset) { std::string MicroSession::ReadString(DevPtr str_addr) {
std::ostringstream result; std::ostringstream result;
const size_t buf_size = 256; const size_t buf_size = 256;
std::vector<char> buf(buf_size, 0); std::vector<char> buf(buf_size, 0);
size_t i = buf_size; size_t i = buf_size;
while (i == buf_size) { while (i == buf_size) {
low_level_device()->Read(str_offset, buf.data(), buf_size); low_level_device()->Read(str_addr, buf.data(), buf_size);
i = 0; i = 0;
while (i < buf_size) { while (i < buf_size) {
if (buf[i] == 0) break; if (buf[i] == 0) break;
result << buf[i]; result << buf[i];
i++; i++;
} }
str_offset = str_offset + i; str_addr = str_addr + i;
} }
return result.str(); return result.str();
} }
DevBaseOffset MicroSession::AllocateInSection(SectionKind type, size_t size) { DevPtr MicroSession::AllocateInSection(SectionKind type, size_t size) {
return GetAllocator(type)->Allocate(size); return GetAllocator(type)->Allocate(size);
} }
void MicroSession::FreeInSection(SectionKind type, DevBaseOffset ptr) { void MicroSession::FreeInSection(SectionKind type, DevPtr addr) {
return GetAllocator(type)->Free(ptr); return GetAllocator(type)->Free(addr);
} }
template <typename T> template <typename T>
T MicroSession::DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol) { T MicroSession::DevSymbolRead(const SymbolMap& symbol_map, const std::string& symbol) {
DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); DevPtr sym_addr = symbol_map[symbol];
T result; T result;
low_level_device()->Read(sym_offset, &result, sizeof(T)); low_level_device()->Read(sym_addr, &result, sizeof(T));
return result; return result;
} }
...@@ -347,8 +474,8 @@ template <typename T> ...@@ -347,8 +474,8 @@ template <typename T>
void MicroSession::DevSymbolWrite(const SymbolMap& symbol_map, void MicroSession::DevSymbolWrite(const SymbolMap& symbol_map,
const std::string& symbol, const std::string& symbol,
const T& value) { const T& value) {
DevBaseOffset sym_offset = low_level_device()->ToDevOffset(symbol_map[symbol]); DevPtr sym_addr = symbol_map[symbol];
low_level_device()->Write(sym_offset, &value, sizeof(T)); low_level_device()->Write(sym_addr, &value, sizeof(T));
} }
PackedFunc MicroSession::GetFunction( PackedFunc MicroSession::GetFunction(
...@@ -370,15 +497,53 @@ PackedFunc MicroSession::GetFunction( ...@@ -370,15 +497,53 @@ PackedFunc MicroSession::GetFunction(
// create micro session and low-level device from Python frontend // create micro session and low-level device from Python frontend
TVM_REGISTER_GLOBAL("micro._CreateSession") TVM_REGISTER_GLOBAL("micro._CreateSession")
.set_body([](TVMArgs args, TVMRetValue* rv) { .set_body([](TVMArgs args, TVMRetValue* rv) {
const std::string& device_type = args[0]; const std::string& comms_method = args[0];
const std::string& binary_path = args[1]; const std::string& binary_path = args[1];
const std::string& toolchain_prefix = args[2]; const std::string& toolchain_prefix = args[2];
uint64_t base_addr = args[3]; uint64_t text_start = args[3];
const std::string& server_addr = args[4]; size_t text_size = args[4];
int port = args[5]; uint64_t rodata_start = args[5];
ObjectPtr<MicroSession> session = make_object<MicroSession>(); size_t rodata_size = args[6];
session->CreateSession( uint64_t data_start = args[7];
device_type, binary_path, toolchain_prefix, base_addr, server_addr, port); size_t data_size = args[8];
uint64_t bss_start = args[9];
size_t bss_size = args[10];
uint64_t args_start = args[11];
size_t args_size = args[12];
uint64_t heap_start = args[13];
size_t heap_size = args[14];
uint64_t workspace_start = args[15];
size_t workspace_size = args[16];
uint64_t stack_start = args[17];
size_t stack_size = args[18];
size_t word_size = args[19];
bool thumb_mode = args[20];
const std::string& server_addr = args[21];
int port = args[22];
ObjectPtr<MicroSession> session = make_object<MicroSession>(
comms_method,
binary_path,
toolchain_prefix,
text_start,
text_size,
rodata_start,
rodata_size,
data_start,
data_size,
bss_start,
bss_size,
args_start,
args_size,
heap_start,
heap_size,
workspace_start,
workspace_size,
stack_start,
stack_size,
word_size,
thumb_mode,
server_addr,
port);
*rv = Module(session); *rv = Module(session);
}); });
......
...@@ -47,7 +47,6 @@ ...@@ -47,7 +47,6 @@
#include <tuple> #include <tuple>
#include "low_level_device.h" #include "low_level_device.h"
#include "device/utvm_runtime.h"
#include "target_data_layout_encoder.h" #include "target_data_layout_encoder.h"
namespace tvm { namespace tvm {
...@@ -75,9 +74,55 @@ class MicroSession : public ModuleNode { ...@@ -75,9 +74,55 @@ class MicroSession : public ModuleNode {
} }
/*! /*!
* \brief constructor * \brief creates session by setting up a low-level device and initting allocators for it
* \param comms_method method of communication with the device (e.g., "openocd")
* \param binary_path file system path to the runtime binary
* \param toolchain_prefix GCC toolchain prefix
* \param text_start text section start address
* \param text_size text section size
* \param rodata_start rodata section start address
* \param rodata_size rodata section size
* \param data_start data section start address
* \param data_size data section size
* \param bss_start bss section start address
* \param bss_size bss section size
* \param args_start args section start address
* \param args_size args section size
* \param heap_start heap section start address
* \param heap_size heap section size
* \param workspace_start workspace section start address
* \param workspace_size workspace section size
* \param stack_start stack section start address
* \param stack_size stack section size
* \param word_size number of bytes in a word on the target device
* \param thumb_mode whether the target device requires a thumb-mode bit on function addresses
* \param server_addr address of the OpenOCD server to connect to (if `comms_method == "openocd"`)
* \param port port of the OpenOCD server to connect to (if `comms_method == "openocd"`)
*/ */
MicroSession(); MicroSession(
const std::string& comms_method,
const std::string& binary_path,
const std::string& toolchain_prefix,
uint64_t text_start,
size_t text_size,
uint64_t rodata_start,
size_t rodata_size,
uint64_t data_start,
size_t data_size,
uint64_t bss_start,
size_t bss_size,
uint64_t args_start,
size_t args_size,
uint64_t heap_start,
size_t heap_size,
uint64_t workspace_start,
size_t workspace_size,
uint64_t stack_start,
size_t stack_size,
size_t word_size,
bool thumb_mode,
const std::string& server_addr,
int port);
/*! /*!
* \brief destructor * \brief destructor
...@@ -87,20 +132,20 @@ class MicroSession : public ModuleNode { ...@@ -87,20 +132,20 @@ class MicroSession : public ModuleNode {
static ObjectPtr<MicroSession>& Current(); static ObjectPtr<MicroSession>& Current();
/*! /*!
* \brief creates session by setting up a low-level device and initting allocators for it * \brief sets up runtime metadata for `func` and copies arguments for on-device execution
* \param args TVMArgs passed into the micro.init packedfunc * \param func address of the function to be executed
* \param args args to the packed function
* \return elapsed time during function execution on the device
*/ */
void CreateSession(const std::string& device_type, double PushToExecQueue(DevPtr func, const TVMArgs& args);
const std::string& binary_path,
const std::string& toolchain_prefix,
std::uintptr_t base_addr,
const std::string& server_addr,
int port);
/*! /*!
* \brief ends the session by destructing the low-level device and its allocators * \brief loads binary onto device
* \param binary_path path to binary object file
* \param patch_dylib_pointers whether to patch runtime API function pointers
* \return info about loaded binary
*/ */
void EndSession(); BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers);
/*! /*!
* \brief allocate memory in section * \brief allocate memory in section
...@@ -108,36 +153,21 @@ class MicroSession : public ModuleNode { ...@@ -108,36 +153,21 @@ class MicroSession : public ModuleNode {
* \param size size of allocated memory in bytes * \param size size of allocated memory in bytes
* \return pointer to allocated memory region in section, nullptr if out of space * \return pointer to allocated memory region in section, nullptr if out of space
*/ */
DevBaseOffset AllocateInSection(SectionKind type, size_t size); DevPtr AllocateInSection(SectionKind type, size_t size);
/*! /*!
* \brief free prior allocation from section * \brief free prior allocation from section
* \param type type of section to allocate in * \param type type of section to allocate in
* \param ptr pointer to allocated memory * \param addr device address of allocated memory
*/ */
void FreeInSection(SectionKind type, DevBaseOffset ptr); void FreeInSection(SectionKind type, DevPtr addr);
/*! /*!
* \brief read string from device to host * \brief read string from device to host
* \param str_offset device offset of first character of string * \param str_addr device address of first character of string
* \return host copy of device string that was read * \return host copy of device string that was read
*/ */
std::string ReadString(DevBaseOffset str_offset); std::string ReadString(DevPtr str_addr);
/*!
* \brief sets up runtime metadata for `func` and copies arguments for on-device execution
* \param func address of the function to be executed
* \param args args to the packed function
*/
void PushToExecQueue(DevBaseOffset func, const TVMArgs& args);
/*!
* \brief loads binary onto device
* \param binary_path path to binary object file
* \param patch_dylib_pointers whether runtime API function pointer patching is needed
* \return info about loaded binary
*/
BinaryInfo LoadBinary(const std::string& binary_path, bool patch_dylib_pointers = true);
/*! /*!
* \brief read value of symbol from device memory * \brief read value of symbol from device memory
...@@ -174,16 +204,17 @@ class MicroSession : public ModuleNode { ...@@ -174,16 +204,17 @@ class MicroSession : public ModuleNode {
/*! \brief array of memory allocators for each on-device section */ /*! \brief array of memory allocators for each on-device section */
std::shared_ptr<MicroSectionAllocator> std::shared_ptr<MicroSectionAllocator>
section_allocators_[static_cast<size_t>(SectionKind::kNumKinds)]; section_allocators_[static_cast<size_t>(SectionKind::kNumKinds)];
/*! \brief total number of bytes of usable device memory for this session */ /*! \brief number of bytes in a word on the target device */
size_t memory_size_; size_t word_size_;
/*! \brief uTVM runtime binary info */ /*! \brief whether the target device requires a thumb-mode bit on function addresses
BinaryInfo runtime_bin_info_; *
/*! \brief path to uTVM runtime source code */ * ARM and other manufacturers use the lowest bit of a function address to determine
std::string runtime_binary_path_; * whether it's a "thumb mode" function. The Thumb ISA is more restricted, but
/*! \brief offset of the runtime entry function */ * results in more compact binaries.
DevBaseOffset utvm_main_symbol_; */
/*! \brief offset of the runtime exit breakpoint */ bool thumb_mode_;
DevBaseOffset utvm_done_symbol_; /*! \brief symbol map for the device runtime */
SymbolMap runtime_symbol_map_;
/*! /*!
* \brief patches a function pointer in this module to an implementation * \brief patches a function pointer in this module to an implementation
...@@ -192,12 +223,6 @@ class MicroSession : public ModuleNode { ...@@ -192,12 +223,6 @@ class MicroSession : public ModuleNode {
void PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name); void PatchImplHole(const SymbolMap& symbol_map, const std::string& func_name);
/*! /*!
* \brief sets the runtime binary path
* \param path to runtime binary
*/
void SetRuntimeBinaryPath(std::string path);
/*!
* \brief appends arguments to the host-side buffer of `encoder` * \brief appends arguments to the host-side buffer of `encoder`
* \param encoder encoder being used to append `args` * \param encoder encoder being used to append `args`
* \param args args to be appended * \param args args to be appended
...@@ -211,6 +236,7 @@ class MicroSession : public ModuleNode { ...@@ -211,6 +236,7 @@ class MicroSession : public ModuleNode {
* \param arr TVMArray to be appended * \param arr TVMArray to be appended
* \return device address of the allocated `TVMArray` * \return device address of the allocated `TVMArray`
*/ */
template <typename T>
DevPtr EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr); DevPtr EncoderAppend(TargetDataLayoutEncoder* encoder, const TVMArray& arr);
/*! /*!
...@@ -228,18 +254,11 @@ class MicroSession : public ModuleNode { ...@@ -228,18 +254,11 @@ class MicroSession : public ModuleNode {
} }
/*! /*!
* \brief returns the symbol map for the uTVM runtime
* \return reference to symbol map
*/
const SymbolMap& runtime_symbol_map() {
return runtime_bin_info_.symbol_map;
}
/*!
* \brief Push a new session context onto the thread-local stack. * \brief Push a new session context onto the thread-local stack.
* The session on top of the stack is used as the current global session. * The session on top of the stack is used as the current global session.
*/ */
static void EnterWithScope(ObjectPtr<MicroSession> session); static void EnterWithScope(ObjectPtr<MicroSession> session);
/*! /*!
* \brief Pop a session off the thread-local context stack, * \brief Pop a session off the thread-local context stack,
* restoring the previous session as the current context. * restoring the previous session as the current context.
...@@ -260,6 +279,118 @@ struct MicroDevSpace { ...@@ -260,6 +279,118 @@ struct MicroDevSpace {
ObjectPtr<MicroSession> session; ObjectPtr<MicroSession> session;
}; };
// TODO(weberlo): maybe templatize serialization to reduce redundancy
/*! \brief TVM array for serialization to 32-bit devices */
struct TVMArray32 {
TVMArray32(
TargetVal data,
DLContext ctx,
int32_t ndim,
DLDataType dtype,
TargetVal shape,
TargetVal strides,
TargetVal byte_offset)
: data(data.val32),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.val32),
strides(strides.val32),
pad1(0),
byte_offset(byte_offset.val32),
pad2(0) { }
/*! \brief opaque pointer to the allocated data */
uint32_t data;
/*! \brief The device context of the tensor */
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
uint32_t shape;
/*!
* \brief strides of the tensor,
* can be NULL, indicating tensor is compact.
*/
uint32_t strides;
/*! \brief Padding to enforce struct alignment */
uint32_t pad1;
/*! \brief The offset in bytes to the beginning pointer to data */
uint32_t byte_offset;
/*! \brief Padding to enforce struct alignment */
uint32_t pad2;
};
/*!
 * \brief TVM array for serialization to 64-bit devices
 *
 * The pointer-like fields (data, shape, strides) and byte_offset are stored
 * as 64-bit values. A single padding word (pad0) is declared after ndim and
 * is always zeroed by the constructor.
 */
struct TVMArray64 {
/*!
 * \brief Build the record from target values, taking the 64-bit view
 *  (`val64`) of every TargetVal argument.
 * \param data target value stored as the data pointer
 * \param ctx device context, copied as-is
 * \param ndim number of dimensions, copied as-is
 * \param dtype data type, copied as-is
 * \param shape target value stored as the shape pointer
 * \param strides target value stored as the strides pointer
 * \param byte_offset target value stored as the byte offset
 */
TVMArray64(
TargetVal data,
DLContext ctx,
int32_t ndim,
DLDataType dtype,
TargetVal shape,
TargetVal strides,
TargetVal byte_offset)
// The initializers below are listed in the same order as the member
// declarations that follow; the padding word is set to 0.
: data(data.val64),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.val64),
strides(strides.val64),
byte_offset(byte_offset.val64) { }
/*! \brief opaque pointer to the allocated data */
uint64_t data;
/*! \brief The device context of the tensor */
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
uint64_t shape;
/*!
* \brief strides of the tensor,
* can be NULL, indicating tensor is compact.
*/
uint64_t strides;
/*! \brief The offset in bytes to the beginning pointer to data */
uint64_t byte_offset;
};
/*!
 * \brief MicroTVM task for serialization to 32-bit devices
 *
 * Every pointer field is held as a 32-bit integer rather than a host pointer.
 * NOTE(review): presumably the field order must match the task struct used
 * by the device-side runtime -- confirm before reordering or adding fields.
 */
typedef struct StructUTVMTask32 {
/*! \brief Pointer to function to call for this task */
uint32_t func;
/*! \brief Array of argument values */
uint32_t arg_values;
/*! \brief Array of type codes for each argument value */
uint32_t arg_type_codes;
/*! \brief Number of arguments */
int32_t num_args;
} UTVMTask32;
/*!
 * \brief MicroTVM task for serialization to 64-bit devices
 *
 * Every pointer field is held as a 64-bit integer rather than a host pointer;
 * the argument count stays a 32-bit signed integer.
 * NOTE(review): presumably the field order must match the task struct used
 * by the device-side runtime -- confirm before reordering or adding fields.
 */
typedef struct StructUTVMTask64 {
/*! \brief Pointer to function to call for this task */
uint64_t func;
/*! \brief Array of argument values */
uint64_t arg_values;
/*! \brief Array of type codes for each argument value */
uint64_t arg_type_codes;
/*! \brief Number of arguments */
int32_t num_args;
} UTVMTask64;
} // namespace runtime } // namespace runtime
} // namespace tvm } // namespace tvm
#endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_ #endif // TVM_RUNTIME_MICRO_MICRO_SESSION_H_
...@@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
public: public:
/*! /*!
* \brief constructor to initialize connection to openocd device * \brief constructor to initialize connection to openocd device
* \param base_addr base address of the device
* \param server_addr address of the OpenOCD server to connect to * \param server_addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to
*/ */
explicit OpenOCDLowLevelDevice(std::uintptr_t base_addr, explicit OpenOCDLowLevelDevice(const std::string& server_addr,
const std::string& server_addr,
int port) : socket_() { int port) : socket_() {
socket_.Connect(tvm::common::SockAddr(server_addr.c_str(), port)); server_addr_ = server_addr;
socket_.cmd_builder() << "reset halt"; port_ = port;
socket_.SendCommand();
base_addr_ = base_addr; socket_.Connect(tvm::common::SockAddr(server_addr_.c_str(), port_));
CHECK(base_addr_ % 8 == 0) << "base address not aligned to 8 bytes"; socket_.cmd_builder() << "halt 0";
socket_.SendCommand();
} }
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) { void Read(DevPtr addr, void* buf, size_t num_bytes) {
if (num_bytes == 0) { if (num_bytes == 0) {
return; return;
} }
...@@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
// TODO(weberlo): Refactor between read and write. // TODO(weberlo): Refactor between read and write.
// Check if we need to chunk this write request. // Check if we need to chunk this write request.
if (num_bytes > kMemTransferLimit) { if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
char* curr_buf_ptr = reinterpret_cast<char*>(buf); char* curr_buf_ptr = reinterpret_cast<char*>(buf);
while (num_bytes != 0) { while (num_bytes != 0) {
size_t amount_to_read; size_t amount_to_read;
...@@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else { } else {
amount_to_read = num_bytes; amount_to_read = num_bytes;
} }
Read(offset, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read); Read(addr, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read);
offset += amount_to_read; addr += amount_to_read;
curr_buf_ptr += amount_to_read; curr_buf_ptr += amount_to_read;
num_bytes -= amount_to_read; num_bytes -= amount_to_read;
} }
...@@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.cmd_builder() << "array unset output"; socket_.cmd_builder() << "array unset output";
socket_.SendCommand(); socket_.SendCommand();
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder() socket_.cmd_builder()
<< "mem2array output" << "mem2array output"
<< " " << std::dec << kWordSize << " " << std::dec << kWordSize
...@@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} }
} }
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { void Write(DevPtr addr, const void* buf, size_t num_bytes) {
if (num_bytes == 0) { if (num_bytes == 0) {
return; return;
} }
// Check if we need to chunk this write request. // Check if we need to chunk this write request.
if (num_bytes > kMemTransferLimit) { if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
const char* curr_buf_ptr = reinterpret_cast<const char*>(buf); const char* curr_buf_ptr = reinterpret_cast<const char*>(buf);
while (num_bytes != 0) { while (num_bytes != 0) {
size_t amount_to_write; size_t amount_to_write;
...@@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else { } else {
amount_to_write = num_bytes; amount_to_write = num_bytes;
} }
Write(offset, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write); Write(addr, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write);
offset += amount_to_write; addr += amount_to_write;
curr_buf_ptr += amount_to_write; curr_buf_ptr += amount_to_write;
num_bytes -= amount_to_write; num_bytes -= amount_to_write;
} }
...@@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand(); socket_.SendCommand();
} }
{ {
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder() socket_.cmd_builder()
<< "array2mem input" << "array2mem input"
<< " " << std::dec << kWordSize << " " << std::dec << kWordSize
...@@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} }
} }
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
socket_.cmd_builder() << "halt 0"; socket_.cmd_builder() << "halt 0";
socket_.SendCommand(); socket_.SendCommand();
// Set up the stack pointer.
DevPtr stack_end = stack_top() - 8;
socket_.cmd_builder() << "reg sp " << stack_end.cast_to<void*>();
socket_.SendCommand();
// Set a breakpoint at the beginning of `UTVMDone`. // Set a breakpoint at the beginning of `UTVMDone`.
socket_.cmd_builder() << "bp " << ToDevPtr(breakpoint).cast_to<void*>() << " 2"; socket_.cmd_builder() << "bp " << breakpoint_addr.cast_to<void*>() << " 2";
socket_.SendCommand(); socket_.SendCommand();
DevPtr func_addr = DevPtr(base_addr_ + func_offset.value());
socket_.cmd_builder() << "resume " << func_addr.cast_to<void*>(); socket_.cmd_builder() << "resume " << func_addr.cast_to<void*>();
socket_.SendCommand(); socket_.SendCommand();
...@@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand(); socket_.SendCommand();
// Remove the breakpoint. // Remove the breakpoint.
socket_.cmd_builder() << "rbp " << ToDevPtr(breakpoint).cast_to<void*>(); socket_.cmd_builder() << "rbp " << breakpoint_addr.cast_to<void*>();
socket_.SendCommand(); socket_.SendCommand();
} }
void SetStackTop(DevBaseOffset stack_top) {
stack_top_ = DevPtr(base_addr_ + stack_top.value());
}
std::uintptr_t base_addr() const final {
return base_addr_;
}
DevPtr stack_top() const {
CHECK(stack_top_ != nullptr) << "stack top was never initialized";
return stack_top_;
}
const char* device_type() const final { const char* device_type() const final {
return "openocd"; return "openocd";
} }
private: private:
/*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_;
/*! \brief top of the stack section */
DevPtr stack_top_;
/*! \brief socket used to communicate with the device through Tcl */ /*! \brief socket used to communicate with the device through Tcl */
TclSocket socket_; TclSocket socket_;
/*! \brief address of OpenOCD server */
std::string server_addr_;
/*! \brief port of OpenOCD server */
int port_;
/*! \brief number of bytes in a word on the target device (64-bit) */ /*! \brief number of bytes in a word on the target device (64-bit) */
static const constexpr ssize_t kWordSize = 8; static const constexpr ssize_t kWordSize = 8;
...@@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
static const constexpr int kWaitTime = 10000; static const constexpr int kWaitTime = 10000;
}; };
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& server_addr,
const std::string& server_addr,
int port) { int port) {
std::shared_ptr<LowLevelDevice> lld = std::shared_ptr<LowLevelDevice> lld =
std::make_shared<OpenOCDLowLevelDevice>(base_addr, server_addr, port); std::make_shared<OpenOCDLowLevelDevice>(server_addr, port);
return lld; return lld;
} }
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ #define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_
#include <vector> #include <vector>
#include "device/utvm_runtime.h" #include "host_driven/utvm_runtime.h"
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
...@@ -96,9 +96,9 @@ class TargetDataLayoutEncoder { ...@@ -96,9 +96,9 @@ class TargetDataLayoutEncoder {
* \brief constructor * \brief constructor
* \param start_addr start address of the encoder in device memory * \param start_addr start address of the encoder in device memory
*/ */
explicit TargetDataLayoutEncoder(DevPtr start_addr) explicit TargetDataLayoutEncoder(DevPtr start_addr, size_t word_size)
: buf_(std::vector<uint8_t>()), curr_offset_(0) { : buf_(std::vector<uint8_t>()), curr_offset_(0), word_size_(word_size) {
start_addr_ = DevPtr(UpperAlignValue(start_addr.value(), 8)); start_addr_ = DevPtr(UpperAlignValue(start_addr.value().val64, word_size_));
} }
/*! /*!
...@@ -108,7 +108,7 @@ class TargetDataLayoutEncoder { ...@@ -108,7 +108,7 @@ class TargetDataLayoutEncoder {
*/ */
template <typename T> template <typename T>
Slot<T> Alloc(size_t num_elems = 1) { Slot<T> Alloc(size_t num_elems = 1) {
curr_offset_ = UpperAlignValue(curr_offset_, 8); curr_offset_ = UpperAlignValue(curr_offset_, word_size_);
size_t size = sizeof(T) * num_elems; size_t size = sizeof(T) * num_elems;
if (curr_offset_ + size > buf_.size()) { if (curr_offset_ + size > buf_.size()) {
buf_.resize(curr_offset_ + size); buf_.resize(curr_offset_ + size);
...@@ -141,6 +141,8 @@ class TargetDataLayoutEncoder { ...@@ -141,6 +141,8 @@ class TargetDataLayoutEncoder {
size_t curr_offset_; size_t curr_offset_;
/*! \brief start address of the encoder in device memory */ /*! \brief start address of the encoder in device memory */
DevPtr start_addr_; DevPtr start_addr_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
}; };
template <typename T> template <typename T>
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
* \file rpc_session.cc * \file rpc_session.cc
* \brief RPC session for remote function call. * \brief RPC session for remote function call.
*/ */
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h> #include <tvm/runtime/packed_func.h>
#include <tvm/runtime/device_api.h> #include <tvm/runtime/device_api.h>
#include <tvm/runtime/registry.h> #include <tvm/runtime/registry.h>
...@@ -40,6 +41,7 @@ ...@@ -40,6 +41,7 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
// Temp buffer for data array // Temp buffer for data array
struct RPCByteArrayBuffer { struct RPCByteArrayBuffer {
TVMByteArray arr; TVMByteArray arr;
...@@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() { ...@@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() {
CHECK_EQ(state_, kRecvCode); CHECK_EQ(state_, kRecvCode);
} }
/*!
 * \brief Wrap `pf` in a timing function for micro devices.
 *
 * The returned function calls `pf` once as a warm-up and then performs
 * `repeat` measurement rounds of `number` calls each. The value returned by
 * every measured call is read as a double and divided by `number`; the sum of
 * those quotients is the result of the round. The per-round results are
 * handed back as the raw bytes of consecutive doubles in a TVMByteArray.
 *
 * \param pf function to be timed
 * \param ctx context that is synchronized after every call
 * \param number number of calls per measurement round
 * \param repeat number of measurement rounds
 * \return the timing function
 */
PackedFunc MicroTimeEvaluator(
    PackedFunc pf,
    TVMContext ctx,
    int number,
    int repeat) {
  auto timer_body = [pf, ctx, number, repeat](TVMArgs args, TVMRetValue *rv) mutable {
    TVMRetValue call_result;
    std::ostringstream timings;

    // One invocation of `pf` followed by a stream sync on `ctx`.
    auto invoke_and_sync = [&]() {
      pf.CallPacked(args, &call_result);
      DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr);
    };

    // skip first time call, to activate lazy compilation components.
    invoke_and_sync();

    for (int rounds_left = repeat; rounds_left > 0; --rounds_left) {
      double round_mean = 0.0;
      for (int calls_left = number; calls_left > 0; --calls_left) {
        invoke_and_sync();
        // The callee's return value is taken as the cost of this call.
        round_mean += (call_result.operator double()) / number;
      }
      // Append the round's result as raw bytes.
      timings.write(reinterpret_cast<char*>(&round_mean), sizeof(round_mean));
    }

    // return the time.
    std::string packed = timings.str();
    TVMByteArray bytes;
    bytes.size = packed.length();
    bytes.data = packed.data();
    *rv = bytes;
  };
  return PackedFunc(timer_body);
}
PackedFunc WrapTimeEvaluator(PackedFunc pf, PackedFunc WrapTimeEvaluator(PackedFunc pf,
TVMContext ctx, TVMContext ctx,
int number, int number,
int repeat, int repeat,
int min_repeat_ms) { int min_repeat_ms) {
if (static_cast<int>(ctx.device_type) == static_cast<int>(kDLMicroDev)) {
return MicroTimeEvaluator(pf, ctx, number, repeat);
}
auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable { auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable {
TVMRetValue temp; TVMRetValue temp;
std::ostringstream os; std::ostringstream os;
......
...@@ -25,6 +25,7 @@ ALLOW_EXTENSION = { ...@@ -25,6 +25,7 @@ ALLOW_EXTENSION = {
"cc", "cc",
"c", "c",
"h", "h",
"s",
"rs", "rs",
"m", "m",
"mm", "mm",
......
...@@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary(): ...@@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary():
with open(tmp_bin, "wb") as f: with open(tmp_bin, "wb") as f:
f.write(binary) f.write(binary)
def verify(): def verify():
text_loc_str = "0x0" word_size = 8
rodata_loc_str = "0x10000" text_loc = 0x0
data_loc_str = "0x20000" rodata_loc = 0x10000
bss_loc_str = "0x30000" data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary( rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
print("Relocated binary section sizes") print("Relocated binary section sizes")
test_tvm_callback_get_section_size(binary=rel_bin) test_tvm_callback_get_section_size(binary=rel_bin)
relf = tmp_dir.relpath("rel.bin") relf = tmp_dir.relpath("rel.bin")
...@@ -88,10 +97,6 @@ def test_tvm_callback_relocate_binary(): ...@@ -88,10 +97,6 @@ def test_tvm_callback_relocate_binary():
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT) stderr=subprocess.STDOUT)
(out, _) = nm_proc.communicate() (out, _) = nm_proc.communicate()
# Ensure the relocated symbols are within the ranges we specified.
text_loc = int(text_loc_str, 16)
data_loc = int(data_loc_str, 16)
bss_loc = int(bss_loc_str, 16)
symbol_entries = out.decode("utf-8").split("\n") symbol_entries = out.decode("utf-8").split("\n")
for entry in symbol_entries: for entry in symbol_entries:
if len(entry) == 0: if len(entry) == 0:
...@@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map(): ...@@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map():
with open(tmp_bin, "wb") as f: with open(tmp_bin, "wb") as f:
f.write(binary) f.write(binary)
def verify(): def verify():
text_loc_str = "0x0" word_size = 8
rodata_loc_str = "0x10000" text_loc = 0x0
data_loc_str = "0x20000" rodata_loc = 0x10000
bss_loc_str = "0x30000" data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary( rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX) symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
symbols = set() symbols = set()
for i, line in enumerate(symbol_map.split('\n')): for i, line in enumerate(symbol_map.split('\n')):
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
import os import os
import numpy as np import numpy as np
...@@ -22,38 +21,13 @@ import tvm ...@@ -22,38 +21,13 @@ import tvm
from tvm.contrib import graph_runtime, util from tvm.contrib import graph_runtime, util
from tvm import relay from tvm import relay
import tvm.micro as micro import tvm.micro as micro
from tvm.micro import create_micro_mod
from tvm.relay.testing import resnet from tvm.relay.testing import resnet
# Use the host emulated micro device. # Use the host emulated micro device.
DEVICE_TYPE = "host" DEV_CONFIG = micro.device.host.default_config()
TOOLCHAIN_PREFIX = ""
def create_micro_mod(c_mod, toolchain_prefix):
"""Produces a micro module from a given module.
Parameters
----------
c_mod : tvm.module.Module
module with "c" as its target backend
toolchain_prefix : str
toolchain prefix to be used (see `tvm.micro.Session` docs)
Return
------
micro_mod : tvm.module.Module
micro module for the target device
"""
temp_dir = util.tempdir()
lib_obj_path = temp_dir.relpath("dev_lib.obj")
c_mod.export_library(
lib_obj_path,
fcompile=tvm.micro.cross_compiler(toolchain_prefix=toolchain_prefix))
micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
return micro_mod
def relay_micro_build(func, toolchain_prefix, params=None): def relay_micro_build(func, dev_config, params=None):
"""Create a graph runtime module with a micro device context from a Relay function. """Create a graph runtime module with a micro device context from a Relay function.
Parameters Parameters
...@@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None): ...@@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None):
func : relay.Function func : relay.Function
function to compile function to compile
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
params : dict params : dict
input parameters that do not change during inference input parameters that do not change during inference
...@@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None): ...@@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None):
""" """
with tvm.build_config(disable_vectorize=True): with tvm.build_config(disable_vectorize=True):
graph, c_mod, params = relay.build(func, target="c", params=params) graph, c_mod, params = relay.build(func, target="c", params=params)
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, dev_config)
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
mod = graph_runtime.create(graph, micro_mod, ctx) mod = graph_runtime.create(graph, micro_mod, ctx)
mod.set_input(**params) mod.set_input(**params)
return mod return mod
# TODO(weberlo): Add example program to test scalar double/int TVMValue serialization.
# TODO(weberlo): How can we test the OpenOCD device? The CI would need to have OpenOCD
# and Spike installed.
def test_alloc(): def test_alloc():
"""Test tensor allocation on the device.""" """Test tensor allocation on the device."""
if not tvm.module.enabled("micro_dev"): if not tvm.module.enabled("micro_dev"):
return return
shape = (1024,) shape = (1024,)
dtype = "float32" dtype = "float32"
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
np_tensor = np.random.uniform(size=shape).astype(dtype) np_tensor = np.random.uniform(size=shape).astype(dtype)
micro_tensor = tvm.nd.array(np_tensor, ctx) micro_tensor = tvm.nd.array(np_tensor, ctx)
...@@ -112,15 +85,14 @@ def test_add(): ...@@ -112,15 +85,14 @@ def test_add():
func_name = "fadd" func_name = "fadd"
c_mod = tvm.build(s, [A, B, C], target="c", name=func_name) c_mod = tvm.build(s, [A, B, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name] micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx) c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
micro_func(a, b, c) micro_func(a, b, c)
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
c.asnumpy(), a.asnumpy() + b.asnumpy()) c.asnumpy(), a.asnumpy() + b.asnumpy())
...@@ -143,8 +115,8 @@ def test_workspace_add(): ...@@ -143,8 +115,8 @@ def test_workspace_add():
func_name = "fadd_two_workspace" func_name = "fadd_two_workspace"
c_mod = tvm.build(s, [A, C], target="c", name=func_name) c_mod = tvm.build(s, [A, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name] micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
...@@ -168,8 +140,8 @@ def test_graph_runtime(): ...@@ -168,8 +140,8 @@ def test_graph_runtime():
z = relay.add(xx, relay.const(1.0)) z = relay.add(xx, relay.const(1.0))
func = relay.Function([x], z) func = relay.Function([x], z)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
mod = relay_micro_build(func, TOOLCHAIN_PREFIX) mod = relay_micro_build(func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype) x_in = np.random.uniform(size=shape[0]).astype(dtype)
mod.run(x=x_in) mod.run(x=x_in)
...@@ -195,9 +167,9 @@ def test_multiple_modules(): ...@@ -195,9 +167,9 @@ def test_multiple_modules():
ret = relay.subtract(x, relay.const(1.0)) ret = relay.subtract(x, relay.const(1.0))
sub_const_func = relay.Function([x], ret) sub_const_func = relay.Function([x], ret)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
sub_const_mod = relay_micro_build(sub_const_func, TOOLCHAIN_PREFIX) sub_const_mod = relay_micro_build(sub_const_func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype) x_in = np.random.uniform(size=shape[0]).astype(dtype)
add_const_mod.run(x=x_in) add_const_mod.run(x=x_in)
...@@ -223,8 +195,8 @@ def test_interleave_sessions(): ...@@ -223,8 +195,8 @@ def test_interleave_sessions():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
...@@ -232,13 +204,13 @@ def test_interleave_sessions(): ...@@ -232,13 +204,13 @@ def test_interleave_sessions():
np_tensor_b = np.random.uniform(size=shape).astype(dtype) np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
with sess_a: with sess_a:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a) add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
add_result, np_tensor_a + 1.0) add_result, np_tensor_a + 1.0)
with sess_b: with sess_b:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_b) add_const_mod.run(x=micro_tensor_b)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
...@@ -257,15 +229,15 @@ def test_nested_sessions(): ...@@ -257,15 +229,15 @@ def test_nested_sessions():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
with sess_b: with sess_b:
np_tensor_b = np.random.uniform(size=shape).astype(dtype) np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a) add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
...@@ -284,12 +256,12 @@ def test_inactive_session_use(): ...@@ -284,12 +256,12 @@ def test_inactive_session_use():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
with sess_b: with sess_b:
# These objects belong to `sess_a`. # These objects belong to `sess_a`.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment