Commit 47c870a9 by Logan Weber Committed by Tianqi Chen

[µTVM] Enable AutoTVM for ARM STM32F746XX Boards (#4274)

parent 11af82c0
...@@ -75,6 +75,9 @@ def context(dev_type, dev_id=0): ...@@ -75,6 +75,9 @@ def context(dev_type, dev_id=0):
assert tvm.context("cuda", 0) == tvm.gpu(0) assert tvm.context("cuda", 0) == tvm.gpu(0)
""" """
if isinstance(dev_type, string_types): if isinstance(dev_type, string_types):
if '-device=micro_dev' in dev_type:
dev_type = 'micro_dev'
else:
dev_type = dev_type.split()[0] dev_type = dev_type.split()[0]
if dev_type not in TVMContext.STR2MASK: if dev_type not in TVMContext.STR2MASK:
raise ValueError("Unknown device type %s" % dev_type) raise ValueError("Unknown device type %s" % dev_type)
......
...@@ -19,9 +19,81 @@ ...@@ -19,9 +19,81 @@
import os import os
import subprocess import subprocess
from . import util from . import util
from .._ffi.base import py_str
from ..api import register_func from ..api import register_func
# Linker-script template that is filled in with `str.format` before being
# handed to the linker.
#
# Named placeholders: `stack_pointer_init`, `text_start`, `rodata_start`,
# `data_start`, `bss_start` (all rendered with the `x` format spec, i.e. as
# hex digits following a literal `0x`) and `word_size` (used as the argument
# of every `ALIGN(...)`).
#
# Doubled braces (`{{` / `}}`) are `str.format` escapes and come out as the
# single literal braces that the linker-script syntax requires.
RELOCATION_LD_SCRIPT_TEMPLATE = """
/* linker symbol for use in UTVMInit */
_utvm_stack_pointer_init = 0x{stack_pointer_init:x};
SECTIONS
{{
. = 0x{text_start:x};
. = ALIGN({word_size});
.text :
{{
. = ALIGN({word_size});
KEEP(*(.text))
KEEP(*(.text*))
. = ALIGN({word_size});
}}
. = 0x{rodata_start:x};
. = ALIGN({word_size});
.rodata :
{{
. = ALIGN({word_size});
KEEP(*(.rodata))
KEEP(*(.rodata*))
. = ALIGN({word_size});
}}
. = 0x{data_start:x};
. = ALIGN({word_size});
.data :
{{
. = ALIGN({word_size});
KEEP(*(.data))
KEEP(*(.data*))
. = ALIGN({word_size});
}}
. = 0x{bss_start:x};
. = ALIGN({word_size});
.bss :
{{
. = ALIGN({word_size});
KEEP(*(.bss))
KEEP(*(.bss*))
. = ALIGN({word_size});
}}
}}
"""
def run_cmd(cmd):
    """Execute a command and hand back everything it printed.

    The subprocess's stderr is merged into its stdout, and the call blocks
    until the subprocess has exited.

    Parameters
    ----------
    cmd : List[str]
        program name followed by its command-line arguments

    Returns
    -------
    output : str
        combined stdout/stderr of the subprocess, decoded as UTF-8

    Raises
    ------
    RuntimeError
        if the subprocess exits with a nonzero return code; the message
        carries the command line and the captured output
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=False)
    text = completed.stdout.decode("utf-8")
    if completed.returncode == 0:
        return text
    joined = " ".join(cmd)
    raise RuntimeError(f"error while running command \"{joined}\":\n{text}")
@register_func("tvm_callback_get_section_size") @register_func("tvm_callback_get_section_size")
def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
...@@ -48,14 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -48,14 +120,7 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
raise RuntimeError("no such file \"{}\"".format(binary_path)) raise RuntimeError("no such file \"{}\"".format(binary_path))
# We use the "-A" flag here to get the ".rodata" section's size, which is # We use the "-A" flag here to get the ".rodata" section's size, which is
# not included by default. # not included by default.
size_proc = subprocess.Popen( size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path])
["{}size".format(toolchain_prefix), "-A", binary_path], stdout=subprocess.PIPE)
(size_output, _) = size_proc.communicate()
size_output = size_output.decode("utf-8")
if size_proc.returncode != 0:
msg = "error in finding section size:\n"
msg += py_str(size_output)
raise RuntimeError(msg)
# TODO(weberlo): Refactor this method and `*relocate_binary` so they are # TODO(weberlo): Refactor this method and `*relocate_binary` so they are
# both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss". # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss".
...@@ -74,13 +139,15 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -74,13 +139,15 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
continue continue
entry_name = tokens[0] entry_name = tokens[0]
entry_size = int(tokens[1]) entry_size = int(tokens[1])
if entry_name in sections_to_sum: for section in sections_to_sum:
if entry_name.startswith(section):
section_size += entry_size section_size += entry_size
break
# NOTE: For some reason, the size of the BSS section on the RISC-V # NOTE: For some reason, the size of the BSS section on the RISC-V
# GCC is sometimes reported to be smaller than it is, so we need to adjust # GCC is sometimes reported to be smaller than it is, so we need to adjust
# for this. # for this.
if "riscv" in toolchain_prefix and section_name == 'bss': if "riscv" in toolchain_prefix and section_name == "bss":
# TODO(weberlo): Figure out why 32 is the minimum constant that works. # TODO(weberlo): Figure out why 32 is the minimum constant that works.
# #
# The current hypothesis is that the last symbols in the ".bss" and # The current hypothesis is that the last symbols in the ".bss" and
...@@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): ...@@ -97,7 +164,14 @@ def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix):
@register_func("tvm_callback_relocate_binary") @register_func("tvm_callback_relocate_binary")
def tvm_callback_relocate_binary( def tvm_callback_relocate_binary(
binary_path, text_addr, rodata_addr, data_addr, bss_addr, toolchain_prefix): binary_path,
word_size,
text_start,
rodata_start,
data_start,
bss_start,
stack_end,
toolchain_prefix):
"""Relocates sections in the binary to new addresses """Relocates sections in the binary to new addresses
Parameters Parameters
...@@ -105,17 +179,23 @@ def tvm_callback_relocate_binary( ...@@ -105,17 +179,23 @@ def tvm_callback_relocate_binary(
binary_path : str binary_path : str
path of the binary file path of the binary file
text_addr : str word_size : int
text section absolute address word size on the target machine
text_start : int
text section address
rodata_addr : str rodata_start : int
rodata section absolute address rodata section address
data_addr : str data_start : int
data section absolute address data section address
bss_addr : str bss_start : int
bss section absolute address bss section address
stack_end : int
stack section end address
toolchain_prefix : str toolchain_prefix : str
prefix for binary names in target compiler toolchain prefix for binary names in target compiler toolchain
...@@ -125,68 +205,29 @@ def tvm_callback_relocate_binary( ...@@ -125,68 +205,29 @@ def tvm_callback_relocate_binary(
rel_bin : bytearray rel_bin : bytearray
the relocated binary the relocated binary
""" """
tmp_dir = util.tempdir() stack_pointer_init = stack_end - word_size
rel_obj_path = tmp_dir.relpath("relocated.o")
ld_script_contents = "" ld_script_contents = ""
# TODO(weberlo): There should be a better way to configure this for different archs. # TODO(weberlo): There should be a better way to configure this for different archs.
if "riscv" in toolchain_prefix: if "riscv" in toolchain_prefix:
ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n" ld_script_contents += "OUTPUT_ARCH( \"riscv\" )\n\n"
# TODO(weberlo): Generate the script in a more procedural manner. ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format(
ld_script_contents += """ word_size=word_size,
SECTIONS text_start=text_start,
{ rodata_start=rodata_start,
. = %s; data_start=data_start,
. = ALIGN(8); bss_start=bss_start,
.text : stack_pointer_init=stack_pointer_init)
{
*(.text) tmp_dir = util.tempdir()
. = ALIGN(8); rel_obj_path = tmp_dir.relpath("relocated.obj")
*(.text*)
}
. = %s;
. = ALIGN(8);
.rodata :
{
*(.rodata)
. = ALIGN(8);
*(.rodata*)
}
. = %s;
. = ALIGN(8);
.data :
{
*(.data)
. = ALIGN(8);
*(.data*)
. = ALIGN(8);
*(.sdata)
}
. = %s;
. = ALIGN(8);
.bss :
{
*(.bss)
. = ALIGN(8);
*(.bss*)
. = ALIGN(8);
*(.sbss)
}
}
""" % (text_addr, rodata_addr, data_addr, bss_addr)
rel_ld_script_path = tmp_dir.relpath("relocated.lds") rel_ld_script_path = tmp_dir.relpath("relocated.lds")
with open(rel_ld_script_path, "w") as f: with open(rel_ld_script_path, "w") as f:
f.write(ld_script_contents) f.write(ld_script_contents)
ld_proc = subprocess.Popen(["{}ld".format(toolchain_prefix), binary_path, run_cmd([
"{}ld".format(toolchain_prefix),
binary_path,
"-T", rel_ld_script_path, "-T", rel_ld_script_path,
"-o", rel_obj_path], "-o", rel_obj_path])
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(out, _) = ld_proc.communicate()
if ld_proc.returncode != 0:
msg = "linking error using ld:\n"
msg += py_str(out)
raise RuntimeError(msg)
with open(rel_obj_path, "rb") as f: with open(rel_obj_path, "rb") as f:
rel_bin = bytearray(f.read()) rel_bin = bytearray(f.read())
return rel_bin return rel_bin
...@@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix): ...@@ -217,16 +258,11 @@ def tvm_callback_read_binary_section(binary, section, toolchain_prefix):
tmp_section = tmp_dir.relpath("tmp_section.bin") tmp_section = tmp_dir.relpath("tmp_section.bin")
with open(tmp_bin, "wb") as out_file: with open(tmp_bin, "wb") as out_file:
out_file.write(bytes(binary)) out_file.write(bytes(binary))
objcopy_proc = subprocess.Popen(["{}objcopy".format(toolchain_prefix), "--dump-section", run_cmd([
"{}objcopy".format(toolchain_prefix),
"--dump-section",
".{}={}".format(section, tmp_section), ".{}={}".format(section, tmp_section),
tmp_bin], tmp_bin])
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
(out, _) = objcopy_proc.communicate()
if objcopy_proc.returncode != 0:
msg = "error in using objcopy:\n"
msg += py_str(out)
raise RuntimeError(msg)
if os.path.isfile(tmp_section): if os.path.isfile(tmp_section):
# Get section content if it exists. # Get section content if it exists.
with open(tmp_section, "rb") as f: with open(tmp_section, "rb") as f:
...@@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix): ...@@ -259,15 +295,12 @@ def tvm_callback_get_symbol_map(binary, toolchain_prefix):
tmp_obj = tmp_dir.relpath("tmp_obj.bin") tmp_obj = tmp_dir.relpath("tmp_obj.bin")
with open(tmp_obj, "wb") as out_file: with open(tmp_obj, "wb") as out_file:
out_file.write(bytes(binary)) out_file.write(bytes(binary))
nm_proc = subprocess.Popen(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj], nm_output = run_cmd([
stdout=subprocess.PIPE, "{}nm".format(toolchain_prefix),
stderr=subprocess.STDOUT) "-C",
(nm_output, _) = nm_proc.communicate() "--defined-only",
if nm_proc.returncode != 0: tmp_obj])
msg = "error in using nm:\n" nm_output = nm_output.splitlines()
msg += py_str(nm_output)
raise RuntimeError(msg)
nm_output = nm_output.decode("utf8").splitlines()
map_str = "" map_str = ""
for line in nm_output: for line in nm_output:
line = line.split() line = line.split()
......
...@@ -19,14 +19,22 @@ ...@@ -19,14 +19,22 @@
from __future__ import absolute_import from __future__ import absolute_import
import argparse import argparse
import ast
import multiprocessing import multiprocessing
import sys import sys
import logging import logging
import tvm
from tvm import micro
from .. import rpc from .. import rpc
def main(args): def main(args):
"""Main function""" """Main function
Parameters
----------
args : argparse.Namespace
parsed args from command-line invocation
"""
if args.tracker: if args.tracker:
url, port = args.tracker.rsplit(":", 1) url, port = args.tracker.rsplit(":", 1)
port = int(port) port = int(port)
...@@ -37,6 +45,9 @@ def main(args): ...@@ -37,6 +45,9 @@ def main(args):
else: else:
tracker_addr = None tracker_addr = None
if args.utvm_dev_config or args.utvm_dev_id:
init_utvm(args)
server = rpc.Server(args.host, server = rpc.Server(args.host,
args.port, args.port,
args.port_end, args.port_end,
...@@ -48,6 +59,38 @@ def main(args): ...@@ -48,6 +59,38 @@ def main(args):
server.proc.join() server.proc.join()
def init_utvm(args):
    """MicroTVM-specific RPC initialization

    Obtains a MicroTVM device config -- either loaded from the JSON file
    given by ``--utvm-dev-config`` or generated by the ``default_config``
    function registered for ``--utvm-dev-id`` -- and overrides the
    ``tvm.rpc.server.start`` / ``tvm.rpc.server.shutdown`` functions so a
    ``micro.Session`` built from that config is entered on start and exited
    on shutdown.

    Parameters
    ----------
    args : argparse.Namespace
        parsed args from command-line invocation

    Raises
    ------
    RuntimeError
        if both or neither of ``--utvm-dev-config`` and ``--utvm-dev-id``
        are given
    """
    # Imported here because the module-level imports do not include `json`;
    # without it the `--utvm-dev-config` path fails with a NameError.
    import json

    if args.utvm_dev_config and args.utvm_dev_id:
        raise RuntimeError('only one of --utvm-dev-config and --utvm-dev-id allowed')
    if not (args.utvm_dev_config or args.utvm_dev_id):
        raise RuntimeError('one of --utvm-dev-config and --utvm-dev-id required')

    if args.utvm_dev_config:
        with open(args.utvm_dev_config, 'r') as dev_conf_file:
            dev_config = json.load(dev_conf_file)
    else:
        # `--utvm-dev-config-args` has no default, so it is None when omitted;
        # treat that as "no arguments" (some `default_config` functions take
        # none) instead of letting `ast.literal_eval(None)` blow up.
        if args.utvm_dev_config_args:
            dev_config_args = ast.literal_eval(args.utvm_dev_config_args)
        else:
            dev_config_args = []
        default_config_func = micro.device.get_device_funcs(args.utvm_dev_id)['default_config']
        dev_config = default_config_func(*dev_config_args)

    # The session must live in the enclosing scope: it is created by the
    # start hook and torn down by the shutdown hook, which are two separate
    # functions.
    session = None

    # add MicroTVM overrides
    @tvm.register_func('tvm.rpc.server.start', override=True)
    def server_start():
        # pylint: disable=unused-variable
        nonlocal session
        session = micro.Session(dev_config)
        session._enter()

    @tvm.register_func('tvm.rpc.server.shutdown', override=True)
    def server_shutdown():
        # pylint: disable=unused-variable
        # Nothing to tear down if the start hook never ran.
        if session is not None:
            session._exit()
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--host', type=str, default="0.0.0.0", parser.add_argument('--host', type=str, default="0.0.0.0",
...@@ -71,6 +114,13 @@ if __name__ == "__main__": ...@@ -71,6 +114,13 @@ if __name__ == "__main__":
and ROCM compilers.") and ROCM compilers.")
parser.add_argument('--custom-addr', type=str, parser.add_argument('--custom-addr', type=str,
help="Custom IP Address to Report to RPC Tracker") help="Custom IP Address to Report to RPC Tracker")
parser.add_argument('--utvm-dev-config', type=str,
help='JSON config file for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-id', type=str,
help='Unique ID for the target device (if using MicroTVM)')
parser.add_argument('--utvm-dev-config-args', type=str,
help=('Python list of literals required to generate a default'
' MicroTVM config (if --utvm-dev-id is specified)'))
parser.set_defaults(fork=True) parser.set_defaults(fork=True)
args = parser.parse_args() args = parser.parse_args()
......
...@@ -14,13 +14,9 @@ ...@@ -14,13 +14,9 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
"""MicroTVM module for bare-metal backends"""
"""uTVM module for bare-metal backends.
uTVM (or the micro backend) provides support for bare-metal devices.
Its targets currently include a host-emulated device, which is used for testing,
and a JTAG-based OpenOCD device, which allows actual interfacing with microdevices.
"""
from ..contrib import binutil from ..contrib import binutil
from .base import Session, cross_compiler, create_micro_lib from .base import Session, create_micro_mod, cross_compiler
from .base import LibType, get_micro_host_driven_dir, get_micro_device_dir
from . import device
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Device-specific configuration for MicroTVM"""
from .base import register_device, get_device_funcs, create_micro_lib_base
from . import host
from . import arm
from . import riscv_spike
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base module for ARM device configurations"""
from . import stm32f746xx
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for ARM STM32F746XX devices"""
from .. import create_micro_lib_base, register_device
# Identifier under which this device's functions are registered (see the
# `register_device` call at the bottom of this module).
DEVICE_ID = "arm.stm32f746xx"
# Prefix passed to `create_micro_lib_base` as `toolchain_prefix`.
TOOLCHAIN_PREFIX = "arm-none-eabi-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the given list is not modified
    """
    device_options = [
        "-mcpu=cortex-m7",
        "-mlittle-endian",
        "-mfloat-abi=hard",
        "-mfpu=fpv5-sp-d16",
        "-mthumb",
        "-gdwarf-5",
    ]
    # Build a fresh list rather than using `options += ...`: in-place
    # extension would append the device flags to the caller's own list, so
    # they would pile up if the caller reused that list across calls.
    compile_options = ([] if options is None else list(options)) + device_options
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=compile_options)
def default_config(server_addr, server_port):
    """Build the default MicroTVM configuration for ARM STM32F746XX devices

    Parameters
    ----------
    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Returns
    -------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    def region(start, size):
        # one memory-layout entry: start address plus size
        return {"start": start, "size": size}

    #
    # [Device Memory Layout]
    #   RAM   (rwx) : START = 0x20000000, LENGTH = 320K
    #   FLASH (rx)  : START = 0x8000000,  LENGTH = 1024K
    #
    mem_layout = {
        "text": region(0x20000180, 20480),
        "rodata": region(0x20005180, 20480),
        "data": region(0x2000a180, 768),
        "bss": region(0x2000a480, 768),
        "args": region(0x2000a780, 1280),
        "heap": region(0x2000ac80, 262144),
        "workspace": region(0x2004ac80, 20480),
        "stack": region(0x2004fc80, 80),
    }
    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
    return config
# Register this module's compile and config functions under DEVICE_ID.
# This runs as a side effect of importing the module.
register_device(DEVICE_ID, {
"create_micro_lib": create_micro_lib,
"default_config": default_config,
})
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Base definitions for MicroTVM config"""
import glob
import os
from pathlib import Path
from tvm.contrib import util as _util
from tvm.contrib.binutil import run_cmd
from tvm._ffi.libinfo import find_include_path
from tvm.micro import LibType, get_micro_host_driven_dir, get_micro_device_dir
# Maps a device ID to the dict of functions registered for that device.
_DEVICE_REGISTRY = {}


def register_device(device_id, device_funcs):
    """Register a device and associated compilation/config functions

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has already been registered
    """
    if device_id in _DEVICE_REGISTRY:
        raise RuntimeError(f"\"{device_id}\" already exists in the device registry")
    _DEVICE_REGISTRY[device_id] = device_funcs


def get_device_funcs(device_id):
    """Get compilation and config generation functions for device

    Parameters
    ----------
    device_id : str
        unique identifier for the device

    Returns
    -------
    device_funcs : Dict[str, func]
        dictionary with compilation and config generation functions as values

    Raises
    ------
    RuntimeError
        if `device_id` has not been registered
    """
    if device_id not in _DEVICE_REGISTRY:
        # The lookup is against the device registry; name it correctly so the
        # message agrees with the one raised by `register_device`.
        raise RuntimeError(f"\"{device_id}\" does not exist in the device registry")
    return _DEVICE_REGISTRY[device_id]
def create_micro_lib_base(
        out_obj_path,
        in_src_path,
        toolchain_prefix,
        device_id,
        lib_type,
        options=None):
    """Compiles code into a binary for the target micro device.

    Every source file is compiled to its own object file inside a temporary
    directory, and the objects are then combined into `out_obj_path` with a
    relocatable link.

    Parameters
    ----------
    out_obj_path : str
        path to generated object file

    in_src_path : str
        path to source file

    toolchain_prefix : str
        toolchain prefix to be used. For example, a prefix of
        "riscv64-unknown-elf-" means "riscv64-unknown-elf-gcc" is used as
        the compiler and "riscv64-unknown-elf-ld" is used as the linker,
        etc.

    device_id : str
        unique identifier for the target device

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : List[str]
        additional options to pass to GCC

    Raises
    ------
    RuntimeError
        if `lib_type` is neither `LibType.RUNTIME` nor `LibType.OPERATOR`,
        or if the compiler or linker invocation fails
    """
    base_compile_cmd = [
        f"{toolchain_prefix}gcc",
        "-std=c11",
        "-Wall",
        "-Wextra",
        "--pedantic",
        "-c",
        "-O0",
        "-g",
        "-nostartfiles",
        "-nodefaultlibs",
        "-nostdlib",
        "-fdata-sections",
        "-ffunction-sections",
    ]
    if options is not None:
        base_compile_cmd += options

    src_paths = []
    include_paths = find_include_path() + [get_micro_host_driven_dir()]
    tmp_dir = _util.tempdir()
    # we might transform the src path in one of the branches below
    new_in_src_path = in_src_path
    if lib_type == LibType.RUNTIME:
        dev_dir = _get_device_source_dir(device_id)
        dev_src_paths = glob.glob(f"{dev_dir}/*.[csS]")
        # there needs to at least be a utvm_timer.c file
        assert dev_src_paths, f"no device sources found in \"{dev_dir}\""
        assert "utvm_timer.c" in map(os.path.basename, dev_src_paths), \
            f"\"utvm_timer.c\" not found in \"{dev_dir}\""
        src_paths += dev_src_paths
    elif lib_type == LibType.OPERATOR:
        # create a temporary copy of the source, so we can inject the dev lib
        # header without modifying the original.
        temp_src_path = tmp_dir.relpath("temp.c")
        with open(in_src_path, "r") as f:
            src_lines = f.read().splitlines()
        src_lines.insert(0, "#include \"utvm_device_dylib_redirect.c\"")
        with open(temp_src_path, "w") as f:
            f.write("\n".join(src_lines))
        new_in_src_path = temp_src_path
        # "-c" is already part of the base command, so it is not added again.
    else:
        raise RuntimeError("unknown lib type")
    src_paths += [new_in_src_path]

    for path in include_paths:
        base_compile_cmd += ["-I", path]

    prereq_obj_paths = []
    for src_path in src_paths:
        # Keep intermediate objects in the temp dir. A bare file name would
        # land them in the current working directory, leaving stray files
        # behind and letting concurrent builds overwrite each other's objects.
        curr_obj_path = tmp_dir.relpath(Path(src_path).with_suffix(".o").name)
        assert curr_obj_path not in prereq_obj_paths, \
            f"multiple source files map to object file \"{curr_obj_path}\""
        prereq_obj_paths.append(curr_obj_path)
        curr_compile_cmd = base_compile_cmd + [src_path, "-o", curr_obj_path]
        run_cmd(curr_compile_cmd)

    ld_cmd = [f"{toolchain_prefix}ld", "-relocatable"]
    ld_cmd += prereq_obj_paths
    ld_cmd += ["-o", out_obj_path]
    run_cmd(ld_cmd)
def _get_device_source_dir(device_id):
    """Return the directory holding device-specific uTVM sources.

    Each "."-separated component of `device_id` becomes one path component
    below the directory returned by `get_micro_device_dir()`.
    """
    relative_dir = device_id.replace(".", "/")
    return get_micro_device_dir() + "/" + relative_dir
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for the host emulated device"""
import sys
from . import create_micro_lib_base, register_device
# Identifier under which this device's functions are registered (see the
# `register_device` call at the bottom of this module).
DEVICE_ID = "host"
# Prefix passed to `create_micro_lib_base` as `toolchain_prefix`; it is empty
# for this device.
TOOLCHAIN_PREFIX = ""
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Wrapper over `create_micro_lib_base` to add device-specific options

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC; the given list is not modified
    """
    # Work on a copy: extending `options` in place would append the flag to
    # the caller's own list every time this function is called with it.
    compile_options = [] if options is None else list(options)
    if sys.maxsize > 2**32 and sys.platform.startswith("linux"):
        compile_options.append("-mcmodel=large")
    create_micro_lib_base(
        obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type, options=compile_options)
def default_config():
    """Build the default MicroTVM configuration for the host emulated device

    Returns
    -------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (region name, size) pairs; the host layout specifies sizes only
    region_sizes = [
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    ]
    mem_layout = {name: {"size": size} for name, size in region_sizes}
    if sys.maxsize > 2**32:
        word_size = 8
    else:
        word_size = 4
    return {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": word_size,
        "thumb_mode": False,
        "comms_method": "host",
    }
# Register this module's compile and config functions under DEVICE_ID.
# This runs as a side effect of importing the module.
register_device(DEVICE_ID, {
"create_micro_lib": create_micro_lib,
"default_config": default_config,
})
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Compilation and config definitions for Spike, a RISC-V functional ISA simulator"""
from collections import OrderedDict
from . import create_micro_lib_base, register_device
# Identifier under which this device's functions are registered (see the
# `register_device` call at the bottom of this module).
DEVICE_ID = "riscv_spike"
# Prefix passed to `create_micro_lib_base` as `toolchain_prefix`.
TOOLCHAIN_PREFIX = "riscv64-unknown-elf-"
def create_micro_lib(obj_path, src_path, lib_type, options=None):
    """Compile a library for Spike by delegating to `create_micro_lib_base`

    No device-specific flags are added; `options` is forwarded unchanged
    together with this module's toolchain prefix and device ID.

    Parameters
    ----------
    obj_path : str
        path to generated object file

    src_path : str
        path to source file

    lib_type : micro.LibType
        whether to compile a MicroTVM runtime or operator library

    options : Optional[List[str]]
        additional options to pass to GCC
    """
    base_args = (obj_path, src_path, TOOLCHAIN_PREFIX, DEVICE_ID, lib_type)
    create_micro_lib_base(*base_args, options=options)
def default_config(base_addr, server_addr, server_port):
    """Build the default MicroTVM configuration for Spike

    Parameters
    ----------
    base_addr : int
        base address of the simulator (for calculating the memory layout)

    server_addr : str
        address of OpenOCD server to connect to

    server_port : int
        port of OpenOCD server to connect to

    Returns
    -------
    config : Dict[str, Any]
        MicroTVM config dict for this device
    """
    # (region name, size) pairs, in the order the regions are laid out
    region_sizes = (
        ("text", 20480),
        ("rodata", 20480),
        ("data", 768),
        ("bss", 768),
        ("args", 1280),
        ("heap", 262144),
        ("workspace", 20480),
        ("stack", 80),
    )
    mem_layout = OrderedDict((name, {"size": size}) for name, size in region_sizes)
    config = {
        "device_id": DEVICE_ID,
        "toolchain_prefix": TOOLCHAIN_PREFIX,
        "mem_layout": mem_layout,
        "word_size": 4,
        # NOTE(review): `thumb_mode` is True here although this is a RISC-V
        # target -- possibly copied from the ARM config; confirm it is intended.
        "thumb_mode": True,
        "comms_method": "openocd",
        "server_addr": server_addr,
        "server_port": server_port,
    }
    # regions are packed back to back, starting at `base_addr`
    offset = 0
    for name in mem_layout:
        entry = mem_layout[name]
        entry["start"] = base_addr + offset
        offset += entry["size"]
    return config
# Register this module's compile and config functions under DEVICE_ID.
# This runs as a side effect of importing the module.
register_device(DEVICE_ID, {
"create_micro_lib": create_micro_lib,
"default_config": default_config,
})
...@@ -265,6 +265,9 @@ def load(path, fmt=""): ...@@ -265,6 +265,9 @@ def load(path, fmt=""):
files = [tar_temp.relpath(x) for x in tar_temp.listdir()] files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
_cc.create_shared(path + ".so", files) _cc.create_shared(path + ".so", files)
path += ".so" path += ".so"
# TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
elif path.endswith(".obj"):
fmt = "micro_dev"
# Redirect to the load API # Redirect to the load API
return _LoadFromFile(path, fmt) return _LoadFromFile(path, fmt)
......
...@@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name, ...@@ -85,7 +85,9 @@ Target CreateTarget(const std::string& target_name,
} }
t->device_type = kDLCPU; t->device_type = kDLCPU;
t->thread_warp_size = 1; t->thread_warp_size = 1;
if (target_name == "c" || target_name == "llvm") { if (target_name == "c" && t->device_name == "micro_dev") {
t->device_type = kDLMicroDev;
} else if (target_name == "c" || target_name == "llvm") {
t->keys_array.push_back(ir::StringImm::make("cpu")); t->keys_array.push_back(ir::StringImm::make("cpu"));
} else if (target_name == "cuda" || target_name == "nvptx") { } else if (target_name == "cuda" || target_name == "nvptx") {
t->device_type = kDLGPU; t->device_type = kDLGPU;
......
...@@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() { ...@@ -33,7 +33,8 @@ CodeGenCHost::CodeGenCHost() {
module_name_ = GetUniqueName("__tvm_module_ctx"); module_name_ = GetUniqueName("__tvm_module_ctx");
} }
void CodeGenCHost::Init(bool output_ssa) { void CodeGenCHost::Init(bool output_ssa, bool emit_asserts) {
emit_asserts_ = emit_asserts;
decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_runtime_api.h\"\n";
decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n"; decl_stream << "#include \"tvm/runtime/c_backend_api.h\"\n";
decl_stream << "extern void* " << module_name_ << " = NULL;\n"; decl_stream << "extern void* " << module_name_ << " = NULL;\n";
...@@ -237,6 +238,7 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*) ...@@ -237,6 +238,7 @@ void CodeGenCHost::VisitExpr_(const Call *op, std::ostream& os) { // NOLINT(*)
} }
void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*) void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*)
if (emit_asserts_) {
std::string cond = PrintExpr(op->condition); std::string cond = PrintExpr(op->condition);
PrintIndent(); PrintIndent();
stream << "if (!(" << cond << ")) {\n"; stream << "if (!(" << cond << ")) {\n";
...@@ -248,6 +250,7 @@ void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*) ...@@ -248,6 +250,7 @@ void CodeGenCHost::VisitStmt_(const AssertStmt *op) { // NOLINT(*)
this->EndScope(assert_if_scope); this->EndScope(assert_if_scope);
PrintIndent(); PrintIndent();
stream << "}\n"; stream << "}\n";
}
this->PrintStmt(op->body); this->PrintStmt(op->body);
} }
...@@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op, ...@@ -277,8 +280,9 @@ inline void CodeGenCHost::PrintTernaryCondExpr(const T* op,
runtime::Module BuildCHost(Array<LoweredFunc> funcs) { runtime::Module BuildCHost(Array<LoweredFunc> funcs) {
using tvm::runtime::Registry; using tvm::runtime::Registry;
bool output_ssa = false; bool output_ssa = false;
bool emit_asserts = false;
CodeGenCHost cg; CodeGenCHost cg;
cg.Init(output_ssa); cg.Init(output_ssa, emit_asserts);
for (LoweredFunc f : funcs) { for (LoweredFunc f : funcs) {
cg.AddFunction(f); cg.AddFunction(f);
} }
......
...@@ -35,7 +35,7 @@ namespace codegen { ...@@ -35,7 +35,7 @@ namespace codegen {
class CodeGenCHost final : public CodeGenC { class CodeGenCHost final : public CodeGenC {
public: public:
CodeGenCHost(); CodeGenCHost();
void Init(bool output_ssa); void Init(bool output_ssa, bool emit_asserts);
void AddFunction(LoweredFunc f); void AddFunction(LoweredFunc f);
std::string Finish(); std::string Finish();
...@@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC { ...@@ -53,6 +53,8 @@ class CodeGenCHost final : public CodeGenC {
private: private:
std::string module_name_; std::string module_name_;
/*! \brief whether to emit asserts in the resulting C code */
bool emit_asserts_;
void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name); void PrintGetFuncFromBackend(const std::string& func_name, const std::string& packed_func_name);
void PrintFuncCall(const std::string& packed_func_name, int num_args); void PrintFuncCall(const std::string& packed_func_name, int num_args);
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.s
* \brief uTVM init definition for STM32F746XX-series boards
*/
.syntax unified
.cpu cortex-m7
.fpu softvfp
.thumb
.section .text.UTVMInit
.type UTVMInit, %function
/*
 * UTVMInit: device-side entry stub for STM32F746XX boards.
 *
 * Grants access to the FPU, points the stack pointer at the address the
 * linker script provides as `_utvm_stack_pointer_init`, and then calls
 * UTVMMain. Takes no arguments; r0-r2 are used as scratch registers.
 */
UTVMInit:
/* enable fpu: read-modify-write of the word at 0xE000ED88 (the Coprocessor
 * Access Control Register on ARMv7-M), OR-ing in 0xF00000 (bits 20-23,
 * i.e. the CP10/CP11 access fields) */
ldr r0, =0xE000ED88
ldr r1, [r0]
ldr r2, =0xF00000
orr r1, r2
str r1, [r0]
/* barriers so the register write has completed before any later
 * instruction executes */
dsb
isb
/* set stack pointer from the linker-defined symbol */
ldr sp, =_utvm_stack_pointer_init
/* NOTE(review): nothing follows this call in the stub, so this presumably
 * relies on the host halting the core before UTVMMain returns here -
 * confirm against the low-level device's breakpoint handling */
bl UTVMMain
.size UTVMInit, .-UTVMInit
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API definitions for STM32F746XX-series boards
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include "utvm_runtime.h"
// There are two implementations of cycle counters on the STM32F7X: SysTick and
// CYCCNT. SysTick is preferred, as it gives better error handling, but the
// counter is only 24 bits wide. If a larger timer is needed, use the CYCCNT
// implementation, which has a 32-bit counter.
#define USE_SYSTICK
#ifdef USE_SYSTICK
// Memory-mapped SysTick registers, accessed as volatile 32-bit words
// (names follow the ARMv7-M SysTick register names):
// control/status, reload value, current value, and calibration value.
#define SYST_CSR (*((volatile uint32_t *) 0xE000E010))
#define SYST_RVR (*((volatile uint32_t *) 0xE000E014))
#define SYST_CVR (*((volatile uint32_t *) 0xE000E018))
#define SYST_CALIB (*((volatile uint32_t *) 0xE000E01C))
// The constants below are bit *positions* within SYST_CSR / SYST_CALIB, not
// masks; build a mask with `1 << <position>` before and-ing or or-ing.
#define SYST_CSR_ENABLE 0
#define SYST_CSR_TICKINT 1
#define SYST_CSR_CLKSOURCE 2
#define SYST_COUNTFLAG 16
#define SYST_CALIB_NOREF 31
#define SYST_CALIB_SKEW 30
// Counter values captured by UTVMTimerStart() and UTVMTimerStop();
// consumed by UTVMTimerRead().
uint32_t start_time = 0;
uint32_t stop_time = 0;
// Starts the SysTick counter and records its first non-zero value in
// `start_time`. This implementation always returns 0.
int32_t UTVMTimerStart() {
// set only the ENABLE and CLKSOURCE bits; TICKINT stays clear
SYST_CSR = (1 << SYST_CSR_ENABLE) | (1 << SYST_CSR_CLKSOURCE);
// wait until timer starts (busy-wait until the current value is non-zero)
while (SYST_CVR == 0) {}
start_time = SYST_CVR;
return 0;
}
// Stops the SysTick counter and records its current value in `stop_time`.
void UTVMTimerStop() {
// writing 0 clears every control bit, including ENABLE, before sampling
SYST_CSR = 0;
stop_time = SYST_CVR;
}
// Disables the SysTick counter and restores its reload and current-value
// registers to a known state ahead of the next measurement.
void UTVMTimerReset() {
SYST_CSR = 0;
// maximum reload value (24-bit): all-ones shifted right by 8 = 0x00FFFFFF
SYST_RVR = (~((uint32_t) 0)) >> 8;
// NOTE(review): per the ARMv7-M SysTick description, any write to the
// current-value register also clears the COUNTFLAG status bit - confirm
SYST_CVR = 0;
}
// Returns the number of SysTick ticks that elapsed between the last
// UTVMTimerStart() and UTVMTimerStop() calls.
//
// If the counter wrapped during the measurement, the elapsed time cannot be
// recovered from two samples of a 24-bit counter, so the error is recorded
// with TVMAPISetLastError and (uint32_t) -1 (0xFFFFFFFF) is returned.
uint32_t UTVMTimerRead() {
  // SYST_COUNTFLAG is a bit position (bit 16 of SYST_CSR), not a mask, so it
  // must be shifted before testing. And-ing with the raw value 16 would test
  // bit 4 instead and never observe a wrap.
  if (SYST_CSR & (((uint32_t) 1) << SYST_COUNTFLAG)) {
    TVMAPISetLastError("timer overflowed");
    return (uint32_t) -1;
  }
  // SysTick counts down, so the start sample is the larger of the two.
  return start_time - stop_time;
}
#else // !USE_SYSTICK
// Memory-mapped DWT (Data Watchpoint and Trace) registers, accessed as
// volatile 32-bit words: the control register and the cycle counter.
#define DWT_CTRL (*((volatile uint32_t *) 0xE0001000))
#define DWT_CYCCNT (*((volatile uint32_t *) 0xE0001004))
// The constants below are bit *positions* within DWT_CTRL, not masks;
// build a mask with `1 << <position>` before and-ing or or-ing.
#define DWT_CTRL_NOCYCCNT 25
#define DWT_CTRL_CYCCNTENA 0
// Counter values captured by UTVMTimerStart() and UTVMTimerStop();
// consumed by UTVMTimerRead().
uint32_t start_time = 0;
uint32_t stop_time = 0;
// Resets the cycle counter to zero ahead of the next measurement.
void UTVMTimerReset() {
DWT_CYCCNT = 0;
}
// Records the current cycle count in `start_time` and enables the DWT cycle
// counter.
//
// Returns 0 on success. Returns -1 (after recording the reason with
// TVMAPISetLastError) if the device reports that it has no cycle counter.
int32_t UTVMTimerStart() {
  // DWT_CTRL_NOCYCCNT is a bit position (bit 25), not a mask, so it must be
  // shifted before testing. And-ing with the raw value 25 would test bits
  // 0, 3 and 4 instead.
  if (DWT_CTRL & (((uint32_t) 1) << DWT_CTRL_NOCYCCNT)) {
    TVMAPISetLastError("cycle counter not implemented on device");
    return -1;
  }
  start_time = DWT_CYCCNT;
  DWT_CTRL |= (1 << DWT_CTRL_CYCCNTENA);
  // Explicit success code: falling off the end of a non-void function leaves
  // the value the caller inspects undefined.
  return 0;
}
// Records the current cycle count in `stop_time`, then disables the counter.
void UTVMTimerStop() {
// sample first so the register write below is not part of the measurement
stop_time = DWT_CYCCNT;
// clear only the CYCCNTENA bit; all other control bits are preserved
DWT_CTRL &= ~(1 << DWT_CTRL_CYCCNTENA);
}
// Returns the number of cycles that elapsed between the last
// UTVMTimerStart() and UTVMTimerStop() calls.
//
// The return type is uint32_t to match the `extern uint32_t UTVMTimerRead()`
// declaration in utvm_runtime.h, which this file includes.
uint32_t UTVMTimerRead() {
  // The cycle counter counts up and both samples are unsigned 32-bit values,
  // so the subtraction is performed modulo 2^32. That yields the correct
  // elapsed count whether or not the counter wrapped once between the two
  // samples, with no special case for stop_time < start_time.
  return stop_time - start_time;
}
#endif // USE_SYSTICK
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_init.c
* \brief uTVM init definition for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
// Entry point for the host-emulated device: performs no setup and hands
// control straight to UTVMMain.
void UTVMInit() {
// no init required for the host
UTVMMain();
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file utvm_timer.c
* \brief uTVM timer API stubs for the host emulated device
*/
#ifdef __cplusplus
extern "C" {
#endif
#include "utvm_runtime.h"
// TODO(weberlo): use this? https://stackoverflow.com/questions/5141960/get-the-current-time-in-c
// Host stub: no timer is started; always returns 0.
int32_t UTVMTimerStart() {
return 0;
}
// Host stub: intentionally does nothing.
void UTVMTimerStop() { }
// Host stub: intentionally does nothing.
void UTVMTimerReset() { }
// Host stub: no time is measured; always returns the constant 1.
// NOTE(review): presumably non-zero so consumers never see a zero duration -
// confirm against the code that reads utvm_task_time.
uint32_t UTVMTimerRead() {
return 1;
}
#ifdef __cplusplus
} // TVM_EXTERN_C
#endif
...@@ -21,9 +21,9 @@ ...@@ -21,9 +21,9 @@
* \file utvm_runtime.cc * \file utvm_runtime.cc
* \brief uTVM runtime * \brief uTVM runtime
* *
* All function calls go through `UTVMMain`, which reads from the current * All function calls go through the externally defined `UTVMInit`, which
* `UTVMTask` and calls the appropriate function with the arguments from the * performs device-specific setup, then calls `UTVMMain`. `UTVMMain` then
* task. * calls the function in `utvm_task` with the arguments from the task.
* *
* Additionally included in this file are definitions for some of the most * Additionally included in this file are definitions for some of the most
* common functions used in the C runtime API. * common functions used in the C runtime API.
...@@ -35,10 +35,17 @@ extern "C" { ...@@ -35,10 +35,17 @@ extern "C" {
#include "utvm_runtime.h" #include "utvm_runtime.h"
// Task pointers must be patched before calling a function. // Task pointers must be patched before calling a function.
UTVMTask task; UTVMTask utvm_task = {
.func = NULL,
.arg_values = NULL,
.arg_type_codes = NULL,
.num_args = 0,
};
size_t utvm_word_size = 0; // NOLINT(*)
// These pointers are patched at load time to point to the workspace section. // These pointers are patched at load time to point to the workspace section.
char* utvm_workspace_begin = NULL; // NOLINT(*) char* utvm_workspace_start = NULL; // NOLINT(*)
char* utvm_workspace_end = NULL; // NOLINT(*) char* utvm_workspace_end = NULL; // NOLINT(*)
char* utvm_workspace_curr = NULL; // NOLINT(*) char* utvm_workspace_curr = NULL; // NOLINT(*)
// Keep track of how many active allocations there are on the workspace. // Keep track of how many active allocations there are on the workspace.
...@@ -47,24 +54,39 @@ size_t utvm_num_active_allocs = 0; ...@@ -47,24 +54,39 @@ size_t utvm_num_active_allocs = 0;
const char* utvm_last_error = NULL; // NOLINT(*) const char* utvm_last_error = NULL; // NOLINT(*)
int32_t utvm_return_code = 0; // NOLINT(*) int32_t utvm_return_code = 0; // NOLINT(*)
// We use a dummy function to signal execution is finished for device uint32_t utvm_task_time = 0;
// backends which require breakpoints.
void UTVMDone() { }
// Gets called by UTVMInit, after device-specific initialization is finished.
void UTVMMain() { void UTVMMain() {
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
utvm_num_active_allocs = 0; utvm_num_active_allocs = 0;
utvm_last_error = NULL; // NOLINT(*) utvm_last_error = NULL; // NOLINT(*)
utvm_return_code = 0; utvm_return_code = 0;
utvm_return_code = task.func((void*) task.arg_values, (void*) task.arg_type_codes, // NOLINT(*) utvm_task_time = 0;
task.num_args); UTVMTimerReset();
int32_t err = UTVMTimerStart();
if (err < 0) {
utvm_return_code = err;
UTVMDone();
}
utvm_return_code = utvm_task.func(
(void*) utvm_task.arg_values, // NOLINT(*)
(void*) utvm_task.arg_type_codes, // NOLINT(*)
utvm_task.num_args);
UTVMTimerStop();
utvm_task_time = UTVMTimerRead();
UTVMDone(); UTVMDone();
} }
// We use a dummy function to signal execution is finished for device
// backends which require breakpoints.
void UTVMDone() { }
void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size, void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t size,
int dtype_code_hint, int dtype_bits_hint) { int dtype_code_hint, int dtype_bits_hint) {
// Align up to 8 bytes. // Align up to 8 bytes.
utvm_workspace_curr += (8 - ((uintptr_t) utvm_workspace_curr % 8)) % 8; // NOLINT(*) utvm_workspace_curr +=
(utvm_word_size - ((uintptr_t) utvm_workspace_curr % utvm_word_size)) % utvm_word_size; // NOLINT(*)
if (utvm_workspace_curr + size > utvm_workspace_end) { if (utvm_workspace_curr + size > utvm_workspace_end) {
// Out of space in workspace. // Out of space in workspace.
return NULL; return NULL;
...@@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) { ...@@ -81,11 +103,11 @@ int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr) {
TVMAPISetLastError("free called with no active workspace allocations"); TVMAPISetLastError("free called with no active workspace allocations");
// Reset allocations and workspace (for future task executions). // Reset allocations and workspace (for future task executions).
utvm_num_active_allocs = 0; utvm_num_active_allocs = 0;
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
return -1; return -1;
} else if (utvm_num_active_allocs == 0) { } else if (utvm_num_active_allocs == 0) {
// No more allocations. Reset workspace. // No more allocations. Reset workspace.
utvm_workspace_curr = utvm_workspace_begin; utvm_workspace_curr = utvm_workspace_start;
return 0; return 0;
} else { } else {
return 0; return 0;
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
* \file utvm_runtime.h * \file utvm_runtime.h
* \brief uTVM runtime headers * \brief uTVM runtime headers
*/ */
#ifndef TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #ifndef TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#define TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #define TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
...@@ -30,6 +30,7 @@ extern "C" { ...@@ -30,6 +30,7 @@ extern "C" {
#include <stdint.h> #include <stdint.h>
#include <tvm/runtime/c_runtime_api.h> #include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/c_backend_api.h>
/*! /*!
* \brief Task structure for uTVM * \brief Task structure for uTVM
...@@ -45,8 +46,22 @@ typedef struct { ...@@ -45,8 +46,22 @@ typedef struct {
int32_t num_args; int32_t num_args;
} UTVMTask; } UTVMTask;
extern void UTVMInit();
extern void UTVMTimerReset();
extern int32_t UTVMTimerStart();
extern void UTVMTimerStop();
extern uint32_t UTVMTimerRead();
void UTVMMain();
void UTVMDone();
#ifdef __cplusplus #ifdef __cplusplus
} // TVM_EXTERN_C } // TVM_EXTERN_C
#endif #endif
#endif // TVM_RUNTIME_MICRO_DEVICE_UTVM_RUNTIME_H_ #endif // TVM_RUNTIME_MICRO_HOST_DRIVEN_UTVM_RUNTIME_H_
...@@ -31,6 +31,9 @@ ...@@ -31,6 +31,9 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
/*! \brief number of bytes in each page */
constexpr int kPageSize = 4096;
/*! /*!
* \brief emulated low-level device on host machine * \brief emulated low-level device on host machine
*/ */
...@@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice { ...@@ -40,40 +43,33 @@ class HostLowLevelDevice final : public LowLevelDevice {
* \brief constructor to initialize on-host memory region to act as device * \brief constructor to initialize on-host memory region to act as device
* \param num_bytes size of the emulated on-device memory region * \param num_bytes size of the emulated on-device memory region
*/ */
explicit HostLowLevelDevice(size_t num_bytes) : size_(num_bytes) { explicit HostLowLevelDevice(size_t num_bytes, void** base_addr) : size_(num_bytes) {
size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize; size_t size_in_pages = (num_bytes + kPageSize - 1) / kPageSize;
// TODO(weberlo): Set permissions per section (e.g., read-write perms for // TODO(weberlo): Set permissions per section (e.g., read-write perms for
// the heap, execute perms for text, etc.). // the heap, execute perms for text, etc.).
int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC; int mmap_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE; int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE;
base_addr_ = reinterpret_cast<std::uintptr_t>( base_addr_ = mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0);
mmap(nullptr, size_in_pages * kPageSize, mmap_prot, mmap_flags, -1, 0)); *base_addr = base_addr_;
} }
/*! /*!
* \brief destructor to deallocate on-host device region * \brief destructor to deallocate on-host device region
*/ */
virtual ~HostLowLevelDevice() { virtual ~HostLowLevelDevice() {
munmap(reinterpret_cast<void*>(base_addr_), size_); munmap(base_addr_, size_);
}
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>();
std::memcpy(buf, addr, num_bytes);
} }
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { void Read(DevPtr addr, void* buf, size_t num_bytes) {
void* addr = ToDevPtr(offset).cast_to<void*>(); std::memcpy(buf, addr.cast_to<void*>(), num_bytes);
std::memcpy(addr, buf, num_bytes);
} }
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { void Write(DevPtr addr, const void* buf, size_t num_bytes) {
DevPtr func_addr = ToDevPtr(func_offset); std::memcpy(addr.cast_to<void*>(), buf, num_bytes);
reinterpret_cast<void (*)(void)>(func_addr.value())();
} }
std::uintptr_t base_addr() const final { void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
return base_addr_; reinterpret_cast<void (*)(void)>(func_addr.value().val64)();
} }
const char* device_type() const final { const char* device_type() const final {
...@@ -82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice { ...@@ -82,14 +78,14 @@ class HostLowLevelDevice final : public LowLevelDevice {
private: private:
/*! \brief base address of the micro device memory region */ /*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_; void* base_addr_;
/*! \brief size of memory region */ /*! \brief size of memory region */
size_t size_; size_t size_;
}; };
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes) { const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr) {
std::shared_ptr<LowLevelDevice> lld = std::shared_ptr<LowLevelDevice> lld =
std::make_shared<HostLowLevelDevice>(num_bytes); std::make_shared<HostLowLevelDevice>(num_bytes, base_addr);
return lld; return lld;
} }
......
...@@ -40,87 +40,52 @@ class LowLevelDevice { ...@@ -40,87 +40,52 @@ class LowLevelDevice {
virtual ~LowLevelDevice() {} virtual ~LowLevelDevice() {}
/*! /*!
* \brief reads num_bytes from device memory at base_addr + offset into buffer * \brief reads num_bytes from device memory at addr into buffer
* \param offset on-device memory offset pointer to be read from * \param addr on-device memory address to read from
* \param buffer on-host buffer to be read into * \param buffer on-host buffer to be read into
* \param num_bytes number of bytes to be read * \param num_bytes number of bytes to read
*/ */
virtual void Read(DevBaseOffset offset, virtual void Read(DevPtr addr,
void* buffer, void* buffer,
size_t num_bytes) = 0; size_t num_bytes) = 0;
/*! /*!
* \brief writes num_bytes from buffer to device memory at base_addr + offset * \brief writes num_bytes from buffer to device memory at addr
* \param offset on-device memory offset pointer to be written to * \param addr on-device memory address to write into
* \param buffer on-host buffer to be written * \param buffer host buffer to write from
* \param num_bytes number of bytes to be written * \param num_bytes number of bytes to write
*/ */
virtual void Write(DevBaseOffset offset, virtual void Write(DevPtr addr,
const void* buffer, const void* buffer,
size_t num_bytes) = 0; size_t num_bytes) = 0;
/*! /*!
* \brief starts execution of device at offset * \brief starts execution of device at func_addr
* \param func_addr offset of the init stub function * \param func_addr offset of the init stub function
* \param breakpoint breakpoint at which to stop function execution * \param breakpoint_addr address at which to stop function execution
*/ */
virtual void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) = 0; virtual void Execute(DevPtr func_addr, DevPtr breakpoint_addr) = 0;
// TODO(weberlo): Should we just give the device the *entire* memory layout
// decided by the session?
/*!
* \brief sets the offset of the top of the stack section
* \param stack_top offset of the stack top
*/
virtual void SetStackTop(DevBaseOffset stack_top) {
LOG(FATAL) << "unimplemented";
}
/*!
* \brief convert from base offset to absolute address
* \param offset base offset
* \return absolute address
*/
DevPtr ToDevPtr(DevBaseOffset offset) {
return DevPtr(base_addr() + offset.value());
}
/*!
* \brief convert from absolute address to base offset
* \param ptr absolute address
* \return base offset
*/
DevBaseOffset ToDevOffset(DevPtr ptr) {
return DevBaseOffset(ptr.value() - base_addr());
}
/*! /*!
* \brief getter function for low-level device type * \brief getter function for low-level device type
* \return string containing device type * \return string containing device type
*/ */
virtual const char* device_type() const = 0; virtual const char* device_type() const = 0;
protected:
/*!
* \brief getter function for base_addr
* \return the base address of the device memory region
*/
virtual std::uintptr_t base_addr() const = 0;
}; };
/*! /*!
* \brief create a host low-level device * \brief create a host low-level device
* \param num_bytes size of the memory region * \param num_bytes size of the memory region
* \param base_addr pointer to write the host device's resulting base address into
*/ */
const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes); const std::shared_ptr<LowLevelDevice> HostLowLevelDeviceCreate(size_t num_bytes, void** base_addr);
/*! /*!
* \brief connect to OpenOCD and create an OpenOCD low-level device * \brief connect to OpenOCD and create an OpenOCD low-level device
* \param addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to
*/ */
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& addr,
const std::string& addr,
int port); int port);
} // namespace runtime } // namespace runtime
......
...@@ -35,30 +35,6 @@ ...@@ -35,30 +35,6 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
size_t GetDefaultSectionSize(SectionKind kind) {
switch (kind) {
case SectionKind::kText:
return 0xF000;
case SectionKind::kRodata:
return 0xF000;
case SectionKind::kData:
return 0xF00;
case SectionKind::kBss:
return 0xF00;
case SectionKind::kArgs:
return 0xF0000;
case SectionKind::kStack:
return 0xF000;
case SectionKind::kHeap:
return 0xF00000;
case SectionKind::kWorkspace:
return 0xF0000;
default:
LOG(FATAL) << "invalid section " << static_cast<size_t>(kind);
return 0;
}
}
const char* SectionToString(SectionKind section) { const char* SectionToString(SectionKind section) {
switch (section) { switch (section) {
case SectionKind::kText: return "text"; case SectionKind::kText: return "text";
...@@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) { ...@@ -66,37 +42,32 @@ const char* SectionToString(SectionKind section) {
case SectionKind::kData: return "data"; case SectionKind::kData: return "data";
case SectionKind::kBss: return "bss"; case SectionKind::kBss: return "bss";
case SectionKind::kArgs: return "args"; case SectionKind::kArgs: return "args";
case SectionKind::kStack: return "stack";
case SectionKind::kHeap: return "heap"; case SectionKind::kHeap: return "heap";
case SectionKind::kWorkspace: return "workspace"; case SectionKind::kWorkspace: return "workspace";
case SectionKind::kStack: return "stack";
default: return ""; default: return "";
} }
} }
static std::string AddrToString(void* addr) { std::string RelocateBinarySections(
std::stringstream stream; const std::string& binary_path,
if (addr != nullptr) size_t word_size,
stream << addr; DevPtr text_start,
else DevPtr rodata_start,
stream << "0x0"; DevPtr data_start,
std::string string_addr = stream.str(); DevPtr bss_start,
return string_addr; DevPtr stack_end,
}
std::string RelocateBinarySections(const std::string& binary_path,
DevPtr text,
DevPtr rodata,
DevPtr data,
DevPtr bss,
const std::string& toolchain_prefix) { const std::string& toolchain_prefix) {
const auto* f = Registry::Get("tvm_callback_relocate_binary"); const auto* f = Registry::Get("tvm_callback_relocate_binary");
CHECK(f != nullptr) CHECK(f != nullptr)
<< "Require tvm_callback_relocate_binary to exist in registry"; << "Require tvm_callback_relocate_binary to exist in registry";
std::string relocated_bin = (*f)(binary_path, std::string relocated_bin = (*f)(binary_path,
AddrToString(text.cast_to<void*>()), word_size,
AddrToString(rodata.cast_to<void*>()), text_start.cast_to<uint64_t>(),
AddrToString(data.cast_to<void*>()), rodata_start.cast_to<uint64_t>(),
AddrToString(bss.cast_to<void*>()), data_start.cast_to<uint64_t>(),
bss_start.cast_to<uint64_t>(),
stack_end.cast_to<uint64_t>(),
toolchain_prefix); toolchain_prefix);
return relocated_bin; return relocated_bin;
} }
......
...@@ -46,122 +46,79 @@ enum class SectionKind : size_t { ...@@ -46,122 +46,79 @@ enum class SectionKind : size_t {
kData, kData,
kBss, kBss,
kArgs, kArgs,
kStack,
kHeap, kHeap,
kWorkspace, kWorkspace,
kStack,
kNumKinds, kNumKinds,
}; };
/*! \brief default size alignment */ /*! \brief union for storing values on varying target word sizes */
constexpr int kDefaultSizeAlignment = 8; union TargetVal {
/*! \brief 32-bit pointer */
uint32_t val32;
/*! \brief 64-bit pointer */
uint64_t val64;
};
/*! \brief Base class for interfacing with device locations (pointers/offsets) */ /*! \brief absolute device address */
class DeviceLocation { class DevPtr {
public: public:
/*! \brief construct a location with value `value` */ /*! \brief construct a device address with value `value` */
explicit DeviceLocation(std::uintptr_t value) : value_(value) {} explicit DevPtr(std::uintptr_t value) : value_(TargetVal { .val64 = value }) {}
/*! \brief default constructor */ /*! \brief default constructor */
DeviceLocation() : value_(0) {} DevPtr() : value_(TargetVal { .val64 = 0 }) {}
/*! \brief construct a null location */ /*! \brief construct a null address */
explicit DeviceLocation(std::nullptr_t value) : value_(0) {} explicit DevPtr(std::nullptr_t value) : value_(TargetVal { .val64 = 0 }) {}
/*! \brief destructor */ /*! \brief destructor */
virtual ~DeviceLocation() {} ~DevPtr() {}
/*! /*!
* \brief get value of location * \brief get value of pointer
* \return value of location * \return value of pointer
*/ */
std::uintptr_t value() const { return value_; } TargetVal value() const { return value_; }
/*! /*!
* \brief cast location to type `T` * \brief cast location to type `T`
* \return casted result * \return casted result
*/ */
template <typename T> template <typename T>
T cast_to() const { return reinterpret_cast<T>(value_); } T cast_to() const { return reinterpret_cast<T>(value_.val64); }
/*! \brief check if location is null */ /*! \brief check if location is null */
bool operator==(std::nullptr_t) const { return value_ == 0; } bool operator==(std::nullptr_t) const { return value_.val64 == 0; }
/*! \brief check if location is not null */ /*! \brief check if location is not null */
bool operator!=(std::nullptr_t) const { return value_ != 0; } bool operator!=(std::nullptr_t) const { return value_.val64 != 0; }
protected:
/*! \brief raw value storing the location */
std::uintptr_t value_;
};
/*! \brief absolute device address */
class DevPtr : public DeviceLocation {
public:
/*! \brief construct an absolute address with value `value` */
explicit DevPtr(std::uintptr_t val) : DeviceLocation(val) {}
/*! \brief default constructor */
DevPtr() : DeviceLocation() {}
/*! \brief construct a null absolute address */
explicit DevPtr(std::nullptr_t val) : DeviceLocation(val) {}
/*! \brief add an integer to this absolute address to get a larger absolute address */ /*! \brief add an integer to this absolute address to get a larger absolute address */
DevPtr operator+(size_t n) const { DevPtr operator+(size_t n) const {
return DevPtr(value_ + n); return DevPtr(value_.val64 + n);
} }
/*! \brief mutably add an integer to this absolute address */ /*! \brief mutably add an integer to this absolute address */
DevPtr& operator+=(size_t n) { DevPtr& operator+=(size_t n) {
value_ += n; value_.val64 += n;
return *this; return *this;
} }
/*! \brief subtract an integer from this absolute address to get a smaller absolute address */ /*! \brief subtract an integer from this absolute address to get a smaller absolute address */
DevPtr operator-(size_t n) const { DevPtr operator-(size_t n) const {
return DevPtr(value_ - n); return DevPtr(value_.val64 - n);
} }
/*! \brief mutably subtract an integer from this absolute address */ /*! \brief mutably subtract an integer from this absolute address */
DevPtr& operator-=(size_t n) { DevPtr& operator-=(size_t n) {
value_ -= n; value_.val64 -= n;
return *this; return *this;
} }
};
/*! \brief offset from device base address */
class DevBaseOffset : public DeviceLocation {
public:
/*! \brief construct a base offset with value `value` */
explicit DevBaseOffset(std::uintptr_t value) : DeviceLocation(value) {}
/*! \brief default constructor */
DevBaseOffset() : DeviceLocation() {}
/*! \brief construct a null base offset */
explicit DevBaseOffset(std::nullptr_t value) : DeviceLocation(value) {}
/*! \brief add an integer to this base offset to get a larger base offset */ private:
DevBaseOffset operator+(size_t n) const { /*! \brief raw value storing the pointer */
return DevBaseOffset(value_ + n); TargetVal value_;
}
/*! \brief mutably add an integer to this base offset */
DevBaseOffset& operator+=(size_t n) {
value_ += n;
return *this;
}
/*! \brief subtract an integer from this base offset to get a smaller base offset */
DevBaseOffset operator-(size_t n) const {
return DevBaseOffset(value_ - n);
}
/*! \brief mutably subtract an integer from this base offset */
DevBaseOffset& operator-=(size_t n) {
value_ -= n;
return *this;
}
}; };
/*! /*!
...@@ -212,6 +169,10 @@ class SymbolMap { ...@@ -212,6 +169,10 @@ class SymbolMap {
return result->second; return result->second;
} }
bool HasSymbol(const std::string& name) const {
return map_.find(name) != map_.end();
}
private: private:
/*! \brief backing map */ /*! \brief backing map */
std::unordered_map<std::string, DevPtr> map_; std::unordered_map<std::string, DevPtr> map_;
...@@ -220,7 +181,7 @@ class SymbolMap { ...@@ -220,7 +181,7 @@ class SymbolMap {
/*! \brief struct containing start and size of a device memory region */ /*! \brief struct containing start and size of a device memory region */
struct DevMemRegion { struct DevMemRegion {
/*! \brief section start offset */ /*! \brief section start offset */
DevBaseOffset start; DevPtr start;
/*! \brief size of section */ /*! \brief size of section */
size_t size; size_t size;
}; };
...@@ -239,16 +200,13 @@ struct BinaryInfo { ...@@ -239,16 +200,13 @@ struct BinaryInfo {
SymbolMap symbol_map; SymbolMap symbol_map;
}; };
// TODO(weberlo): should this be here? struct BinaryContents {
/*! \brief number of bytes in each page */ BinaryInfo binary_info;
constexpr int kPageSize = 4096; std::string text_contents;
std::string rodata_contents;
const DevBaseOffset kDeviceStart = DevBaseOffset(64); std::string data_contents;
std::string bss_contents;
/*! };
* \brief return default size of given section kind in bytes
*/
size_t GetDefaultSectionSize(SectionKind kind);
/*! /*!
* \brief upper-aligns value according to specified alignment * \brief upper-aligns value according to specified alignment
...@@ -270,18 +228,23 @@ const char* SectionToString(SectionKind section); ...@@ -270,18 +228,23 @@ const char* SectionToString(SectionKind section);
/*! /*!
* \brief links binary by repositioning section addresses * \brief links binary by repositioning section addresses
* \param binary_name input binary filename * \param binary_name input binary filename
* \param text new text section address * \param word_size word size on the target machine
* \param rodata new rodata section address * \param text_start text section address
* \param data new data section address * \param rodata_start rodata section address
* \param bss new bss section address * \param data_start data section address
* \param bss_start bss section address
* \param stack_end stack section end address
* \param toolchain_prefix prefix of compiler toolchain to use * \param toolchain_prefix prefix of compiler toolchain to use
* \return relocated binary file contents * \return relocated binary file contents
*/ */
std::string RelocateBinarySections(const std::string& binary_name, std::string RelocateBinarySections(
DevPtr text, const std::string& binary_path,
DevPtr rodata, size_t word_size,
DevPtr data, DevPtr text_start,
DevPtr bss, DevPtr rodata_start,
DevPtr data_start,
DevPtr bss_start,
DevPtr stack_end,
const std::string& toolchain_prefix); const std::string& toolchain_prefix);
/*! /*!
...@@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary, ...@@ -306,7 +269,7 @@ std::string ReadSection(const std::string& binary,
size_t GetSectionSize(const std::string& binary_name, size_t GetSectionSize(const std::string& binary_name,
SectionKind section, SectionKind section,
const std::string& toolchain_prefix, const std::string& toolchain_prefix,
size_t align = kDefaultSizeAlignment); size_t align);
} // namespace runtime } // namespace runtime
} // namespace tvm } // namespace tvm
......
...@@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -61,7 +61,7 @@ class MicroDeviceAPI final : public DeviceAPI {
void FreeDataSpace(TVMContext ctx, void* ptr) final { void FreeDataSpace(TVMContext ctx, void* ptr) final {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(ptr); MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(ptr);
dev_space->session->FreeInSection( dev_space->session->FreeInSection(
SectionKind::kHeap, DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data))); SectionKind::kHeap, DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space; delete dev_space;
} }
...@@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -89,12 +89,12 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session; ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
std::vector<uint8_t> buffer(size); std::vector<uint8_t> buffer(size);
lld->Read(from_dev_offset, static_cast<void*>(buffer.data()), size); lld->Read(from_dev_addr, static_cast<void*>(buffer.data()), size);
lld->Write(to_dev_offset, static_cast<void*>(buffer.data()), size); lld->Write(to_dev_addr, static_cast<void*>(buffer.data()), size);
} else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) { } else if (type_from_to == std::make_tuple(kDLMicroDev, kDLCPU)) {
// Reading from the device. // Reading from the device.
...@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -102,9 +102,9 @@ class MicroDeviceAPI final : public DeviceAPI {
ObjectPtr<MicroSession>& session = from_space->session; ObjectPtr<MicroSession>& session = from_space->session;
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
DevBaseOffset from_dev_offset = GetDevLoc(from_space, from_offset); DevPtr from_dev_addr = GetDevLoc(from_space, from_offset);
void* to_host_ptr = GetHostLoc(to, to_offset); void* to_host_ptr = GetHostLoc(to, to_offset);
lld->Read(from_dev_offset, to_host_ptr, size); lld->Read(from_dev_addr, to_host_ptr, size);
} else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) { } else if (type_from_to == std::make_tuple(kDLCPU, kDLMicroDev)) {
// Writing to the device. // Writing to the device.
...@@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -113,8 +113,8 @@ class MicroDeviceAPI final : public DeviceAPI {
const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device(); const std::shared_ptr<LowLevelDevice>& lld = session->low_level_device();
void* from_host_ptr = GetHostLoc(from, from_offset); void* from_host_ptr = GetHostLoc(from, from_offset);
DevBaseOffset to_dev_offset = GetDevLoc(to_space, to_offset); DevPtr to_dev_addr = GetDevLoc(to_space, to_offset);
lld->Write(to_dev_offset, from_host_ptr, size); lld->Write(to_dev_addr, from_host_ptr, size);
} else { } else {
LOG(FATAL) << "Expect copy from/to micro device or between micro device\n"; LOG(FATAL) << "Expect copy from/to micro device or between micro device\n";
} }
...@@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -138,7 +138,7 @@ class MicroDeviceAPI final : public DeviceAPI {
MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(data); MicroDevSpace* dev_space = static_cast<MicroDevSpace*>(data);
ObjectPtr<MicroSession>& session = dev_space->session; ObjectPtr<MicroSession>& session = dev_space->session;
session->FreeInSection(SectionKind::kWorkspace, session->FreeInSection(SectionKind::kWorkspace,
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data))); DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data)));
delete dev_space; delete dev_space;
} }
...@@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI { ...@@ -152,10 +152,8 @@ class MicroDeviceAPI final : public DeviceAPI {
} }
private: private:
DevBaseOffset GetDevLoc(MicroDevSpace* dev_space, size_t offset) { DevPtr GetDevLoc(MicroDevSpace* dev_space, size_t offset) {
DevBaseOffset dev_offset = return DevPtr(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
DevBaseOffset(reinterpret_cast<std::uintptr_t>(dev_space->data) + offset);
return dev_offset;
} }
void* GetHostLoc(const void* ptr, size_t offset) { void* GetHostLoc(const void* ptr, size_t offset) {
......
...@@ -55,62 +55,48 @@ class MicroModuleNode final : public ModuleNode { ...@@ -55,62 +55,48 @@ class MicroModuleNode final : public ModuleNode {
*/ */
void InitMicroModule(const std::string& binary_path) { void InitMicroModule(const std::string& binary_path) {
session_ = MicroSession::Current(); session_ = MicroSession::Current();
binary_path_ = binary_path; symbol_map_ = session_->LoadBinary(binary_path, true).symbol_map;
binary_info_ = session_->LoadBinary(binary_path_);
}
/*!
* \brief runs selected function on the micro device
* \param func_name name of the function to be run
* \param func_offset offset of the function to be run
* \param args type-erased arguments passed to the function
*/
void RunFunction(const std::string& func_name, DevBaseOffset func_offset, const TVMArgs& args) {
session_->PushToExecQueue(func_offset, args);
} }
private: private:
/*! \brief module binary info */ SymbolMap symbol_map_;
BinaryInfo binary_info_;
/*! \brief path to module binary */
std::string binary_path_;
/*! \brief global session pointer */ /*! \brief global session pointer */
ObjectPtr<MicroSession> session_; ObjectPtr<MicroSession> session_;
}; };
class MicroWrappedFunc { class MicroWrappedFunc {
public: public:
MicroWrappedFunc(MicroModuleNode* m, MicroWrappedFunc(ObjectPtr<MicroSession> session,
ObjectPtr<MicroSession> session, DevPtr func_ptr) {
const std::string& func_name,
DevBaseOffset func_offset) {
m_ = m;
session_ = session; session_ = session;
func_name_ = func_name; func_ptr_ = func_ptr;
func_offset_ = func_offset;
} }
void operator()(TVMArgs args, TVMRetValue* rv) const { void operator()(TVMArgs args, TVMRetValue* rv) const {
m_->RunFunction(func_name_, func_offset_, args); *rv = session_->PushToExecQueue(func_ptr_, args);
} }
private: private:
/*! \brief internal module */
MicroModuleNode* m_;
/*! \brief reference to the session for this function (to keep the session alive) */ /*! \brief reference to the session for this function (to keep the session alive) */
ObjectPtr<MicroSession> session_; ObjectPtr<MicroSession> session_;
/*! \brief name of the function */
std::string func_name_;
/*! \brief offset of the function to be called */ /*! \brief offset of the function to be called */
DevBaseOffset func_offset_; DevPtr func_ptr_;
}; };
PackedFunc MicroModuleNode::GetFunction( PackedFunc MicroModuleNode::GetFunction(
const std::string& name, const std::string& name,
const ObjectPtr<Object>& sptr_to_self) { const ObjectPtr<Object>& sptr_to_self) {
DevBaseOffset func_offset = DevPtr func_ptr;
session_->low_level_device()->ToDevOffset(binary_info_.symbol_map[name]); if (name == tvm::runtime::symbol::tvm_module_main) {
MicroWrappedFunc f(this, session_, name, func_offset); if (symbol_map_.HasSymbol(tvm::runtime::symbol::tvm_module_main)) {
func_ptr = symbol_map_[tvm::runtime::symbol::tvm_module_main];
} else {
func_ptr = symbol_map_["default_function"];
}
} else {
func_ptr = symbol_map_[name];
}
MicroWrappedFunc f(session_, func_ptr);
return PackedFunc(f); return PackedFunc(f);
} }
......
...@@ -38,11 +38,15 @@ class MicroSectionAllocator { ...@@ -38,11 +38,15 @@ class MicroSectionAllocator {
* \brief constructor that specifies section boundaries * \brief constructor that specifies section boundaries
* \param region location and size of the section on the device * \param region location and size of the section on the device
*/ */
explicit MicroSectionAllocator(DevMemRegion region) explicit MicroSectionAllocator(DevMemRegion region, size_t word_size)
: start_offset_(region.start), : start_addr_(region.start),
size_(0), size_(0),
capacity_(region.size) { capacity_(region.size),
CHECK_EQ(start_offset_.value() % 8, 0) << "micro section not aligned to 8 bytes"; word_size_(word_size) {
CHECK_EQ(start_addr_.value().val64 % word_size, 0)
<< "micro section start not aligned to " << word_size << " bytes";
CHECK_EQ(capacity_ % word_size, 0)
<< "micro section end not aligned to " << word_size << " bytes";
} }
/*! /*!
...@@ -55,15 +59,15 @@ class MicroSectionAllocator { ...@@ -55,15 +59,15 @@ class MicroSectionAllocator {
* \param size size of allocated memory in bytes * \param size size of allocated memory in bytes
* \return pointer to allocated memory region in section, nullptr if out of space * \return pointer to allocated memory region in section, nullptr if out of space
*/ */
DevBaseOffset Allocate(size_t size) { DevPtr Allocate(size_t size) {
size_ = UpperAlignValue(size_, 8); size_ = UpperAlignValue(size_, word_size_);
CHECK(size_ + size < capacity_) CHECK(size_ + size < capacity_)
<< "cannot alloc " << size << " bytes in section with start_addr " << << "cannot alloc " << size << " bytes in section with start_addr " <<
start_offset_.value(); start_addr_.cast_to<void*>();
DevBaseOffset alloc_ptr = start_offset_ + size_; DevPtr alloc_addr = start_addr_ + size_;
size_ += size; size_ += size;
alloc_map_[alloc_ptr.value()] = size; alloc_map_[alloc_addr.value().val64] = size;
return alloc_ptr; return alloc_addr;
} }
/*! /*!
...@@ -71,10 +75,10 @@ class MicroSectionAllocator { ...@@ -71,10 +75,10 @@ class MicroSectionAllocator {
* \param offs offset to allocated memory * \param offs offset to allocated memory
* \note simple allocator scheme, more complex versions will be implemented later * \note simple allocator scheme, more complex versions will be implemented later
*/ */
void Free(DevBaseOffset offs) { void Free(DevPtr addr) {
std::uintptr_t ptr = offs.value(); CHECK(alloc_map_.find(addr.value().val64) != alloc_map_.end())
CHECK(alloc_map_.find(ptr) != alloc_map_.end()) << "freed pointer was never allocated"; << "freed pointer was never allocated";
alloc_map_.erase(ptr); alloc_map_.erase(addr.value().val64);
if (alloc_map_.empty()) { if (alloc_map_.empty()) {
size_ = 0; size_ = 0;
} }
...@@ -83,17 +87,17 @@ class MicroSectionAllocator { ...@@ -83,17 +87,17 @@ class MicroSectionAllocator {
/*! /*!
* \brief start offset of the memory region managed by this allocator * \brief start offset of the memory region managed by this allocator
*/ */
DevBaseOffset start_offset() const { return start_offset_; } DevPtr start_addr() const { return start_addr_; }
/*! /*!
* \brief current end offset of the space being used in this memory region * \brief current end addr of the space being used in this memory region
*/ */
DevBaseOffset curr_end_offset() const { return start_offset_ + size_; } DevPtr curr_end_addr() const { return start_addr_ + size_; }
/*! /*!
* \brief end offset of the memory region managed by this allocator * \brief end addr of the memory region managed by this allocator
*/ */
DevBaseOffset max_end_offset() const { return start_offset_ + capacity_; } DevPtr max_addr() const { return start_addr_ + capacity_; }
/*! /*!
* \brief size of the section * \brief size of the section
...@@ -107,13 +111,15 @@ class MicroSectionAllocator { ...@@ -107,13 +111,15 @@ class MicroSectionAllocator {
private: private:
/*! \brief start address of the section */ /*! \brief start address of the section */
DevBaseOffset start_offset_; DevPtr start_addr_;
/*! \brief current size of the section */ /*! \brief current size of the section */
size_t size_; size_t size_;
/*! \brief total storage capacity of the section */ /*! \brief total storage capacity of the section */
size_t capacity_; size_t capacity_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
/*! \brief allocation map for allocation sizes */ /*! \brief allocation map for allocation sizes */
std::unordered_map<std::uintptr_t, size_t> alloc_map_; std::unordered_map<uint64_t, size_t> alloc_map_;
}; };
} // namespace runtime } // namespace runtime
......
...@@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -37,21 +37,20 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
public: public:
/*! /*!
* \brief constructor to initialize connection to openocd device * \brief constructor to initialize connection to openocd device
* \param base_addr base address of the device
* \param server_addr address of the OpenOCD server to connect to * \param server_addr address of the OpenOCD server to connect to
* \param port port of the OpenOCD server to connect to * \param port port of the OpenOCD server to connect to
*/ */
explicit OpenOCDLowLevelDevice(std::uintptr_t base_addr, explicit OpenOCDLowLevelDevice(const std::string& server_addr,
const std::string& server_addr,
int port) : socket_() { int port) : socket_() {
socket_.Connect(tvm::common::SockAddr(server_addr.c_str(), port)); server_addr_ = server_addr;
socket_.cmd_builder() << "reset halt"; port_ = port;
socket_.Connect(tvm::common::SockAddr(server_addr_.c_str(), port_));
socket_.cmd_builder() << "halt 0";
socket_.SendCommand(); socket_.SendCommand();
base_addr_ = base_addr;
CHECK(base_addr_ % 8 == 0) << "base address not aligned to 8 bytes";
} }
void Read(DevBaseOffset offset, void* buf, size_t num_bytes) { void Read(DevPtr addr, void* buf, size_t num_bytes) {
if (num_bytes == 0) { if (num_bytes == 0) {
return; return;
} }
...@@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -59,7 +58,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
// TODO(weberlo): Refactor between read and write. // TODO(weberlo): Refactor between read and write.
// Check if we need to chunk this write request. // Check if we need to chunk this write request.
if (num_bytes > kMemTransferLimit) { if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
char* curr_buf_ptr = reinterpret_cast<char*>(buf); char* curr_buf_ptr = reinterpret_cast<char*>(buf);
while (num_bytes != 0) { while (num_bytes != 0) {
size_t amount_to_read; size_t amount_to_read;
...@@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -68,8 +66,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else { } else {
amount_to_read = num_bytes; amount_to_read = num_bytes;
} }
Read(offset, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read); Read(addr, reinterpret_cast<void*>(curr_buf_ptr), amount_to_read);
offset += amount_to_read; addr += amount_to_read;
curr_buf_ptr += amount_to_read; curr_buf_ptr += amount_to_read;
num_bytes -= amount_to_read; num_bytes -= amount_to_read;
} }
...@@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -79,7 +77,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.cmd_builder() << "array unset output"; socket_.cmd_builder() << "array unset output";
socket_.SendCommand(); socket_.SendCommand();
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder() socket_.cmd_builder()
<< "mem2array output" << "mem2array output"
<< " " << std::dec << kWordSize << " " << std::dec << kWordSize
...@@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -122,14 +119,13 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} }
} }
void Write(DevBaseOffset offset, const void* buf, size_t num_bytes) { void Write(DevPtr addr, const void* buf, size_t num_bytes) {
if (num_bytes == 0) { if (num_bytes == 0) {
return; return;
} }
// Check if we need to chunk this write request. // Check if we need to chunk this write request.
if (num_bytes > kMemTransferLimit) { if (num_bytes > kMemTransferLimit) {
DevBaseOffset curr_offset = offset;
const char* curr_buf_ptr = reinterpret_cast<const char*>(buf); const char* curr_buf_ptr = reinterpret_cast<const char*>(buf);
while (num_bytes != 0) { while (num_bytes != 0) {
size_t amount_to_write; size_t amount_to_write;
...@@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -138,8 +134,8 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} else { } else {
amount_to_write = num_bytes; amount_to_write = num_bytes;
} }
Write(offset, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write); Write(addr, reinterpret_cast<const void*>(curr_buf_ptr), amount_to_write);
offset += amount_to_write; addr += amount_to_write;
curr_buf_ptr += amount_to_write; curr_buf_ptr += amount_to_write;
num_bytes -= amount_to_write; num_bytes -= amount_to_write;
} }
...@@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -166,7 +162,6 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand(); socket_.SendCommand();
} }
{ {
DevPtr addr = DevPtr(base_addr_ + offset.value());
socket_.cmd_builder() socket_.cmd_builder()
<< "array2mem input" << "array2mem input"
<< " " << std::dec << kWordSize << " " << std::dec << kWordSize
...@@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -176,20 +171,14 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
} }
} }
void Execute(DevBaseOffset func_offset, DevBaseOffset breakpoint) { void Execute(DevPtr func_addr, DevPtr breakpoint_addr) {
socket_.cmd_builder() << "halt 0"; socket_.cmd_builder() << "halt 0";
socket_.SendCommand(); socket_.SendCommand();
// Set up the stack pointer.
DevPtr stack_end = stack_top() - 8;
socket_.cmd_builder() << "reg sp " << stack_end.cast_to<void*>();
socket_.SendCommand();
// Set a breakpoint at the beginning of `UTVMDone`. // Set a breakpoint at the beginning of `UTVMDone`.
socket_.cmd_builder() << "bp " << ToDevPtr(breakpoint).cast_to<void*>() << " 2"; socket_.cmd_builder() << "bp " << breakpoint_addr.cast_to<void*>() << " 2";
socket_.SendCommand(); socket_.SendCommand();
DevPtr func_addr = DevPtr(base_addr_ + func_offset.value());
socket_.cmd_builder() << "resume " << func_addr.cast_to<void*>(); socket_.cmd_builder() << "resume " << func_addr.cast_to<void*>();
socket_.SendCommand(); socket_.SendCommand();
...@@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -200,34 +189,21 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
socket_.SendCommand(); socket_.SendCommand();
// Remove the breakpoint. // Remove the breakpoint.
socket_.cmd_builder() << "rbp " << ToDevPtr(breakpoint).cast_to<void*>(); socket_.cmd_builder() << "rbp " << breakpoint_addr.cast_to<void*>();
socket_.SendCommand(); socket_.SendCommand();
} }
void SetStackTop(DevBaseOffset stack_top) {
stack_top_ = DevPtr(base_addr_ + stack_top.value());
}
std::uintptr_t base_addr() const final {
return base_addr_;
}
DevPtr stack_top() const {
CHECK(stack_top_ != nullptr) << "stack top was never initialized";
return stack_top_;
}
const char* device_type() const final { const char* device_type() const final {
return "openocd"; return "openocd";
} }
private: private:
/*! \brief base address of the micro device memory region */
std::uintptr_t base_addr_;
/*! \brief top of the stack section */
DevPtr stack_top_;
/*! \brief socket used to communicate with the device through Tcl */ /*! \brief socket used to communicate with the device through Tcl */
TclSocket socket_; TclSocket socket_;
/*! \brief address of OpenOCD server */
std::string server_addr_;
/*! \brief port of OpenOCD server */
int port_;
/*! \brief number of bytes in a word on the target device (64-bit) */ /*! \brief number of bytes in a word on the target device (64-bit) */
static const constexpr ssize_t kWordSize = 8; static const constexpr ssize_t kWordSize = 8;
...@@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice { ...@@ -239,11 +215,10 @@ class OpenOCDLowLevelDevice final : public LowLevelDevice {
static const constexpr int kWaitTime = 10000; static const constexpr int kWaitTime = 10000;
}; };
const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(std::uintptr_t base_addr, const std::shared_ptr<LowLevelDevice> OpenOCDLowLevelDeviceCreate(const std::string& server_addr,
const std::string& server_addr,
int port) { int port) {
std::shared_ptr<LowLevelDevice> lld = std::shared_ptr<LowLevelDevice> lld =
std::make_shared<OpenOCDLowLevelDevice>(base_addr, server_addr, port); std::make_shared<OpenOCDLowLevelDevice>(server_addr, port);
return lld; return lld;
} }
......
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ #define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_
#include <vector> #include <vector>
#include "device/utvm_runtime.h" #include "host_driven/utvm_runtime.h"
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
...@@ -96,9 +96,9 @@ class TargetDataLayoutEncoder { ...@@ -96,9 +96,9 @@ class TargetDataLayoutEncoder {
* \brief constructor * \brief constructor
* \param start_addr start address of the encoder in device memory * \param start_addr start address of the encoder in device memory
*/ */
explicit TargetDataLayoutEncoder(DevPtr start_addr) explicit TargetDataLayoutEncoder(DevPtr start_addr, size_t word_size)
: buf_(std::vector<uint8_t>()), curr_offset_(0) { : buf_(std::vector<uint8_t>()), curr_offset_(0), word_size_(word_size) {
start_addr_ = DevPtr(UpperAlignValue(start_addr.value(), 8)); start_addr_ = DevPtr(UpperAlignValue(start_addr.value().val64, word_size_));
} }
/*! /*!
...@@ -108,7 +108,7 @@ class TargetDataLayoutEncoder { ...@@ -108,7 +108,7 @@ class TargetDataLayoutEncoder {
*/ */
template <typename T> template <typename T>
Slot<T> Alloc(size_t num_elems = 1) { Slot<T> Alloc(size_t num_elems = 1) {
curr_offset_ = UpperAlignValue(curr_offset_, 8); curr_offset_ = UpperAlignValue(curr_offset_, word_size_);
size_t size = sizeof(T) * num_elems; size_t size = sizeof(T) * num_elems;
if (curr_offset_ + size > buf_.size()) { if (curr_offset_ + size > buf_.size()) {
buf_.resize(curr_offset_ + size); buf_.resize(curr_offset_ + size);
...@@ -141,6 +141,8 @@ class TargetDataLayoutEncoder { ...@@ -141,6 +141,8 @@ class TargetDataLayoutEncoder {
size_t curr_offset_; size_t curr_offset_;
/*! \brief start address of the encoder in device memory */ /*! \brief start address of the encoder in device memory */
DevPtr start_addr_; DevPtr start_addr_;
/*! \brief number of bytes in a word on the target device */
size_t word_size_;
}; };
template <typename T> template <typename T>
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
* \file rpc_session.cc * \file rpc_session.cc
* \brief RPC session for remote function call. * \brief RPC session for remote function call.
*/ */
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h> #include <tvm/runtime/packed_func.h>
#include <tvm/runtime/device_api.h> #include <tvm/runtime/device_api.h>
#include <tvm/runtime/registry.h> #include <tvm/runtime/registry.h>
...@@ -40,6 +41,7 @@ ...@@ -40,6 +41,7 @@
namespace tvm { namespace tvm {
namespace runtime { namespace runtime {
// Temp buffer for data array // Temp buffer for data array
struct RPCByteArrayBuffer { struct RPCByteArrayBuffer {
TVMByteArray arr; TVMByteArray arr;
...@@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() { ...@@ -1215,11 +1217,45 @@ void RPCSession::EventHandler::HandlePackedCall() {
CHECK_EQ(state_, kRecvCode); CHECK_EQ(state_, kRecvCode);
} }
/*!
 * \brief Wrap a function in a timer that averages the values the function itself returns.
 *
 * The produced PackedFunc calls \p pf once as a warm-up, then runs \p repeat
 * measurement rounds of \p number calls each. After every call it synchronizes
 * the device stream and converts the call's return value to a double; the
 * per-round mean of those doubles is appended to a byte blob.
 * NOTE(review): the return value of \p pf is presumably the execution time
 * reported by the micro device -- confirm against the session's exec-queue code.
 *
 * \param pf the function to be measured
 * \param ctx the context whose device stream is synchronized after each call
 * \param number number of calls averaged within one round
 * \param repeat number of rounds (one double is emitted per round)
 * \return a PackedFunc yielding a byte array of `repeat` raw doubles
 */
PackedFunc MicroTimeEvaluator(
    PackedFunc pf,
    TVMContext ctx,
    int number,
    int repeat) {
  auto timer = [pf, ctx, number, repeat](TVMArgs args, TVMRetValue* rv) mutable {
    TVMRetValue call_result;
    std::ostringstream packed;

    // Run the wrapped function once and block until the device stream is idle.
    auto invoke_and_sync = [&]() {
      pf.CallPacked(args, &call_result);
      DeviceAPI::Get(ctx)->StreamSync(ctx, nullptr);
    };

    // skip first time call, to activate lazy compilation components.
    invoke_and_sync();

    for (int round = 0; round < repeat; ++round) {
      double mean = 0.0;
      for (int call = 0; call < number; ++call) {
        invoke_and_sync();
        // Accumulate the already-divided contribution of this call.
        mean += (call_result.operator double()) / number;
      }
      // Append the raw bytes of this round's mean to the output blob.
      packed.write(reinterpret_cast<char*>(&mean), sizeof(mean));
    }

    // return the time.
    std::string blob = packed.str();
    TVMByteArray bytes;
    bytes.data = blob.data();
    bytes.size = blob.length();
    *rv = bytes;
  };
  return PackedFunc(timer);
}
PackedFunc WrapTimeEvaluator(PackedFunc pf, PackedFunc WrapTimeEvaluator(PackedFunc pf,
TVMContext ctx, TVMContext ctx,
int number, int number,
int repeat, int repeat,
int min_repeat_ms) { int min_repeat_ms) {
if (static_cast<int>(ctx.device_type) == static_cast<int>(kDLMicroDev)) {
return MicroTimeEvaluator(pf, ctx, number, repeat);
}
auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable { auto ftimer = [pf, ctx, number, repeat, min_repeat_ms](TVMArgs args, TVMRetValue *rv) mutable {
TVMRetValue temp; TVMRetValue temp;
std::ostringstream os; std::ostringstream os;
......
...@@ -25,6 +25,7 @@ ALLOW_EXTENSION = { ...@@ -25,6 +25,7 @@ ALLOW_EXTENSION = {
"cc", "cc",
"c", "c",
"h", "h",
"s",
"rs", "rs",
"m", "m",
"mm", "mm",
......
...@@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary(): ...@@ -73,12 +73,21 @@ def test_tvm_callback_relocate_binary():
with open(tmp_bin, "wb") as f: with open(tmp_bin, "wb") as f:
f.write(binary) f.write(binary)
def verify(): def verify():
text_loc_str = "0x0" word_size = 8
rodata_loc_str = "0x10000" text_loc = 0x0
data_loc_str = "0x20000" rodata_loc = 0x10000
bss_loc_str = "0x30000" data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary( rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
print("Relocated binary section sizes") print("Relocated binary section sizes")
test_tvm_callback_get_section_size(binary=rel_bin) test_tvm_callback_get_section_size(binary=rel_bin)
relf = tmp_dir.relpath("rel.bin") relf = tmp_dir.relpath("rel.bin")
...@@ -88,10 +97,6 @@ def test_tvm_callback_relocate_binary(): ...@@ -88,10 +97,6 @@ def test_tvm_callback_relocate_binary():
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT) stderr=subprocess.STDOUT)
(out, _) = nm_proc.communicate() (out, _) = nm_proc.communicate()
# Ensure the relocated symbols are within the ranges we specified.
text_loc = int(text_loc_str, 16)
data_loc = int(data_loc_str, 16)
bss_loc = int(bss_loc_str, 16)
symbol_entries = out.decode("utf-8").split("\n") symbol_entries = out.decode("utf-8").split("\n")
for entry in symbol_entries: for entry in symbol_entries:
if len(entry) == 0: if len(entry) == 0:
...@@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map(): ...@@ -127,12 +132,21 @@ def test_tvm_callback_get_symbol_map():
with open(tmp_bin, "wb") as f: with open(tmp_bin, "wb") as f:
f.write(binary) f.write(binary)
def verify(): def verify():
text_loc_str = "0x0" word_size = 8
rodata_loc_str = "0x10000" text_loc = 0x0
data_loc_str = "0x20000" rodata_loc = 0x10000
bss_loc_str = "0x30000" data_loc = 0x20000
bss_loc = 0x30000
stack_end = 0x50000
rel_bin = tvm_callback_relocate_binary( rel_bin = tvm_callback_relocate_binary(
tmp_bin, text_loc_str, rodata_loc_str, data_loc_str, bss_loc_str, TOOLCHAIN_PREFIX) tmp_bin,
word_size,
text_loc,
rodata_loc,
data_loc,
bss_loc,
stack_end,
TOOLCHAIN_PREFIX)
symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX) symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX)
symbols = set() symbols = set()
for i, line in enumerate(symbol_map.split('\n')): for i, line in enumerate(symbol_map.split('\n')):
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
# KIND, either express or implied. See the License for the # KIND, either express or implied. See the License for the
# specific language governing permissions and limitations # specific language governing permissions and limitations
# under the License. # under the License.
import os import os
import numpy as np import numpy as np
...@@ -22,38 +21,13 @@ import tvm ...@@ -22,38 +21,13 @@ import tvm
from tvm.contrib import graph_runtime, util from tvm.contrib import graph_runtime, util
from tvm import relay from tvm import relay
import tvm.micro as micro import tvm.micro as micro
from tvm.micro import create_micro_mod
from tvm.relay.testing import resnet from tvm.relay.testing import resnet
# Use the host emulated micro device. # Use the host emulated micro device.
DEVICE_TYPE = "host" DEV_CONFIG = micro.device.host.default_config()
TOOLCHAIN_PREFIX = ""
def create_micro_mod(c_mod, toolchain_prefix):
"""Produces a micro module from a given module.
Parameters
----------
c_mod : tvm.module.Module
module with "c" as its target backend
toolchain_prefix : str
toolchain prefix to be used (see `tvm.micro.Session` docs)
Return
------
micro_mod : tvm.module.Module
micro module for the target device
"""
temp_dir = util.tempdir()
lib_obj_path = temp_dir.relpath("dev_lib.obj")
c_mod.export_library(
lib_obj_path,
fcompile=tvm.micro.cross_compiler(toolchain_prefix=toolchain_prefix))
micro_mod = tvm.module.load(lib_obj_path, "micro_dev")
return micro_mod
def relay_micro_build(func, toolchain_prefix, params=None): def relay_micro_build(func, dev_config, params=None):
"""Create a graph runtime module with a micro device context from a Relay function. """Create a graph runtime module with a micro device context from a Relay function.
Parameters Parameters
...@@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None): ...@@ -61,6 +35,9 @@ def relay_micro_build(func, toolchain_prefix, params=None):
func : relay.Function func : relay.Function
function to compile function to compile
dev_config : Dict[str, Any]
MicroTVM config dict for the target device
params : dict params : dict
input parameters that do not change during inference input parameters that do not change during inference
...@@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None): ...@@ -71,24 +48,20 @@ def relay_micro_build(func, toolchain_prefix, params=None):
""" """
with tvm.build_config(disable_vectorize=True): with tvm.build_config(disable_vectorize=True):
graph, c_mod, params = relay.build(func, target="c", params=params) graph, c_mod, params = relay.build(func, target="c", params=params)
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, dev_config)
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
mod = graph_runtime.create(graph, micro_mod, ctx) mod = graph_runtime.create(graph, micro_mod, ctx)
mod.set_input(**params) mod.set_input(**params)
return mod return mod
# TODO(weberlo): Add example program to test scalar double/int TVMValue serialization.
# TODO(weberlo): How can we test the OpenOCD device? The CI would need to have OpenOCD
# and Spike installed.
def test_alloc(): def test_alloc():
"""Test tensor allocation on the device.""" """Test tensor allocation on the device."""
if not tvm.module.enabled("micro_dev"): if not tvm.module.enabled("micro_dev"):
return return
shape = (1024,) shape = (1024,)
dtype = "float32" dtype = "float32"
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
np_tensor = np.random.uniform(size=shape).astype(dtype) np_tensor = np.random.uniform(size=shape).astype(dtype)
micro_tensor = tvm.nd.array(np_tensor, ctx) micro_tensor = tvm.nd.array(np_tensor, ctx)
...@@ -112,15 +85,14 @@ def test_add(): ...@@ -112,15 +85,14 @@ def test_add():
func_name = "fadd" func_name = "fadd"
c_mod = tvm.build(s, [A, B, C], target="c", name=func_name) c_mod = tvm.build(s, [A, B, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name] micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) b = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx) c = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
micro_func(a, b, c) micro_func(a, b, c)
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
c.asnumpy(), a.asnumpy() + b.asnumpy()) c.asnumpy(), a.asnumpy() + b.asnumpy())
...@@ -143,8 +115,8 @@ def test_workspace_add(): ...@@ -143,8 +115,8 @@ def test_workspace_add():
func_name = "fadd_two_workspace" func_name = "fadd_two_workspace"
c_mod = tvm.build(s, [A, C], target="c", name=func_name) c_mod = tvm.build(s, [A, C], target="c", name=func_name)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
micro_mod = create_micro_mod(c_mod, TOOLCHAIN_PREFIX) micro_mod = create_micro_mod(c_mod, DEV_CONFIG)
micro_func = micro_mod[func_name] micro_func = micro_mod[func_name]
ctx = tvm.micro_dev(0) ctx = tvm.micro_dev(0)
a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx) a = tvm.nd.array(np.random.uniform(size=shape).astype(dtype), ctx)
...@@ -168,8 +140,8 @@ def test_graph_runtime(): ...@@ -168,8 +140,8 @@ def test_graph_runtime():
z = relay.add(xx, relay.const(1.0)) z = relay.add(xx, relay.const(1.0))
func = relay.Function([x], z) func = relay.Function([x], z)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
mod = relay_micro_build(func, TOOLCHAIN_PREFIX) mod = relay_micro_build(func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype) x_in = np.random.uniform(size=shape[0]).astype(dtype)
mod.run(x=x_in) mod.run(x=x_in)
...@@ -195,9 +167,9 @@ def test_multiple_modules(): ...@@ -195,9 +167,9 @@ def test_multiple_modules():
ret = relay.subtract(x, relay.const(1.0)) ret = relay.subtract(x, relay.const(1.0))
sub_const_func = relay.Function([x], ret) sub_const_func = relay.Function([x], ret)
with micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX): with micro.Session(DEV_CONFIG):
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
sub_const_mod = relay_micro_build(sub_const_func, TOOLCHAIN_PREFIX) sub_const_mod = relay_micro_build(sub_const_func, DEV_CONFIG)
x_in = np.random.uniform(size=shape[0]).astype(dtype) x_in = np.random.uniform(size=shape[0]).astype(dtype)
add_const_mod.run(x=x_in) add_const_mod.run(x=x_in)
...@@ -223,8 +195,8 @@ def test_interleave_sessions(): ...@@ -223,8 +195,8 @@ def test_interleave_sessions():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
...@@ -232,13 +204,13 @@ def test_interleave_sessions(): ...@@ -232,13 +204,13 @@ def test_interleave_sessions():
np_tensor_b = np.random.uniform(size=shape).astype(dtype) np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
with sess_a: with sess_a:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a) add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
add_result, np_tensor_a + 1.0) add_result, np_tensor_a + 1.0)
with sess_b: with sess_b:
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_b) add_const_mod.run(x=micro_tensor_b)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
...@@ -257,15 +229,15 @@ def test_nested_sessions(): ...@@ -257,15 +229,15 @@ def test_nested_sessions():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
with sess_b: with sess_b:
np_tensor_b = np.random.uniform(size=shape).astype(dtype) np_tensor_b = np.random.uniform(size=shape).astype(dtype)
micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0)) micro_tensor_b = tvm.nd.array(np_tensor_b, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
add_const_mod.run(x=micro_tensor_a) add_const_mod.run(x=micro_tensor_a)
add_result = add_const_mod.get_output(0).asnumpy() add_result = add_const_mod.get_output(0).asnumpy()
tvm.testing.assert_allclose( tvm.testing.assert_allclose(
...@@ -284,12 +256,12 @@ def test_inactive_session_use(): ...@@ -284,12 +256,12 @@ def test_inactive_session_use():
ret = relay.add(x, relay.const(1.0)) ret = relay.add(x, relay.const(1.0))
add_const_func = relay.Function([x], ret) add_const_func = relay.Function([x], ret)
sess_a = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_a = micro.Session(DEV_CONFIG)
sess_b = micro.Session(DEVICE_TYPE, TOOLCHAIN_PREFIX) sess_b = micro.Session(DEV_CONFIG)
with sess_a: with sess_a:
np_tensor_a = np.random.uniform(size=shape).astype(dtype) np_tensor_a = np.random.uniform(size=shape).astype(dtype)
micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0)) micro_tensor_a = tvm.nd.array(np_tensor_a, tvm.micro_dev(0))
add_const_mod = relay_micro_build(add_const_func, TOOLCHAIN_PREFIX) add_const_mod = relay_micro_build(add_const_func, DEV_CONFIG)
with sess_b: with sess_b:
# These objects belong to `sess_a`. # These objects belong to `sess_a`.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment