Unverified Commit fbcf61ab by Krzysztof Parzyszek Committed by GitHub

[RUNTIME] FastRPC interface for Hexagon runtime (#5353)

* [RUNTIME] FastRPC interface for Hexagon runtime

Co-authored-by: Ravishankar Kolachana <quic_rkolacha@quicinc.com>
Co-authored-by: Krzysztof Parzyszek <kparzysz@quicinc.com>

* Explain store offset in a comment in launcher

Co-authored-by: Abhikrant Sharma <quic_abhikran@quicinc.com>
Co-authored-by: Ravishankar Kolachana <quic_rkolacha@quicinc.com>
parent 3db8880d
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
cmake_minimum_required(VERSION 3.2)
project(HexagonIDL C CXX)

# FASTRPC_LIBS selects which half of the FastRPC interface this build tree
# produces: SKEL or STUB. Any other value is a configuration error.
if(NOT "${FASTRPC_LIBS}" STREQUAL "SKEL" AND
   NOT "${FASTRPC_LIBS}" STREQUAL "STUB")
  message(SEND_ERROR "Please set FASTRPC_LIBS to either SKEL or STUB")
endif()

# Every SDK include path below, as well as the path to the qaic IDL
# compiler, is derived from HEXAGON_SDK_ROOT. Report a missing value here
# instead of letting it surface later as confusing "/incs"-style paths.
if("${HEXAGON_SDK_ROOT}" STREQUAL "")
  message(SEND_ERROR
    "Please set HEXAGON_SDK_ROOT to the root directory of the Hexagon SDK")
endif()

set(FASTRPC_SRC "${CMAKE_CURRENT_SOURCE_DIR}")

# Include paths shared by the skel and the stub builds.
include_directories(include)
include_directories(${HEXAGON_SDK_ROOT}/incs)
include_directories(${HEXAGON_SDK_ROOT}/incs/stddef)
include_directories(
  ${HEXAGON_SDK_ROOT}/libs/common/remote/ship/android_Release_aarch64)

# The qaic IDL compiler from the SDK, and the include paths passed to it.
set(QAIC_EXE "${HEXAGON_SDK_ROOT}/tools/qaic/Ubuntu16/qaic")
set(QAIC_FLAGS
  "-I${HEXAGON_SDK_ROOT}/incs/stddef"
  "-I${HEXAGON_SDK_ROOT}/libs/common/remote/ship/android_Release_aarch64"
  "-I${HEXAGON_SDK_ROOT}/libs/common/rpcmem/inc"
)

set(CMAKE_SKIP_RPATH TRUE)
# Qaic for the non-domain header.
#
# Runs qaic on include/tvm_remote_nd.idl. The rule declares three outputs:
# the skel source, the stub source, and the generated header. The header is
# first produced under its bare filename (relative to the command's working
# directory; no WORKING_DIRECTORY is given, so presumably the build
# directory) and is then moved into the source tree's include/ directory by
# the second COMMAND.
#
# Don't add paths to these filenames, or otherwise cmake may spontaneously
# add -o option to the qaic invocation (with an undesirable path).
set(TVM_REMOTE_ND_IDL "tvm_remote_nd.idl")
set(TVM_REMOTE_ND_H "tvm_remote_nd.h")
set(TVM_REMOTE_ND_SKEL_C "tvm_remote_nd_skel.c")
set(TVM_REMOTE_ND_STUB_C "tvm_remote_nd_stub.c")
# The .idl file is the rule's main dependency, so editing it reruns qaic.
add_custom_command(
OUTPUT ${TVM_REMOTE_ND_SKEL_C} ${TVM_REMOTE_ND_STUB_C}
"${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
COMMAND ${QAIC_EXE} ${QAIC_FLAGS}
"${FASTRPC_SRC}/include/${TVM_REMOTE_ND_IDL}"
COMMAND ${CMAKE_COMMAND} -E rename "${TVM_REMOTE_ND_H}"
"${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
MAIN_DEPENDENCY "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_IDL}"
)
# Qaic for the domain header.
#
# Runs qaic on include/tvm_remote.idl. The rule declares three outputs:
# the skel source, the stub source, and the generated header. The header is
# first produced under its bare filename (relative to the command's working
# directory; no WORKING_DIRECTORY is given, so presumably the build
# directory) and is then moved into the source tree's include/ directory by
# the second COMMAND.
#
# Don't add paths to these filenames, or otherwise cmake may spontaneously
# add -o option to the qaic invocation (with an undesirable path).
set(TVM_REMOTE_D_IDL "tvm_remote.idl")
set(TVM_REMOTE_D_H "tvm_remote.h")
set(TVM_REMOTE_D_SKEL_C "tvm_remote_skel.c")
set(TVM_REMOTE_D_STUB_C "tvm_remote_stub.c")
# The .idl file is the rule's main dependency, so editing it reruns qaic.
add_custom_command(
OUTPUT ${TVM_REMOTE_D_SKEL_C} ${TVM_REMOTE_D_STUB_C}
"${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
COMMAND ${QAIC_EXE} ${QAIC_FLAGS}
"${FASTRPC_SRC}/include/${TVM_REMOTE_D_IDL}"
COMMAND ${CMAKE_COMMAND} -E rename "${TVM_REMOTE_D_H}"
"${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
MAIN_DEPENDENCY "${FASTRPC_SRC}/include/${TVM_REMOTE_D_IDL}"
)
if("${FASTRPC_LIBS}" STREQUAL "SKEL")
# Skel libraries: tvm_remote_nd_skel, tvm_remote_skel and tvm_wrap_pthread.
#
# Translate HEXAGON_ARCH into the name of the matching QuRT directory in the
# SDK. The lookup goes through a variable named after the architecture, so
# an unset or unsupported HEXAGON_ARCH leaves HEXARCH_DIR empty.
set(HEXARCH_DIR_v60 "ADSPv60MP")
set(HEXARCH_DIR_v62 "ADSPv62MP")
set(HEXARCH_DIR_v65 "computev65")
set(HEXARCH_DIR_v66 "computev66")
set(HEXARCH_DIR_STR "HEXARCH_DIR_${HEXAGON_ARCH}")
set(HEXARCH_DIR ${${HEXARCH_DIR_STR}})
if(NOT HEXARCH_DIR)
  message(SEND_ERROR
    "Please set HEXAGON_ARCH to one of v60, v62, v65, v66")
endif()

# QuRT and POSIX headers for the selected architecture.
include_directories(
  ${HEXAGON_SDK_ROOT}/libs/common/qurt/${HEXARCH_DIR}/include/qurt
  ${HEXAGON_SDK_ROOT}/libs/common/qurt/${HEXARCH_DIR}/include/posix
)

# Compiler flags applied to both C and C++ sources. They are kept as a list
# and flattened into a space-separated string before being prepended to the
# global flag variables.
set(EXTRA_COMP_FLAGS "-O3" "-m${HEXAGON_ARCH}")
string(REPLACE ";" " " EXTRA_COMP_FLAGS_STR "${EXTRA_COMP_FLAGS}")
message(STATUS "EXTRA_COMP_FLAGS_STR: ${EXTRA_COMP_FLAGS_STR}")
set(CMAKE_C_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_CXX_FLAGS}")

# Linker flags for the shared libraries, flattened the same way.
set(EXTRA_LINK_FLAGS
  "-Wl,--no-threads"
  "-Wl,--wrap=malloc"
  "-Wl,--wrap=calloc"
  "-Wl,--wrap=free"
  "-Wl,--wrap=realloc"
  "-Wl,--wrap=memalign"
  "-Wl,--wrap=posix_memalign"
  "-Wl,--wrap=__stack_chk_fail"
)
string(REPLACE ";" " " EXTRA_LINK_FLAGS_STR "${EXTRA_LINK_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS
  "${EXTRA_LINK_FLAGS_STR} ${CMAKE_SHARED_LINKER_FLAGS}")

# Non-domain skel library. The generated header is listed as a source so
# that the qaic rule producing it is attached to this target.
set(SKEL_ND_SRCS
  "src/tvm_hvx.cc"
  "src/tvm_remote_nd_imp.cc"
)
add_library(tvm_remote_nd_skel SHARED
  "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
  ${TVM_REMOTE_ND_SKEL_C}
  ${SKEL_ND_SRCS}
)

# Domain skel library. It is built from the non-domain sources (including
# src/tvm_remote_nd_imp.cc) plus the domain implementation.
set(SKEL_D_SRCS
  ${SKEL_ND_SRCS}
  "src/tvm_remote_imp.cc"
)
add_library(tvm_remote_skel SHARED
  "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
  ${TVM_REMOTE_D_SKEL_C}
  ${SKEL_D_SRCS}
)

# Separate shared library with __wrap_pthread_create.
# It has to be a library of its own: it defines a function that
# libtvm_runtime.so will call, so that function must be in the global
# dynamic symbol table, whereas the skel libraries are loaded as private
# by FastRPC.
set(WRAP_PTHREAD_SRCS "src/tvm_wrap_pthread.cc")
add_library(tvm_wrap_pthread SHARED ${WRAP_PTHREAD_SRCS})
else()
# Stub libraries: tvm_remote_nd_stub and tvm_remote_stub.
#
# Additional SDK headers needed only by the stub build.
include_directories(
  ${HEXAGON_SDK_ROOT}/incs/a1std
  ${HEXAGON_SDK_ROOT}/incs/qlist
  ${HEXAGON_SDK_ROOT}/libs/common/rpcmem/inc
)

# Directory searched for the adsprpc library linked below.
link_directories(
  ${HEXAGON_SDK_ROOT}/libs/common/remote/ship/android_Release_aarch64)

# Each stub library is built from its generated header, the SDK's rpcmem
# implementation for Android, and the qaic-generated stub source.
add_library(tvm_remote_nd_stub SHARED
  "${FASTRPC_SRC}/include/${TVM_REMOTE_ND_H}"
  "${HEXAGON_SDK_ROOT}/libs/common/rpcmem/src/rpcmem_android.c"
  "${TVM_REMOTE_ND_STUB_C}"
)
add_library(tvm_remote_stub SHARED
  "${FASTRPC_SRC}/include/${TVM_REMOTE_D_H}"
  "${HEXAGON_SDK_ROOT}/libs/common/rpcmem/src/rpcmem_android.c"
  "${TVM_REMOTE_D_STUB_C}"
)

# Both stubs link against adsprpc.
foreach(stub_lib tvm_remote_nd_stub tvm_remote_stub)
  target_link_libraries(${stub_lib} adsprpc)
endforeach()
endif()
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->
<!--- http://www.apache.org/licenses/LICENSE-2.0 -->
<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->
# Hexagon IDL libraries
This directory hosts IDL files and their implementations to offload TVM kernels to Hexagon via FastRPC. The implementations can be used to generate stub and skel libraries.
### Prerequisites
1. Android NDK version r19c or later.
2. Hexagon SDK version 3.5.0 or later.
Android NDK can be downloaded from https://developer.android.com/ndk.
Hexagon SDK is available at https://developer.qualcomm.com/software/hexagon-dsp-sdk.
### Configuring
Skel and stub libraries need to be configured and built separately. Please use different subdirectories for each. Otherwise the cmake cache from one configuration can interfere with the next.
For skel libraries, set
```
FASTRPC_LIBS=SKEL
HEXAGON_SDK_ROOT=/path/to/sdk
CMAKE_C_COMPILER=hexagon-clang
CMAKE_CXX_COMPILER=hexagon-clang++
HEXAGON_ARCH= one of v60, v62, v65, v66
```
Please note that support for older versions of the Hexagon processor may be removed from future versions of the Hexagon toolchain.
For stub libraries, set
```
FASTRPC_LIBS=STUB
HEXAGON_SDK_ROOT=/path/to/sdk
CMAKE_C_COMPILER=aarch64-linux-android28-clang # or later
CMAKE_CXX_COMPILER=aarch64-linux-android28-clang++ # or later
```
### Building
In each instance, a simple `make` command will create the header files `fastrpc/include/tvm_remote.h` and `fastrpc/include/tvm_remote_nd.h`. These headers are needed to compile the TVM runtime for Android (and the stub/skel libraries themselves).
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* IDL to offload TVM kernels to Hexagon from APPS for multi-domains.
*/
#include "remote.idl"
#include "AEEStdDef.idl"
/*
 * Multi-domain interface. It derives from remote_handle64; the C
 * implementations in src/tvm_remote_imp.cc take a remote_handle64 channel
 * handle as their first argument.
 */
interface tvm_remote : remote_handle64 {
/* Raw byte buffer. */
typedef sequence<octet> buffer;
/* Handle for a loaded library or a resolved symbol. */
typedef unsigned long handle_t;
/* Load the shared library `soname`; its handle is returned in `mod_ptr`. */
long load_library(in sequence<char> soname,
rout handle_t mod_ptr);
/* Resolve symbol `name` in library `mod`; result returned in `sym_ptr`. */
long get_symbol(in handle_t mod,
in sequence<char> name,
rout handle_t sym_ptr);
/*
 * Call function `symbol` from library `mod`. `scalar` holds the values to
 * pass in registers and `stack` the values to pass on the stack. According
 * to the implementation's documentation the octet buffers are used for
 * cache operations only. `pcycles` and `time_usec` receive the cycle count
 * and the time in microseconds.
 */
long kernel(in handle_t mod,
in handle_t symbol,
inrout sequence <long> scalar,
inrout sequence <long> stack,
in sequence<buffer> scalar_in_octet,
rout sequence<buffer> scalar_out_octet,
in sequence<buffer> stack_in_octet,
rout sequence<buffer> stack_out_octet,
rout unsigned long long pcycles,
rout unsigned long long time_usec);
/* Release a library previously loaded with load_library. */
long release_library(in handle_t mod);
/* Allocate `size` bytes of VTCM; the address is returned in `dsp_va`. */
long alloc_vtcm(in unsigned long size,
in unsigned long align,
rout unsigned long dsp_va);
/* Free VTCM memory previously returned by alloc_vtcm. */
long free_vtcm(in unsigned long dsp_va);
/* Dummy call; the implementation describes it as a workaround. */
long call_mmap64();
};
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* IDL to offload TVM kernels to Hexagon from APPS for non-domains.
*/
#include "remote.idl"
#include "AEEStdDef.idl"
/*
 * Non-domain interface. Unlike tvm_remote it does not derive from
 * remote_handle64 and instead declares explicit open/close calls.
 */
interface tvm_remote_nd {
/* Raw byte buffer. */
typedef sequence<octet> buffer;
/* Handle for a loaded library or a resolved symbol. */
typedef unsigned long handle_t;
/* Perform initialization. */
long open();
/* Perform cleanup. */
long close();
/* Load the shared library `soname`; its handle is returned in `mod_ptr`. */
long load_library(in sequence<char> soname,
rout handle_t mod_ptr);
/* Resolve symbol `name` in library `mod`; result returned in `sym_ptr`. */
long get_symbol(in handle_t mod,
in sequence<char> name,
rout handle_t sym_ptr);
/*
 * Call function `symbol` from library `mod`. The parameter list mirrors
 * tvm_remote::kernel: `scalar` and `stack` carry the argument values, the
 * octet buffers accompany them, and `pcycles`/`time_usec` are outputs.
 */
long kernel(in handle_t mod,
in handle_t symbol,
inrout sequence <long> scalar,
inrout sequence <long> stack,
in sequence<buffer> scalar_in_octet,
rout sequence<buffer> scalar_out_octet,
in sequence<buffer> stack_in_octet,
rout sequence<buffer> stack_out_octet,
rout unsigned long long pcycles,
rout unsigned long long time_usec);
/* Release a library previously loaded with load_library. */
long release_library(in handle_t mod);
/* Dummy call; the implementation describes it as a workaround. */
long call_mmap64();
};
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "tvm_hvx.h"
#include "AEEStdErr.h"
#include "HAP_farf.h"
#include "HAP_power.h"
extern "C" {
#include "qurt_error.h"
#include "qurt_hvx.h"
}
namespace hvx {
// Mode substituted whenever a caller passes MODE_DONT_CARE: 128-byte
// vectors when compiling for architecture v65 or newer; on older
// architectures the request stays MODE_DONT_CARE.
#if __HEXAGON_ARCH__ >= 65
#define DEFAULT_HVX_MODE MODE_128B
#else
#define DEFAULT_HVX_MODE MODE_DONT_CARE
#endif
static constexpr mode_t default_hvx_mode = DEFAULT_HVX_MODE;
/*!
 * \brief Reserve HVX units through qurt_hvx_reserve.
 *
 * \param num_units  Number of units to reserve; 0 requests all available
 *                   units.
 *
 * \return -1 if the target reports no HVX units or the reservation fails
 *         with an unrecognized negative code; 0 if QuRT reports the
 *         reservation as already made, not supported, or not successful;
 *         otherwise the value returned by qurt_hvx_reserve.
 */
int reserve(unsigned num_units) {
  // HVX not supported in this target.
  if (qurt_hvx_get_units() <= 0) {
    return -1;
  }

  unsigned request = num_units;
  if (request == 0) {
    request = QURT_HVX_RESERVE_ALL_AVAILABLE;
  }

  const int status = qurt_hvx_reserve(request);
  const bool nothing_reserved = status == QURT_HVX_RESERVE_ALREADY_MADE ||
                                status == QURT_HVX_RESERVE_NOT_SUPPORTED ||
                                status == QURT_HVX_RESERVE_NOT_SUCCESSFUL;
  if (nothing_reserved) {
    return 0;
  }
  return status < 0 ? -1 : status;
}
/*!
 * \brief Cancel the HVX reservation via qurt_hvx_cancel_reserve.
 *
 * \return 0 if QuRT reports QURT_EOK, -1 otherwise.
 */
int unreserve() {
  const bool cancelled = qurt_hvx_cancel_reserve() == QURT_EOK;
  return cancelled ? 0 : -1;
}
int power_on() {
HAP_power_request_t request;
request.type = HAP_power_set_HVX;
request.hvx.power_up = 1;
int rc = HAP_power_set(nullptr, &request);
if (rc != AEE_SUCCESS) {
FARF(ERROR, "%s: unable to power on HVX, rc=%08x", rc);
return -1;
}
return 0;
}
int power_off() {
HAP_power_request_t request;
request.type = HAP_power_set_HVX;
request.hvx.power_up = 0;
int rc = HAP_power_set(nullptr, &request);
if (rc != AEE_SUCCESS) {
FARF(ERROR, "%s: unable to power off HVX, rc=%08x", rc);
return -1;
}
return 0;
}
/*!
 * \brief Lock HVX for the calling thread in the requested mode.
 *
 * \param mode  Requested HVX mode. MODE_DONT_CARE is first replaced with
 *              default_hvx_mode; if it is still MODE_DONT_CARE afterwards,
 *              the mode currently reported by qurt_hvx_get_mode is used.
 *
 * \return The vector length in bytes (64 or 128) on success; -1 if the
 *         current mode cannot be determined or the lock fails; -3 if
 *         \p mode is not a known mode.
 */
int lock(mode_t mode) {
  qurt_hvx_mode_t qurt_mode;
  int vlen;
  if (MODE_DONT_CARE == mode) mode = default_hvx_mode;
  switch (mode) {
    case MODE_DONT_CARE: {
      int ret_val = qurt_hvx_get_mode();
      if (ret_val < 0) {
        // qurt_mode has not been assigned yet; report the value that
        // QuRT actually returned.
        FARF(HIGH, "%s: unknown HVX mode %d", __func__, ret_val);
        return -1;
      }
      qurt_mode = static_cast<qurt_hvx_mode_t>(ret_val);
      switch (qurt_mode) {
        case QURT_HVX_MODE_64B:
          vlen = 64;
          break;
        case QURT_HVX_MODE_128B:
          vlen = 128;
          break;
        default:
          // Without this branch an unexpected mode would leave vlen
          // uninitialized and that garbage would be returned to the caller.
          FARF(HIGH, "%s: unknown HVX mode %d", __func__, ret_val);
          return -1;
      }
      break;
    }
    case MODE_64B:
      qurt_mode = QURT_HVX_MODE_64B;
      vlen = 64;
      break;
    case MODE_128B:
      qurt_mode = QURT_HVX_MODE_128B;
      vlen = 128;
      break;
    default:
      // qurt_mode is unset on this path; log the offending request.
      FARF(HIGH, "%s: unknown HVX mode %d", __func__, static_cast<int>(mode));
      return -3;
  }
  // Starting with v65, the RTOS supports HVX context switching.
  // Treat all hvx locks as blocking now, so they can succeed, and
  // be scheduled according to RTOS scheduler via thread priority.
  // Nonblocking call: qurt_hvx_try_lock(qurt_mode).
  int ret_val = qurt_hvx_lock(qurt_mode);
  if (ret_val != QURT_EOK) {
    return -1;
  }
  return vlen;
}
/*!
 * \brief Release the HVX lock via qurt_hvx_unlock.
 *
 * \return 0 if QuRT reports QURT_EOK, -1 otherwise.
 */
int unlock() {
  const bool released = qurt_hvx_unlock() == QURT_EOK;
  return released ? 0 : -1;
}
/*!
 * \brief Prepare the HVX configuration and hardware for a multithreaded job.
 *
 * \param hvx_config  Configuration to complete. On entry num_reserved and
 *                    mode are read; on return temp_reserve, mode, vlen and
 *                    num_threads have been filled in (num_reserved as well
 *                    when a temporary reservation was made).
 *
 * \return 0 on success, -1 on error. On the error paths after a temporary
 *         reservation has been made, that reservation is released again.
 */
int prepare_mt_job(config_t* hvx_config) {
  int num_units = qurt_hvx_get_units();
  if (num_units <= 0) {
    return -1;
  }
  // Check whether HVX is reserved for this protection domain. If not,
  // see if we can temporarily reserve them for this invocation only.
  hvx_config->temp_reserve = false;
  if (hvx_config->num_reserved == 0) {
    hvx_config->num_reserved = reserve(0);  // Reserve all units.
    if (hvx_config->num_reserved <= 0) {
      return -1;
    }
    hvx_config->temp_reserve = true;
  }
  // If client doesn't specify required mode, fallback to default.
  if (hvx_config->mode == MODE_DONT_CARE) hvx_config->mode = default_hvx_mode;
  // Choose 64 byte or 128 byte mode, based on whether there are odd or even
  // number of units
  if (hvx_config->mode == MODE_64B ||
      (hvx_config->mode == MODE_DONT_CARE && (hvx_config->num_reserved & 1))) {
    hvx_config->vlen = 64;
    hvx_config->mode = MODE_64B;
    hvx_config->num_threads = hvx_config->num_reserved;
  } else {
    hvx_config->vlen = 128;
    hvx_config->mode = MODE_128B;
    // Bits 8..15 of the unit count are used as the thread count in
    // 128-byte mode.
    hvx_config->num_threads = (num_units >> 8) & 0xFF;
    // Handle case where only 1 64-byte unit was available.
    if (hvx_config->num_threads == 0) {
      if (hvx_config->temp_reserve) unreserve();
      return -1;
    }
  }
  // If using HVX, make sure it turns on properly.
  if (hvx_config->num_reserved > 0 && power_on() != 0) {
    // Release the temporary reservation here as well, matching the other
    // failure path above; otherwise it would stay held after the error.
    if (hvx_config->temp_reserve) unreserve();
    return -1;
  }
  return 0;
}
/*!
 * \brief Undo the hardware setup done for a multithreaded job.
 *
 * \param hvx_config  Configuration describing the job; num_reserved and
 *                    temp_reserve are read.
 *
 * \return Always 0; the results of power_off and unreserve are ignored.
 */
int cleanup_mt_job(const config_t* hvx_config) {
  const bool hvx_used = hvx_config->num_reserved > 0;
  // HVX was in use, so signal that it can be powered down.
  if (hvx_used) {
    power_off();
  }
  // Drop a reservation that was made only for this job.
  if (hvx_config->temp_reserve) {
    unreserve();
  }
  return 0;
}
} // namespace hvx
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_SRC_TVM_HVX_H_
#define TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_SRC_TVM_HVX_H_
// Utility providing functions for accessing the Hexagon Vector Extensions
// (HVX) hardware.
#include <cstdint>
namespace hvx {
/*!
 * \brief HVX vector mode requested by a client.
 */
enum mode_t : uint32_t {
MODE_DONT_CARE = 0, /*!< Don't-care, just use whatever current mode is. */
MODE_64B, /*!< 64 byte HVX vector width. */
MODE_128B /*!< 128 byte HVX vector width. */
};
/*!
 * \brief HVX configuration data.
 */
struct config_t {
int num_reserved; /*!< Number of reserved HVX units. */
bool temp_reserve; /*!< Indicates that HVX pool reservation is */
/*!< temporary and needs to be released after use. */
mode_t mode; /*!< Configured HVX mode. */
int vlen; /*!< Configured HVX vector width (64 or 128 bytes). */
int num_threads; /*!< Number of threads that can lock HVX units. */
};
/*!
 * \brief
 * This function reserves HVX units for the protection domain to which
 * the caller belongs. Reservation is optional before locking HVX units.
 * Typically it would be called by applications that want to guarantee
 * up front that the requested number of HVX units will be available
 * for the duration of the application.
 *
 * \param num_units
 * Number of HVX units to reserve. 0 indicates to reserve all the units
 * present in the given target. > 0 indicates the number of single HVX
 * units to reserve. Mode (64 byte vs. 128 byte) is not specified.
 *
 * \return
 * The number of HVX units (in terms of 64 byte single units) successfully
 * reserved. 0 is returned when the underlying reservation call reports
 * that a reservation was already made, is not supported, or was not
 * successful. The return value of -1 indicates that no HVX hardware is
 * available on the target, or that the reservation failed with another
 * error.
 */
int reserve(unsigned num_units);
/*!
 * \brief
 * This function releases all HVX units from reservation. A call to this
 * function nullifies all previous calls to reserve HVX units from within
 * this worker pool's protection domain.
 *
 * \return
 * 0 on success, -1 if there was an error.
 */
int unreserve();
/*!
 * \brief
 * This function turns on the HVX hardware. It must be called sometime
 * before (possibly multiple) software threads lock HVX units.
 *
 * \return
 * 0 on success, -1 if there was an error.
 */
int power_on();
/*!
 * \brief
 * This function turns off the HVX hardware. It must be called sometime
 * after all threads have unlocked their HVX units.
 *
 * \return
 * 0 on success, -1 if there was an error.
 */
int power_off();
/*!
 * \brief
 * This function locks the HVX units for the calling thread.
 *
 * \param mode
 * The HVX mode. MODE_DONT_CARE selects the build's default mode.
 *
 * \return
 * The vector length in bytes (64 or 128) on success, a negative value
 * if there was an error.
 */
int lock(mode_t mode);
/*!
 * \brief
 * This function unlocks the HVX units for the calling thread.
 *
 * \return
 * 0 on success, -1 if there was an error.
 */
int unlock();
/*!
 * \brief
 * This function performs preparations for a multithreaded job.
 * It does so by filling out data members in the configuration
 * structure passed as a parameter, and by setting up the hardware:
 * - it performs a temporary reservation of HVX units, if no units
 * have yet been reserved,
 * - it powers on the HVX hardware.
 *
 * \param hvx_config
 * Structure describing the HVX configuration. Two data members
 * must be set prior to calling \ref prepare_mt_job:
 * \ref num_reserved, indicating the number of previously reserved
 * HVX units (can be 0), and \ref mode indicating the HVX mode.
 *
 * \return
 * 0 on success, -1 if there was an error.
 */
int prepare_mt_job(config_t* hvx_config);
/*!
 * \brief
 * This function cleans up after \ref prepare_mt_job, in particular
 * it releases temporarily reserved HVX units and turns the HVX
 * hardware off.
 *
 * \param hvx_config
 * The configuration that was passed to \ref prepare_mt_job.
 *
 * \return
 * 0. The current implementation does not report errors from the
 * power-off or unreserve steps.
 */
int cleanup_mt_job(const config_t* hvx_config);
} // namespace hvx
#endif // TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_SRC_TVM_HVX_H_
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <assert.h>
#include <stdlib.h>
#define FARF_ERROR 1
#include "AEEStdErr.h"
#include "HAP_farf.h"
#include "HAP_perf.h"
#include "apps_mem.h"
#include "qurt.h"
#include "tvm_remote.h"
#include "tvm_remote_nd.h"
#if __HEXAGON_ARCH__ >= 65
#include "HAP_vtcm_mgr.h"
#else
// Stub functions for targets that don't support VTCM.
// They stand in for the declarations from HAP_vtcm_mgr.h, which is only
// included for architecture v65 and newer.

// Never allocates: always returns a null pointer.
static void* HAP_request_VTCM(int a, int b) { return 0; }
// Nothing to release: always returns 0.
static int HAP_release_VTCM(void* a) { return 0; }
// Logs a message and returns AEE_ENOMEMORY without writing to any of the
// output parameters.
static int HAP_query_avail_VTCM(unsigned* avail_block_size,
unsigned* max_page_size, unsigned* num_pages) {
FARF(ALWAYS, "%s: running on architecture V62 or less", __func__);
return AEE_ENOMEMORY;
}
#endif // __HEXAGON_ARCH__
// Size limits in bytes. MIN_VTCM_SZ is the smallest maximum page size that
// tvm_remote_alloc_vtcm accepts before attempting an allocation.
// NOTE(review): the two GATHER_SCATTER limits are not referenced in the
// visible part of this file -- confirm whether they are still needed.
#define MIN_GATHER_SCATTER_SZ (32 * 1024)
#define MAX_GATHER_SCATTER_SZ (64 * 1024)
#define MIN_VTCM_SZ (64 * 1024)
/*!
* \brief Open a domain channel.
*
* \param uri URI of the channel description.
* \param handle_ptr Where to store the channel handle.
*
* \return 0 on success, negative value on error.
*/
int tvm_remote_open(const char* uri, remote_handle64* handle_ptr) {
FARF(ALWAYS, "%s, uri=%s", __func__, uri);
int rc = tvm_remote_nd_open();
if (rc != AEE_SUCCESS) {
FARF(ERROR, "%s: tvm_remote_nd_open failed rc=%08x", __func__, rc);
return rc;
}
*handle_ptr =
static_cast<remote_handle64>(reinterpret_cast<uintptr_t>(malloc(1)));
if (!*handle_ptr) {
FARF(ERROR, "%s: cannot allocate memory", __func__);
return AEE_ENOMEMORY;
}
return AEE_SUCCESS;
}
/*!
 * \brief Close a domain channel.
 *
 * Frees the block that backs the handle (if the handle is non-zero) and
 * then shuts down the non-domain implementation.
 *
 * \param handle  Domain channel handle to close.
 *
 * \return The result of tvm_remote_nd_close.
 */
int tvm_remote_close(remote_handle64 handle) {
  FARF(ALWAYS, "%s", __func__);
  if (handle != 0) {
    void* backing = reinterpret_cast<void*>(static_cast<uintptr_t>(handle));
    free(backing);
  }
  const int status = tvm_remote_nd_close();
  if (status != AEE_SUCCESS) {
    FARF(ERROR, "%s: tvm_remote_nd_close failed rc=%08x", __func__, status);
  }
  return status;
}
/*!
 * \brief Dummy function.
 *
 * \param handle Domain channel handle (unused).
 *
 * \return This function always returns AEE_SUCCESS.
 *
 * This function is present as a workaround. See comment at the call site
 * in hexagon_device_target.cc.
 */
int tvm_remote_call_mmap64(remote_handle64 handle) {
return AEE_SUCCESS;
}
/*!
 * \brief Load a shared library.
 *
 * Forwards to tvm_remote_nd_load_library; the channel handle is not used.
 *
 * \param handle Domain channel handle.
 * \param soname Name of the shared library.
 * \param soname_len Length of the name.
 * \param lib_ptr Where to store the handle of the loaded library.
 *
 * \return The result of tvm_remote_nd_load_library.
 */
int tvm_remote_load_library(remote_handle64 handle, const char* soname,
int soname_len, tvm_remote_handle_t* lib_ptr) {
return tvm_remote_nd_load_library(soname, soname_len, lib_ptr);
}
/*!
 * \brief Resolve symbol name to an address.
 *
 * Forwards to tvm_remote_nd_get_symbol; the channel handle is not used.
 *
 * \param handle Domain channel handle.
 * \param lib Handle of the shared library with the symbol.
 * \param name Symbol name.
 * \param name_len Length of the name.
 * \param sym_ptr Where to store the resolved address.
 *
 * \return The result of tvm_remote_nd_get_symbol.
 */
int tvm_remote_get_symbol(remote_handle64 handle, tvm_remote_handle_t lib,
const char* name, int name_len,
tvm_remote_handle_t* sym_ptr) {
return tvm_remote_nd_get_symbol(lib, name, name_len, sym_ptr);
}
/*!
 * \brief Call the specified function.
 *
 * Forwards to tvm_remote_nd_kernel; the channel handle is not used.
 *
 * \param handle Domain channel handle.
 * \param lib Handle of the library containing
 * the function to call.
 * \param symbol Address of the function to call.
 * \param scalar Address of values to pass in registers.
 * \param scalar_len Number of values to pass in registers.
 * \param stack Address of values to pass on stack.
 * \param stack_len Number of values to pass on stack.
 *
 * \param scalar_in_octet Address of the incoming scalar buffer.
 * \param scalar_in_octet_len Length of the incoming scalar buffer.
 * \param scalar_out_octet Address of the outgoing scalar buffer.
 * \param scalar_out_octet_len Length of the outgoing scalar buffer.
 * \param stack_in_octet Address of the incoming stack buffer.
 * \param stack_in_octet_len Length of the incoming stack buffer.
 * \param stack_out_octet Address of the outgoing stack buffer.
 * \param stack_out_octet_len Length of the outgoing stack buffer.
 *
 * \param pcycles Pointer to where to store cycle count.
 * \param time_usec Pointer to where to store time in usec.
 *
 * \return The result of tvm_remote_nd_kernel.
 *
 * The 8 "octet" arguments in this function are used for cache operations
 * only. They are not used for processing.
 */
int tvm_remote_kernel(
remote_handle64 handle, tvm_remote_handle_t lib,
tvm_remote_handle_t symbol, int* scalar, int scalar_len, int* stack,
int stack_len, const tvm_remote_buffer* scalar_in_octet,
int scalar_in_octet_len, tvm_remote_buffer* scalar_out_octet,
int scalar_out_octet_len, const tvm_remote_buffer* stack_in_octet,
int stack_in_octet_len, tvm_remote_buffer* stack_out_octet,
int stack_out_octet_len, uint64* pcycles, uint64* time_usec) {
// The buffer pointers are reinterpreted as the non-domain buffer type.
// NOTE(review): this assumes tvm_remote_buffer and tvm_remote_nd_buffer
// have identical layout (both come from an IDL `sequence<octet>` typedef)
// -- confirm against the generated headers.
return tvm_remote_nd_kernel(
lib, symbol, scalar, scalar_len, stack, stack_len,
reinterpret_cast<const tvm_remote_nd_buffer*>(scalar_in_octet),
scalar_in_octet_len,
reinterpret_cast<tvm_remote_nd_buffer*>(scalar_out_octet),
scalar_out_octet_len,
reinterpret_cast<const tvm_remote_nd_buffer*>(stack_in_octet),
stack_in_octet_len,
reinterpret_cast<tvm_remote_nd_buffer*>(stack_out_octet),
stack_out_octet_len, pcycles, time_usec);
}
/*!
 * \brief Release previously loaded shared object.
 *
 * Forwards to tvm_remote_nd_release_library; the channel handle is not
 * used.
 *
 * \param handle Domain channel handle.
 * \param lib Handle of shared library to release.
 *
 * \return The result of tvm_remote_nd_release_library.
 */
int tvm_remote_release_library(remote_handle64 handle,
tvm_remote_handle_t lib) {
return tvm_remote_nd_release_library(lib);
}
/*!
 * \brief Allocate VTCM memory.
 *
 * Queries the available VTCM first and refuses to allocate when the
 * largest page is smaller than MIN_VTCM_SZ. The allocation itself is
 * requested as a single page.
 *
 * \param handle  Domain channel handle (unused).
 * \param size    Number of bytes to allocate.
 * \param align   Requested alignment (only logged by this function).
 * \param dsp_va  Address of the variable that receives the allocated
 *                VTCM address.
 *
 * \return AEE_SUCCESS on success; the error from HAP_query_avail_VTCM if
 *         the query fails; AEE_ENOMEMORY if too little VTCM is available
 *         or the allocation fails.
 */
int tvm_remote_alloc_vtcm(remote_handle64 handle, unsigned size,
                          unsigned align, unsigned* dsp_va) {
  FARF(ALWAYS, "%s: size=%u, align=%u", __func__, size, align);

  unsigned block_bytes, page_bytes, page_count;
  const int query_rc =
      HAP_query_avail_VTCM(&block_bytes, &page_bytes, &page_count);
  if (query_rc != AEE_SUCCESS) {
    FARF(ERROR, "%s: HAP_query_avail_VTCM failed, rc=%08x", __func__,
         query_rc);
    return query_rc;
  }
  FARF(ALWAYS, "%s: avail_block_size=%u, max_page_size=%u, num_pages=%u",
       __func__, block_bytes, page_bytes, page_count);

  if (page_bytes < MIN_VTCM_SZ) {
    FARF(ERROR, "%s: available VTCM size less than %d KB, aborting", __func__,
         MIN_VTCM_SZ / 1024);
    return AEE_ENOMEMORY;
  }

  void* const region = HAP_request_VTCM(size, /*single_page_flag=*/1);
  if (region == nullptr) {
    FARF(ERROR, "%s: error allocating VTCM", __func__);
    return AEE_ENOMEMORY;
  }

  const uintptr_t region_addr = reinterpret_cast<uintptr_t>(region);
  *dsp_va = static_cast<unsigned>(region_addr);
  FARF(ALWAYS, "%s: allocated VTCM addr=0x%p", __func__, region);
  return AEE_SUCCESS;
}
/*!
 * \brief Free VTCM memory.
 *
 * \param handle  Domain channel handle (unused).
 * \param dsp_va  VTCM address to free.
 *
 * \return The result of HAP_release_VTCM.
 */
int tvm_remote_free_vtcm(remote_handle64 handle, unsigned dsp_va) {
  FARF(ALWAYS, "%s: dsp_va=0x%08x", __func__, dsp_va);
  const int status = HAP_release_VTCM(reinterpret_cast<void*>(dsp_va));
  if (status != AEE_SUCCESS) {
    FARF(ERROR, "%s: error freeing VTCM, rc=%08x", __func__, status);
  }
  return status;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <assert.h>
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
#define FARF_ERROR 1
#include "AEEStdDef.h"
#include "AEEStdErr.h"
#include "HAP_farf.h"
#include "HAP_mem.h"
#include "HAP_perf.h"
#include "qurt.h"
#include "tvm_hvx.h"
#include "tvm_remote_nd.h"
/*!
 * \brief Call descriptor consumed by launcher().
 *
 * The structure is packed; launcher() reads the fields by fixed byte
 * offsets (0, 4, 8, and 12 for the start of data).
 */
struct msg_call {
uint32_t func_va;     // Address of the function to call.
uint32_t scalar_num;  // Number of scalar values at the start of data.
uint32_t stack_num;   // Number of stack values following the scalars.
uint32_t data[];      // Scalar values, followed by the stack values.
} __attribute__((packed));
/*!
 * \brief Call the function described by a msg_call and measure its cycles.
 *
 * Written entirely in assembly (the function is naked). It copies the
 * stack_num stack words from mc->data onto the stack, loads the first six
 * words of mc->data into r0-r5, calls mc->func_va, and stores the
 * difference between the upcycle readings taken before and after the call
 * through pcc.
 *
 * \param mc   Call descriptor.
 * \param pcc  Where to store the program cycle count.
 *
 * \return Presumably whatever the called function left in r0 (the final
 *         instruction is annotated "implicit-use r1:0") -- TODO confirm.
 */
__attribute__((naked)) uint32_t launcher(volatile msg_call* mc,
uint64_t* pcc) {
__asm__(
"// This function is intentionally written to be readable, \n"
"// rather than fast. \n"
"// r0 = value of 'volatile msg_call *mc' \n"
"// r1 = address where to store the program cycle count \n"
"// In this packet the store happens before the allocframe so \n"
"// the offset added to r29 must reflect that the r29 has not \n"
"// yet been updated (stack grows towards decreasing addresses):\n"
"// r29 before allocframe --. \n"
"// [ r17:16 ] [ r19:18 ] [ r21:20 ] [ FP/LR ] \n"
"// `-- r29 after allocframe increasing addresses --> \n"
"{ memd(r29+#-16) = r21:20 \n"
" allocframe(#24) } \n"
"{ memd(r29+#0) = r17:16 \n"
" memd(r29+#8) = r19:18 } \n"
"{ r17:16 = combine(r1,r0) \n"
" r18 = r29 \n"
" r1 = memw(r0+#4) // scalar_num \n"
" r2 = memw(r0+#8) } // stack_num \n"
"// If there are no stack values, skip the stack setup. \n"
"{ p0 = cmp.eq(r2,#0) \n"
" if (p0.new) jump:t .Llauncher1 } \n"
"// Allocate space on the stack. Let r2 = needed space \n"
"// rounded up to a multiple of 8. \n"
"{ loop0(.Llauncher0,r2) \n"
" r2 = asl(r2,#2) } \n"
"{ r2 = add(r2,#4) } \n"
"{ r2 = clrbit(r2,#2) } \n"
"{ r29 = sub(r29,r2) } \n"
"// Copy stack contents onto the stack. Stack contents start \n"
"// at r3 = r0 + offsetof(data) + scalar_num*4 \n"
"{ r3 = addasl(r0,r1,#2) \n"
" r4 = r29 } \n"
"{ r3 = add(r3,#12) } // offsetof(data) \n"
".Llauncher0: \n"
"{ r5 = memw(r3++#4) \n"
" memw(r4++#4) = r5.new } :endloop0 \n"
"// Load registers. Some of the loaded data may actually be \n"
"// values from the stack part of 'data', but it's not an issue.\n"
".Llauncher1: \n"
"{ r0 = memw(r16+#12) // mc + offsetof(data) \n"
" r1 = memw(r16+#16) } \n"
"{ r2 = memw(r16+#20) \n"
" r3 = memw(r16+#24) } \n"
"{ r4 = memw(r16+#28) \n"
" r5 = memw(r16+#32) } \n"
"// Call. \n"
"{ r6 = memw(r16+#0) \n"
" r21:20 = upcycle } \n"
"{ callr r6 } \n"
"// Restore stack pointer (free up r18), calculate cycle count. \n"
"{ r29 = r18 \n"
" r19:18 = upcycle } \n"
"{ r19:18 = sub(r19:18, r21:20) } \n"
"// Store pcount, restore non-volatile registers, and return. \n"
"{ memd(r17+#0) = r19:18 \n"
" r21:20 = memd(r29+#16) } \n"
"{ r19:18 = memd(r29+#8) \n"
" r17:16 = memd(r29+#0) } \n"
"{ dealloc_return } // implicit-use r1:0 \n");
}
extern "C" {
// Weak fallback definition of __wrap_pthread_create. If this version is
// ever the one that gets called, it logs an error and aborts.
// NOTE(review): the real definition presumably comes from
// libtvm_wrap_pthread.so, which tvm_remote_nd_open loads -- confirm.
#pragma weak __wrap_pthread_create
int __wrap_pthread_create(pthread_t* restrict thread,
const pthread_attr_t* restrict attr,
void* (*start)(void*), void* restrict arg) {
FARF(ERROR, "Wrong %s called", __func__);
abort();
}
}
// Handle from dlopen("libtvm_runtime.so") in tvm_remote_nd_open();
// nullptr while that library is not loaded.
static void* lib_rt = nullptr;
// Handle from dlopen("libtvm_wrap_pthread.so") in tvm_remote_nd_open();
// nullptr while that library is not loaded.
static void* lib_thread = nullptr;
/*!
 * \brief Perform initialization.
 *
 * Loads libtvm_wrap_pthread.so first and libtvm_runtime.so second, and keeps
 * both handles so that tvm_remote_nd_close() can release them.
 *
 * \return AEE_SUCCESS on success, AEE_EUNABLETOLOAD if either library cannot
 *         be loaded. On failure neither handle is kept.
 */
int tvm_remote_nd_open() {
  lib_thread = dlopen("libtvm_wrap_pthread.so", RTLD_NOW | RTLD_GLOBAL);
  if (lib_thread == nullptr) {
    FARF(ERROR, "%s: dlopen failed for libtvm_wrap_pthread.so: %s", __func__,
         dlerror());
    return AEE_EUNABLETOLOAD;
  }
  lib_rt = dlopen("libtvm_runtime.so", RTLD_NOW | RTLD_GLOBAL);
  if (lib_rt == nullptr) {
    // Report the error before dlclose(), which may overwrite the dlerror()
    // state.
    FARF(ERROR, "%s: dlopen failed for libtvm_runtime.so: %s", __func__,
         dlerror());
    // Undo the first load so that a failed open does not leave
    // libtvm_wrap_pthread.so loaded with a stale handle behind.
    dlclose(lib_thread);
    lib_thread = nullptr;
    return AEE_EUNABLETOLOAD;
  }
  return AEE_SUCCESS;
}
/*!
 * \brief Perform cleanup.
 *
 * Releases the libraries loaded by tvm_remote_nd_open() in the reverse
 * order of loading. Handles that are already null are skipped, so calling
 * this function more than once is harmless.
 *
 * \return This function always returns AEE_SUCCESS.
 */
int tvm_remote_nd_close() {
  // libtvm_runtime.so was loaded last, so it goes first: anything it runs
  // while being unloaded can still reach libtvm_wrap_pthread.so.
  if (lib_rt != nullptr) {
    dlclose(lib_rt);
    lib_rt = nullptr;
  }
  if (lib_thread != nullptr) {
    dlclose(lib_thread);
    lib_thread = nullptr;
  }
  return AEE_SUCCESS;
}
/*!
 * \brief Dummy function.
 *
 * Takes no arguments and does nothing.
 *
 * \return This function always returns AEE_SUCCESS.
 *
 * This function is present as a workaround. See comment at the call site
 * in hexagon_device_target.cc.
 */
int tvm_remote_nd_call_mmap64() {
return AEE_SUCCESS;
}
/*!
* \brief Load a shared library.
*
* \param soname Name of the shared library.
* \param soname_len Length of the name.
* \param lib_ptr Where to store the handle of the loaded libarary.
*
* \return 0 on success, negative value on error.
*/
int tvm_remote_nd_load_library(const char* soname, int soname_len,
tvm_remote_nd_handle_t* lib_ptr) {
// We need to use RTLD_NOW, the libraries we build for Hexagon
// offloading do not support lazy binding.
FARF(ALWAYS, "%s: %s", __func__, soname);
if (void* lib = dlopen(soname, RTLD_GLOBAL | RTLD_NOW)) {
*lib_ptr = reinterpret_cast<tvm_remote_nd_handle_t>(lib);
return AEE_SUCCESS;
}
FARF(ERROR, "%s: dlopen failed: %s", __func__, dlerror());
return AEE_EUNKNOWN;
}
/*!
 * \brief Resolve symbol name to an address.
 *
 * \param lib       Handle of the shared library with the symbol.
 * \param name      Symbol name (NUL-terminated).
 * \param name_len  Size of the buffer holding the name; the terminating
 *                  NUL must lie within it.
 * \param sym_ptr   Where to store the resolved address.
 *
 * \return AEE_SUCCESS on success, AEE_EUNKNOWN if an argument is invalid or
 *         the symbol cannot be resolved.
 */
int tvm_remote_nd_get_symbol(tvm_remote_nd_handle_t lib, const char* name,
                             int name_len, tvm_remote_nd_handle_t* sym_ptr) {
  if (name == nullptr || sym_ptr == nullptr) {
    FARF(ERROR, "%s: null argument", __func__);
    return AEE_EUNKNOWN;
  }
  // The name is used as a C string below (logging, dlsym), so make sure it
  // is terminated inside the buffer that was actually passed in.
  bool terminated = false;
  for (int i = 0; i < name_len && !terminated; ++i) {
    terminated = (name[i] == '\0');
  }
  if (!terminated) {
    FARF(ERROR, "%s: symbol name is not NUL-terminated within %d bytes",
         __func__, name_len);
    return AEE_EUNKNOWN;
  }

  FARF(ALWAYS, "%s: name=%s", __func__, name);
  if (void* p = dlsym(reinterpret_cast<void*>(lib), name)) {
    *sym_ptr = reinterpret_cast<tvm_remote_nd_handle_t>(p);
    return AEE_SUCCESS;
  }
  // dlsym() may return null without recording an error, in which case
  // dlerror() yields a null pointer that must not be passed to %s.
  const char* err = dlerror();
  FARF(ERROR, "%s: dlsym failed: %s", __func__,
       err != nullptr ? err : "symbol not found");
  return AEE_EUNKNOWN;
}
/*!
 * \brief Log the contents of a call message.
 *
 * \param mc  Message to print: the function address, the two argument
 *            counts, and the argument words. The first scalar_num words of
 *            mc.data are logged as scalar data, the following stack_num
 *            words as stack data.
 */
static void print_msg_call(const msg_call& mc) {
  // NOTE(review): the field types of msg_call are not visible here, so the
  // %x/%d specifiers for the fields are left as they were - confirm.
  FARF(ALWAYS, "device: launching %x scalar_num:%d stack_num:%d", mc.func_va,
       mc.scalar_num, mc.stack_num);
  // The loop indices are unsigned, so they are printed with %u.
  for (unsigned i = 0; i != mc.scalar_num; ++i) {
    FARF(ALWAYS, "scalar_data[%u] %x", i, mc.data[i]);
  }
  for (unsigned i = 0; i != mc.stack_num; ++i) {
    FARF(ALWAYS, "stack_data[%u] %x", i, mc.data[mc.scalar_num + i]);
  }
}
/*!
 * \brief Call the specified function.
 *
 * \param lib                   Handle of the library containing
 *                              the function to call (not used here).
 * \param symbol                Address of the function to call.
 * \param scalar                Address of values to pass in registers.
 * \param scalar_len            Number of values to pass in registers.
 * \param stack                 Address of values to pass on stack.
 * \param stack_len             Number of values to pass on stack.
 *
 * \param scalar_in_octet       Address of the incoming scalar buffer.
 * \param scalar_in_octet_len   Length of the incoming scalar buffer.
 * \param scalar_out_octet      Address of the outgoing scalar buffer.
 * \param scalar_out_octet_len  Length of the outgoing scalar buffer.
 * \param stack_in_octet        Address of the incoming stack buffer.
 * \param stack_in_octet_len    Length of the incoming stack buffer.
 * \param stack_out_octet       Address of the outgoing stack buffer.
 * \param stack_out_octet_len   Length of the outgoing stack buffer.
 *
 * \param pcycles               Pointer to where to store cycle count.
 * \param time_usec             Pointer to where to store time in usec.
 *
 * \return The value returned by launcher() for the call, AEE_ENOMEMORY if
 *         the call message cannot be allocated, or -1 if scalar_len or
 *         stack_len is negative.
 *
 * The 8 "octet" arguments in this function are used for cache operations
 * only. They are not used for processing.
 */
int tvm_remote_nd_kernel(
    tvm_remote_nd_handle_t lib, tvm_remote_nd_handle_t symbol, int* scalar,
    int scalar_len, int* stack, int stack_len,
    const tvm_remote_nd_buffer* scalar_in_octet, int scalar_in_octet_len,
    tvm_remote_nd_buffer* scalar_out_octet, int scalar_out_octet_len,
    const tvm_remote_nd_buffer* stack_in_octet, int stack_in_octet_len,
    tvm_remote_nd_buffer* stack_out_octet, int stack_out_octet_len,
    uint64* pcycles, uint64* time_usec) {
  // A negative count would make the message buffer smaller than its own
  // header, so the header writes below would overflow it. Reject such
  // counts before anything is locked or allocated.
  if (scalar_len < 0 || stack_len < 0) {
    FARF(ERROR, "%s: invalid argument counts scalar_len=%d stack_len=%d",
         __func__, scalar_len, stack_len);
    return -1;
  }

  hvx::config_t hvx_info = {0};
  hvx::prepare_mt_job(&hvx_info);

  // Stays 0 when no lock is attempted, so the unlock check further down
  // never reads an indeterminate value.
  int lock_result = 0;
  // Check if HVX units are available
  if (hvx_info.num_reserved > 0) {
    lock_result = hvx::lock(hvx::MODE_128B);
    if (lock_result < 0) {
      FARF(ERROR, "%s: HVX locking failed lock_result=%d num_reserved=%d",
           __func__, lock_result, hvx_info.num_reserved);
    } else {
      FARF(ALWAYS, "%s: HVX lock successful lock_result=%d", __func__,
           lock_result);
    }
  } else {
    FARF(ERROR, "%s: there are no HVX units available", __func__);
  }

  // Undoes the HVX setup above; used on every path that leaves the function
  // from here on.
  auto release_hvx = [&]() {
    if (lock_result > 0) hvx::unlock();
    hvx::cleanup_mt_job(&hvx_info);
  };

  // The message is a sequence of 32-bit words: 3 header words (function
  // address, scalar count, stack count) followed by the argument words.
  // The launcher loads registers r0-r5 from the first 6 argument words
  // regardless of the counts, so the buffer always provides at least that
  // many; calloc zero-fills whatever the caller did not supply.
  const size_t header_words = 3;
  const size_t min_data_words = 6;
  const size_t arg_words =
      static_cast<size_t>(scalar_len) + static_cast<size_t>(stack_len);
  const size_t data_words =
      arg_words < min_data_words ? min_data_words : arg_words;
  struct msg_call* mc = static_cast<struct msg_call*>(
      calloc(header_words + data_words, sizeof(uint32_t)));
  if (mc == nullptr) {
    FARF(ERROR, "%s: failed to allocate memory for mc", __func__);
    release_hvx();
    return AEE_ENOMEMORY;
  }

  int32_t* mc_ptr = reinterpret_cast<int32_t*>(mc);
  // Scalar buffers come first.
  int k = 3;
  for (int i = 0; i < scalar_len; i++, k++) {
    *(mc_ptr + k) = static_cast<uint32_t>(scalar[i]);
  }
  for (int i = 0; i < stack_len; i++, k++) {
    *(mc_ptr + k) = static_cast<uint32_t>(stack[i]);
  }
  mc->scalar_num = scalar_len;
  mc->stack_num = stack_len;
  mc->func_va = symbol;
  print_msg_call(*mc);

  uint64_t start_time = HAP_perf_get_time_us();
  int result = launcher(mc, pcycles);
  *time_usec = HAP_perf_get_time_us() - start_time;
  FARF(ALWAYS, "kernel execution: %llu pcycles %llu usec", *pcycles,
       *time_usec);

  release_hvx();
  free(mc);
  return result;
}
/*!
 * \brief Release previously loaded shared object.
 *
 * \param lib  Handle of shared library to release.
 *
 * \return 0 on success, -1 if dlclose() reports a failure.
 */
int tvm_remote_nd_release_library(tvm_remote_nd_handle_t lib) {
  if (dlclose(reinterpret_cast<void*>(lib)) != 0) {
    // dlerror() may return null when no message is available.
    const char* err = dlerror();
    FARF(ERROR, "%s: dlclose failed: %s", __func__,
         err != nullptr ? err : "unknown error");
    return -1;
  }
  FARF(ALWAYS, "tvm_remote_nd_release_library done ");
  return 0;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Implement a wrapper around pthread_create that sets the thread stack
* size to a chosen value.
*
* TVM runtime uses std::thread, but the C++ standard does not provide
* any means of controlling thread attributes (like stack size). Because
* of that, any thread created by the std::thread constructor will use
* default attributes. The default stack size for a thread in QuRT is 16kB.
* This has proven to be insufficient in the past, so we need to increase
* it.
* When libtvm_runtime.so is linked, a linker flag --wrap=pthread_create
* is used, which causes the linker to rename all uses of pthread_create
* with references to __wrap_pthread_create. This file implements the
* __wrap function to set the larger stack size and call the actual
* pthread_create. The call to pthread_create here must not be renamed,
* so this function cannot be included in the TVM runtime binary.
* Instead, it's implemented in a separate shared library.
*/
#include <pthread.h>
#include "HAP_farf.h"
// Stack size that __wrap_pthread_create requests when the caller does not
// pass thread attributes.
static constexpr size_t kThreadStackSize = 128 * 1024; // 128kB
// Make sure the function has C linkage.
// Only the declaration is placed inside extern "C"; the definition that
// follows it in this file then takes the same linkage.
extern "C" {
int __wrap_pthread_create(pthread_t* restrict thread,
const pthread_attr_t* restrict attr,
void* (*start)(void*), void* restrict arg);
}
int __wrap_pthread_create(pthread_t* restrict thread,
const pthread_attr_t* restrict attr,
void* (*start)(void*), void* restrict arg) {
pthread_attr_t def_attr;
if (attr == nullptr) {
if (int rc = pthread_attr_init(&def_attr)) {
FARF(ERROR, "pthread_attr_init failed: rc=%08x", rc);
return rc;
}
if (int rc = pthread_attr_setstacksize(&def_attr, kThreadStackSize)) {
FARF(ERROR, "pthread_attr_setstacksize failed: rc=%08x", rc);
return rc;
}
attr = &def_attr;
}
size_t stack_size = 0;
if (int rc = pthread_attr_getstacksize(attr, &stack_size)) {
FARF(ERROR, "pthread_attr_setstacksize failed: rc=%08x", rc);
return rc;
}
FARF(ALWAYS, "launching thread with stack_size=%zu", stack_size);
int t = pthread_create(thread, attr, start, arg);
if (int rc = pthread_attr_destroy(&def_attr)) {
FARF(ERROR, "pthread_attr_destroy failed (after pthread_create): rc=%08x",
rc);
}
return t;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
#define TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
/// @file tvm_hexagon_remote.idl
/// IDL to offload TVM kernels to Hexagon from APPS for multi-domains
#include "AEEStdDef.h"
#include "remote.h"
// Default definitions of the __QAIC_* decoration macros. Each one is defined
// only if it is not defined already. With these defaults __QAIC_HEADER(ff)
// and __QAIC_IMPL(ff) expand to ff unchanged, and the *_EXPORT and
// *_ATTRIBUTE macros expand to nothing.
#ifndef __QAIC_HEADER
#define __QAIC_HEADER(ff) ff
#endif // __QAIC_HEADER
#ifndef __QAIC_HEADER_EXPORT
#define __QAIC_HEADER_EXPORT
#endif // __QAIC_HEADER_EXPORT
#ifndef __QAIC_HEADER_ATTRIBUTE
#define __QAIC_HEADER_ATTRIBUTE
#endif // __QAIC_HEADER_ATTRIBUTE
#ifndef __QAIC_IMPL
#define __QAIC_IMPL(ff) ff
#endif // __QAIC_IMPL
#ifndef __QAIC_IMPL_EXPORT
#define __QAIC_IMPL_EXPORT
#endif // __QAIC_IMPL_EXPORT
#ifndef __QAIC_IMPL_ATTRIBUTE
#define __QAIC_IMPL_ATTRIBUTE
#endif // __QAIC_IMPL_ATTRIBUTE
#ifdef __cplusplus
extern "C" {
#endif
/**
* Opens the handle in the specified domain. If this is the first
* handle, this creates the session. Typically this means opening
* the device, aka open("/dev/adsprpc-smd"), then calling ioctl
* device APIs to create a PD on the DSP to execute our code in,
* then asking that PD to dlopen the .so and dlsym the skel function.
*
* @param uri, <interface>_URI"&_dom=aDSP"
* <interface>_URI is a QAIC generated uri, or
* "file:///<sofilename>?<interface>_skel_handle_invoke&_modver=1.0"
* If the _dom parameter is not present, _dom=DEFAULT is assumed
* but not forwarded.
* Reserved uri keys:
* [0]: first unnamed argument is the skel invoke function
* _dom: execution domain name, _dom=mDSP/aDSP/DEFAULT
* _modver: module version, _modver=1.0
* _*: any other key name starting with an _ is reserved
* Unknown uri keys/values are forwarded as is.
* @param h, resulting handle
* @retval, 0 on success
*/
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_open)(
const char* uri, remote_handle64* h) __QAIC_HEADER_ATTRIBUTE;
/**
* Closes a handle. If this is the last handle to close, the session
* is closed as well, releasing all the allocated resources.
* @param h, the handle to close
* @retval, 0 on success, should always succeed
*/
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_close)(
remote_handle64 h) __QAIC_HEADER_ATTRIBUTE;
// Buffer descriptor used by the kernel call: a pointer (data) paired with a
// length (dataLen).
// NOTE(review): dataLen is presumably the number of bytes at data - confirm
// against the IDL.
typedef struct _tvm_hexagon_remote_buffer__seq_octet
_tvm_hexagon_remote_buffer__seq_octet;
typedef _tvm_hexagon_remote_buffer__seq_octet tvm_hexagon_remote_buffer;
struct _tvm_hexagon_remote_buffer__seq_octet {
unsigned char* data;
int dataLen;
};
// Handle type used below for modules and symbols.
typedef unsigned int tvm_hexagon_remote_handle_t;
typedef uint64 tvm_hexagon_remote_scalar_t;
// Domain versions of the remote functions. Each takes a remote_handle64 as
// its first parameter.
// NOTE(review): _h is presumably the handle produced by
// tvm_hexagon_remote_open; the one-line descriptions below are inferred from
// the function and parameter names only - confirm against the IDL.

// Load a library; a module handle is written to *module_ptr.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_load_library)(
remote_handle64 _h, const char* soname, int sonameLen, const char* code,
int codeLen,
tvm_hexagon_remote_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE;
// Look up a symbol by name in a module; a handle is written to *sym_ptr.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_get_symbol)(
remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr,
const char* name, int nameLen,
tvm_hexagon_remote_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE;
// Invoke a symbol with scalar and stack argument arrays; cycle count and
// elapsed time are reported through pcycles and time_usec.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_kernel)(
remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr,
tvm_hexagon_remote_handle_t symbol, int* scalar, int scalarLen, int* stack,
int stackLen, const tvm_hexagon_remote_buffer* scalar_in_octet,
int scalar_in_octetLen, tvm_hexagon_remote_buffer* scalar_out_octet,
int scalar_out_octetLen, const tvm_hexagon_remote_buffer* stack_in_octet,
int stack_in_octetLen, tvm_hexagon_remote_buffer* stack_out_octet,
int stack_out_octetLen, uint64* pcycles,
uint64* time_usec) __QAIC_HEADER_ATTRIBUTE;
// Release a module obtained from load_library.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_release_library)(
remote_handle64 _h,
tvm_hexagon_remote_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE;
// Allocate VTCM of the given size and alignment; the address is written to
// *dsp_va.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_alloc_vtcm)(
remote_handle64 _h, unsigned int size, unsigned int align,
unsigned int* dsp_va) __QAIC_HEADER_ATTRIBUTE;
// Free VTCM previously returned by alloc_vtcm.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_free_vtcm)(
remote_handle64 _h, unsigned int dsp_va) __QAIC_HEADER_ATTRIBUTE;
// Takes only the handle; no data is passed in or out.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_call_mmap64)(
remote_handle64 _h) __QAIC_HEADER_ATTRIBUTE;
// Default URI for this interface, defined only if not defined already. The
// adjacent string literals concatenate to
// "file:///libtvm_hexagon_remote_skel.so?tvm_hexagon_remote_skel_handle_invoke&_modver=1.0".
#ifndef tvm_hexagon_remote_URI
#define tvm_hexagon_remote_URI \
"file:///" \
"libtvm_hexagon_remote_skel.so?tvm_hexagon_remote_skel_handle_invoke&_" \
"modver=1.0"
#endif /*tvm_hexagon_remote_URI*/
#ifdef __cplusplus
}
#endif
#endif // TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_H_
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
#define TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
/// @file tvm_hexagon_remote_nd.idl
/// IDL to offload TVM kernels to Hexagon from APPS for non-domains
#include "AEEStdDef.h"
#include "remote.h"
// Default definitions of the __QAIC_* decoration macros. Each one is defined
// only if it is not defined already. With these defaults __QAIC_HEADER(ff)
// and __QAIC_IMPL(ff) expand to ff unchanged, and the *_EXPORT and
// *_ATTRIBUTE macros expand to nothing.
#ifndef __QAIC_HEADER
#define __QAIC_HEADER(ff) ff
#endif // __QAIC_HEADER
#ifndef __QAIC_HEADER_EXPORT
#define __QAIC_HEADER_EXPORT
#endif // __QAIC_HEADER_EXPORT
#ifndef __QAIC_HEADER_ATTRIBUTE
#define __QAIC_HEADER_ATTRIBUTE
#endif // __QAIC_HEADER_ATTRIBUTE
#ifndef __QAIC_IMPL
#define __QAIC_IMPL(ff) ff
#endif // __QAIC_IMPL
#ifndef __QAIC_IMPL_EXPORT
#define __QAIC_IMPL_EXPORT
#endif // __QAIC_IMPL_EXPORT
#ifndef __QAIC_IMPL_ATTRIBUTE
#define __QAIC_IMPL_ATTRIBUTE
#endif // __QAIC_IMPL_ATTRIBUTE
#ifdef __cplusplus
extern "C" {
#endif
// Buffer descriptor used by the kernel call: a pointer (data) paired with a
// length (dataLen).
// NOTE(review): dataLen is presumably the number of bytes at data - confirm
// against the IDL.
typedef struct _tvm_hexagon_remote_nd_buffer__seq_octet
_tvm_hexagon_remote_nd_buffer__seq_octet;
typedef _tvm_hexagon_remote_nd_buffer__seq_octet tvm_hexagon_remote_nd_buffer;
struct _tvm_hexagon_remote_nd_buffer__seq_octet {
unsigned char* data;
int dataLen;
};
// Handle type used below for modules and symbols.
typedef unsigned int tvm_hexagon_remote_nd_handle_t;
typedef uint64 tvm_hexagon_remote_nd_scalar_t;
// Non-domain versions of the remote functions: none of them takes a
// remote_handle64, and open/close take no arguments at all.
// NOTE(review): the one-line descriptions below are inferred from the
// function and parameter names only - confirm against the IDL.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_open)(void)
__QAIC_HEADER_ATTRIBUTE;
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_close)(void)
__QAIC_HEADER_ATTRIBUTE;
// Load a library; a module handle is written to *module_ptr.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_load_library)(
const char* soname, int sonameLen, const char* code, int codeLen,
tvm_hexagon_remote_nd_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE;
// Look up a symbol by name in a module; a handle is written to *sym_ptr.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_get_symbol)(
tvm_hexagon_remote_nd_handle_t module_ptr, const char* name, int nameLen,
tvm_hexagon_remote_nd_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE;
// Invoke a symbol with scalar and stack argument arrays; cycle count and
// elapsed time are reported through pcycles and time_usec.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_kernel)(
tvm_hexagon_remote_nd_handle_t module_ptr,
tvm_hexagon_remote_nd_handle_t symbol, int* scalar, int scalarLen,
int* stack, int stackLen,
const tvm_hexagon_remote_nd_buffer* scalar_in_octet,
int scalar_in_octetLen, tvm_hexagon_remote_nd_buffer* scalar_out_octet,
int scalar_out_octetLen,
const tvm_hexagon_remote_nd_buffer* stack_in_octet, int stack_in_octetLen,
tvm_hexagon_remote_nd_buffer* stack_out_octet, int stack_out_octetLen,
uint64* pcycles, uint64* time_usec) __QAIC_HEADER_ATTRIBUTE;
// Release a module obtained from load_library.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_release_library)(
tvm_hexagon_remote_nd_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE;
// Takes no arguments; no data is passed in or out.
__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_call_mmap64)(void)
__QAIC_HEADER_ATTRIBUTE;
#ifdef __cplusplus
}
#endif
#endif // TVM_RUNTIME_HEXAGON_TARGET_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_
......@@ -29,7 +29,7 @@
#include "../hexagon_module.h"
#include "AEEStdErr.h"
#include "fastrpc/tvm_hexagon_remote.h"
#include "fastrpc/include/tvm_remote.h"
#include "hexagon_dsprpcapi.h"
#include "hexagon_stubapi.h"
#include "hexagon_target_log.h"
......@@ -88,7 +88,7 @@ class HexagonTarget : public tvm::runtime::hexagon::Device {
// in apps's pointers, i.e. sizeof_dsp(void*) <= sizeof_apps(void*).
std::map<const void*, std::pair<void*, size_t>> dsp_to_apps_;
remote_handle64 domain_channel_handle_ = AEE_EUNKNOWN;
tvm_hexagon_remote_handle_t module_pointer_ = AEE_EUNKNOWN;
tvm_remote_handle_t module_pointer_ = AEE_EUNKNOWN;
uint64_t count_channel_open_ = 0;
// Global lock, used for all critical sections. This can be refined
// in the future.
......@@ -200,8 +200,8 @@ int HexagonTarget::OpenDomainChannel(bool use_unsigned_pd) {
TVM_LOGD_HT("remote_session_control not available");
}
int rc = stub_api->tvm_hexagon_remote_open(
tvm_hexagon_remote_URI "&_dom=cdsp", &domain_channel_handle_);
int rc = stub_api->tvm_remote_open(tvm_remote_URI "&_dom=cdsp",
&domain_channel_handle_);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("failed to open channel rc=0x%x", rc);
} else {
......@@ -216,7 +216,7 @@ int HexagonTarget::CloseDomainChannel() {
const StubAPI* stub_api = StubAPI::Global();
int rc = stub_api->tvm_hexagon_remote_close(domain_channel_handle_);
int rc = stub_api->tvm_remote_close(domain_channel_handle_);
if (rc == AEE_SUCCESS) {
domain_channel_handle_ = AEE_EUNKNOWN;
stub_api->rpcmem_deinit_ptr()();
......@@ -231,8 +231,8 @@ void HexagonTarget::ReleaseLibrary() {
crit_section_.lock();
if (module_pointer_ != AEE_EUNKNOWN) {
const StubAPI* stub_api = StubAPI::Global();
int rc = stub_api->tvm_hexagon_remote_release_library(
domain_channel_handle_, module_pointer_);
int rc = stub_api->tvm_remote_release_library(domain_channel_handle_,
module_pointer_);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("failed to unload device library rc=0x%x", rc);
} else {
......@@ -268,9 +268,10 @@ void* HexagonTarget::Alloc(unsigned size, unsigned align) {
// DSP before calling remote_map64. Hence this call is needed for now until
// FastRPC comes up with a fix.
int rc_call_mmap_64 =
stub_api->tvm_hexagon_remote_call_mmap64(domain_channel_handle_);
stub_api->tvm_remote_call_mmap64(domain_channel_handle_);
if (rc_call_mmap_64 != AEE_SUCCESS) {
TVM_LOGE_HT("mmap64 failed for domain channel %lu", domain_channel_handle_);
TVM_LOGE_HT("mmap64 failed for domain channel %lu",
domain_channel_handle_);
return nullptr;
}
......@@ -325,8 +326,8 @@ void* HexagonTarget::AllocVtcm(unsigned size, unsigned align) {
const StubAPI* stub_api = StubAPI::Global();
unsigned int dsp_va = 0;
int rc = stub_api->tvm_hexagon_remote_alloc_vtcm(domain_channel_handle_,
size, align, &dsp_va);
int rc = stub_api->tvm_remote_alloc_vtcm(domain_channel_handle_, size, align,
&dsp_va);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("VTCM allocation failed size=%u, align=%u", size, align);
return nullptr;
......@@ -342,8 +343,7 @@ void HexagonTarget::FreeVtcm(void* ptr) {
TVM_LOGD_HT("%s:Calling vtcm free. ptr=%p", __func__, ptr);
uintptr_t dsp_va = reinterpret_cast<uintptr_t>(ptr);
int rc =
stub_api->tvm_hexagon_remote_free_vtcm(domain_channel_handle_, dsp_va);
int rc = stub_api->tvm_remote_free_vtcm(domain_channel_handle_, dsp_va);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("VTCM deallocation failed");
}
......@@ -365,16 +365,18 @@ void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src,
if (aa_src.second < len) {
TVM_LOGD_HT(
"specified length:%u larger than source buffer size:%zu, copy "
"truncated", len, aa_src.second);
"truncated",
len, aa_src.second);
}
if (aa_dst.second < len) {
TVM_LOGD_HT(
"specified length:%u larger than dest buffer size:%zu, copy "
"truncated", len, aa_dst.second);
"truncated",
len, aa_dst.second);
}
len = std::min({size_t(len), aa_src.second, aa_dst.second});
TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> dsp:%p(apps:%p), len:%u",
src, aa_src.first, dst, aa_dst.first, len);
TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> dsp:%p(apps:%p), len:%u", src,
aa_src.first, dst, aa_dst.first, len);
std::memcpy(aa_dst.first, aa_src.first, len);
}
......@@ -438,9 +440,8 @@ void* HexagonTarget::Load(const std::string& data, const std::string& fmt) {
crit_section_.lock();
TVM_LOGD_HT("loading library %s ", data.c_str());
const StubAPI* stub_api = StubAPI::Global();
int rc = stub_api->tvm_hexagon_remote_load_library(
domain_channel_handle_, data.c_str(), data.size() + 1, data.c_str(),
data.size() + 1, &module_pointer_);
int rc = stub_api->tvm_remote_load_library(
domain_channel_handle_, data.c_str(), data.size() + 1, &module_pointer_);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("failed to load device library rc=0x%x", rc);
}
......@@ -470,11 +471,11 @@ void HexagonTarget::Unload(void* mod) {
void* HexagonTarget::Resolve(const std::string& sym) {
const StubAPI* stub_api = StubAPI::Global();
tvm_hexagon_remote_handle_t pf;
tvm_remote_handle_t pf;
TVM_LOGD_HT("resolving symbol %s", sym.c_str());
int rc = stub_api->tvm_hexagon_remote_get_symbol(
domain_channel_handle_, module_pointer_, sym.c_str(), sym.size() + 1,
&pf);
int rc =
stub_api->tvm_remote_get_symbol(domain_channel_handle_, module_pointer_,
sym.c_str(), sym.size() + 1, &pf);
if (rc != AEE_SUCCESS) {
TVM_LOGE_HT("failed to get symbol from CDSP rc=0x%x", rc);
return nullptr;
......@@ -487,23 +488,20 @@ void* HexagonTarget::Resolve(const std::string& sym) {
void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned scalar_num,
uint32_t* stack, unsigned stack_num) {
uint64 pcycles = 0, execution_time_usec = 0;
auto scalar_octet = std::unique_ptr<tvm_hexagon_remote_buffer[]>(
new tvm_hexagon_remote_buffer[scalar_num]);
auto stack_octet = std::unique_ptr<tvm_hexagon_remote_buffer[]>(
new tvm_hexagon_remote_buffer[stack_num]);
auto scalar_octet =
std::unique_ptr<tvm_remote_buffer[]>(new tvm_remote_buffer[scalar_num]);
auto stack_octet =
std::unique_ptr<tvm_remote_buffer[]>(new tvm_remote_buffer[stack_num]);
TVM_LOGD_HT("scalars=%p, stack=%p", scalar, stack);
if (scalar_octet == nullptr || stack_octet == nullptr) {
TVM_LOGE_HT("mem alloc failed for scalar/stack octets");
return;
}
std::memset(scalar_octet.get(), 0,
scalar_num * sizeof(tvm_hexagon_remote_buffer));
std::memset(stack_octet.get(), 0,
stack_num * sizeof(tvm_hexagon_remote_buffer));
std::memset(scalar_octet.get(), 0, scalar_num * sizeof(tvm_remote_buffer));
std::memset(stack_octet.get(), 0, stack_num * sizeof(tvm_remote_buffer));
auto ProcessInputs = [this](uint32_t* inputs,
tvm_hexagon_remote_buffer* buffers,
auto ProcessInputs = [this](uint32_t* inputs, tvm_remote_buffer* buffers,
unsigned num) {
for (unsigned i = 0; i != num; ++i) {
void* ptr = reinterpret_cast<void*>(static_cast<uintptr_t>(inputs[i]));
......@@ -533,10 +531,9 @@ void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned scalar_num,
TVM_LOGD_HT("%s", ToString(" stack", stack, stack_num).c_str());
const StubAPI* stub_api = StubAPI::Global();
int rc = stub_api->tvm_hexagon_remote_kernel(
int rc = stub_api->tvm_remote_kernel(
domain_channel_handle_, module_pointer_,
static_cast<tvm_hexagon_remote_handle_t>(
reinterpret_cast<uintptr_t>(func)),
static_cast<tvm_remote_handle_t>(reinterpret_cast<uintptr_t>(func)),
reinterpret_cast<int*>(scalar), scalar_num,
reinterpret_cast<int*>(stack), stack_num, scalar_octet.get(), scalar_num,
scalar_octet.get(), scalar_num, stack_octet.get(), stack_num,
......
......@@ -41,8 +41,8 @@ StubAPI::StubAPI() {
TVM_LOGD("ADSP subsystem present");
}
constexpr auto domain_lib_name = "libtvm_hexagon_remote_stub.so";
constexpr auto nondomain_lib_name = "libtvm_hexagon_remote_nd_stub.so";
constexpr auto domain_lib_name = "libtvm_remote_stub.so";
constexpr auto nondomain_lib_name = "libtvm_remote_nd_stub.so";
const char* lib_name =
enable_domains_ ? domain_lib_name : nondomain_lib_name;
......@@ -50,22 +50,22 @@ StubAPI::StubAPI() {
#define RESOLVE(fn) p##fn##_ = GetSymbol<fn##_t*>(#fn)
if (enable_domains_) {
RESOLVE(tvm_hexagon_remote_load_library);
RESOLVE(tvm_hexagon_remote_release_library);
RESOLVE(tvm_hexagon_remote_get_symbol);
RESOLVE(tvm_hexagon_remote_kernel);
RESOLVE(tvm_hexagon_remote_open);
RESOLVE(tvm_hexagon_remote_close);
RESOLVE(tvm_hexagon_remote_alloc_vtcm);
RESOLVE(tvm_hexagon_remote_free_vtcm);
RESOLVE(tvm_hexagon_remote_call_mmap64);
RESOLVE(tvm_remote_load_library);
RESOLVE(tvm_remote_release_library);
RESOLVE(tvm_remote_get_symbol);
RESOLVE(tvm_remote_kernel);
RESOLVE(tvm_remote_open);
RESOLVE(tvm_remote_close);
RESOLVE(tvm_remote_alloc_vtcm);
RESOLVE(tvm_remote_free_vtcm);
RESOLVE(tvm_remote_call_mmap64);
} else {
RESOLVE(tvm_hexagon_remote_nd_load_library);
RESOLVE(tvm_hexagon_remote_nd_release_library);
RESOLVE(tvm_hexagon_remote_nd_get_symbol);
RESOLVE(tvm_hexagon_remote_nd_kernel);
RESOLVE(tvm_hexagon_remote_nd_open);
RESOLVE(tvm_hexagon_remote_nd_call_mmap64);
RESOLVE(tvm_remote_nd_load_library);
RESOLVE(tvm_remote_nd_release_library);
RESOLVE(tvm_remote_nd_get_symbol);
RESOLVE(tvm_remote_nd_kernel);
RESOLVE(tvm_remote_nd_open);
RESOLVE(tvm_remote_nd_call_mmap64);
}
RESOLVE(rpcmem_init);
......
......@@ -28,8 +28,8 @@
#include <tuple>
#include "fastrpc/tvm_hexagon_remote.h"
#include "fastrpc/tvm_hexagon_remote_nd.h"
#include "fastrpc/include/tvm_remote.h"
#include "fastrpc/include/tvm_remote_nd.h"
namespace tvm {
namespace runtime {
......@@ -39,15 +39,15 @@ namespace hexagon {
* Unify the handling of domain and non-domain functions.
*
* In most cases, for a function "foo", the domain version will be called
* "tvm_hexagon_remote_foo", and the non-domain version will have "nd_foo".
* "tvm_remote_foo", and the non-domain version will have "nd_foo".
* The interfaces will be the same, except:
* - the domain version will take "remote_handle64" as the first parameter,
* while the non-domain version will not:
* int tvm_hexagon_remote_foo (remote_handle64 h, param1, param2, ...);
* int tvm_hexagon_remote_nd_foo (param1, param2, ...);
* int tvm_remote_foo (remote_handle64 h, param1, param2, ...);
* int tvm_remote_nd_foo (param1, param2, ...);
* - any parameter of type "buffer" in the IDL, will be converted into a
* type "tvm_hexagon_remote_buffer" for domain functions, and into
* "tvm_hexagon_remote_nd_buffer" for non-domain functions. These two
* type "tvm_remote_buffer" for domain functions, and into
* "tvm_remote_nd_buffer" for non-domain functions. These two
* types are identical, but since they are declared in two different IDLs,
* they get different names.
*
......@@ -55,32 +55,32 @@ namespace hexagon {
* since the pointee types are different, this is enough to create a
* difference in the function signatures even if the "remote_handle64"
* parameter is ignored. For this reason, in all function types, the
* types "tvm_hexagon_remote_buffer *" and "tvm_hexagon_remote_nd_buffer *",
* types "tvm_remote_buffer *" and "tvm_remote_nd_buffer *",
* both const and non-const, are replaced with "void *", with the
* corresponding const-qualification. This is done by the templates
* "replace_pointee_type" and "map_tuple_element" below.
*
* The following functions are subject to the uniform handling:
*
* tvm_hexagon_remote_load_library (remote_handle64 h, p1, p2, ...)
* tvm_hexagon_remote_release_library
* tvm_hexagon_remote_get_symbol
* tvm_hexagon_remote_kernel
* tvm_hexagon_remote_close
* tvm_hexagon_remote_alloc_vtcm
* tvm_hexagon_remote_free_vtcm
* tvm_remote_load_library (remote_handle64 h, p1, p2, ...)
* tvm_remote_release_library
* tvm_remote_get_symbol
* tvm_remote_kernel
* tvm_remote_close
* tvm_remote_alloc_vtcm
* tvm_remote_free_vtcm
*
* tvm_hexagon_remote_nd_load_library (p1, p2, ...)
* tvm_hexagon_remote_nd_release_library
* tvm_hexagon_remote_nd_get_symbol
* tvm_hexagon_remote_nd_kernel
* tvm_hexagon_remote_nd_close
* tvm_remote_nd_load_library (p1, p2, ...)
* tvm_remote_nd_release_library
* tvm_remote_nd_get_symbol
* tvm_remote_nd_kernel
* tvm_remote_nd_close
*
* The "open" functions differ in their parameters in different ways, and
* need to be handled individually.
*
* tvm_hexagon_remote_open
* tvm_hexagon_remote_nd_open
* tvm_remote_open
* tvm_remote_nd_open
*/
namespace {
......@@ -157,35 +157,35 @@ class StubAPI {
private:
// Create types for each remote function. For functions that take
// a pointer to tvm_hexagon_remote_buffer or tvm_hexagon_remote_nd_buffer,
// a pointer to tvm_remote_buffer or tvm_remote_nd_buffer,
// replace that pointer with pointer to void to make pointers to these
// two types identical in the function types created below.
// For example, int foo(tvm_hexagon_remote_buffer*) and
// int bar(tvm_hexagon_remote_nd_buffer*) should both have the same type.
// For example, int foo(tvm_remote_buffer*) and
// int bar(tvm_remote_nd_buffer*) should both have the same type.
// MAPTYPE(fn, ty) declares the alias `fn_t` by passing the type of the
// global function `fn` through map_func_type<ty, void, ...>.
// NOTE(review): map_func_type is defined outside this excerpt; presumably it
// rewrites `ty` in the signature to `void`, which is what would let the
// domain and non-domain variants share one function type -- confirm.
#define MAPTYPE(fn, ty) \
using fn##_t = typename map_func_type<ty, void, decltype(::fn)>::type;
// Aliases for the entry points named tvm_hexagon_remote_*.
MAPTYPE(tvm_hexagon_remote_load_library, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_release_library, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_get_symbol, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_kernel, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_close, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_alloc_vtcm, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_free_vtcm, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_call_mmap64, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_nd_load_library, tvm_hexagon_remote_nd_buffer)
MAPTYPE(tvm_hexagon_remote_nd_release_library, tvm_hexagon_remote_nd_buffer)
MAPTYPE(tvm_hexagon_remote_nd_get_symbol, tvm_hexagon_remote_nd_buffer)
MAPTYPE(tvm_hexagon_remote_nd_kernel, tvm_hexagon_remote_nd_buffer)
// NOTE(review): nd_close and nd_call_mmap64 are mapped with the non-nd
// buffer type, unlike the other nd_* entries; presumably harmless if their
// signatures contain no buffer parameter -- confirm against the prototypes.
MAPTYPE(tvm_hexagon_remote_nd_close, tvm_hexagon_remote_buffer)
MAPTYPE(tvm_hexagon_remote_nd_call_mmap64, tvm_hexagon_remote_buffer)
// Aliases for the entry points named tvm_remote_*.
MAPTYPE(tvm_remote_load_library, tvm_remote_buffer)
MAPTYPE(tvm_remote_release_library, tvm_remote_buffer)
MAPTYPE(tvm_remote_get_symbol, tvm_remote_buffer)
MAPTYPE(tvm_remote_kernel, tvm_remote_buffer)
MAPTYPE(tvm_remote_close, tvm_remote_buffer)
MAPTYPE(tvm_remote_alloc_vtcm, tvm_remote_buffer)
MAPTYPE(tvm_remote_free_vtcm, tvm_remote_buffer)
MAPTYPE(tvm_remote_call_mmap64, tvm_remote_buffer)
MAPTYPE(tvm_remote_nd_load_library, tvm_remote_nd_buffer)
MAPTYPE(tvm_remote_nd_release_library, tvm_remote_nd_buffer)
MAPTYPE(tvm_remote_nd_get_symbol, tvm_remote_nd_buffer)
MAPTYPE(tvm_remote_nd_kernel, tvm_remote_nd_buffer)
// NOTE(review): same remark as above -- these two nd_* entries use the
// non-nd buffer type.
MAPTYPE(tvm_remote_nd_close, tvm_remote_buffer)
MAPTYPE(tvm_remote_nd_call_mmap64, tvm_remote_buffer)
// The helper macro is only needed for the alias declarations above.
#undef MAPTYPE
// For remote functions whose prototypes differ significantly between
// the domain and non-domain versions, create the types directly.
// DECLTYPE(fn) declares the alias `fn_t` as the exact type of the global
// function `fn`; unlike MAPTYPE, no parameter type is substituted.
#define DECLTYPE(fn) using fn##_t = decltype(::fn);
DECLTYPE(tvm_hexagon_remote_open)
DECLTYPE(tvm_hexagon_remote_nd_open)
DECLTYPE(tvm_remote_open)
DECLTYPE(tvm_remote_nd_open)
// The rpcmem functions get their aliases the same way.
DECLTYPE(rpcmem_init)
DECLTYPE(rpcmem_deinit)
......@@ -214,9 +214,9 @@ class StubAPI {
// Token pasting is done in two steps so that macro arguments are fully
// expanded before `##` is applied: CONCAT_STR expands its arguments and
// CONCAT_STR_FOR_REAL performs the actual paste.
#define CONCAT_STR_FOR_REAL(a, b) a##b
#define CONCAT_STR(a, b) CONCAT_STR_FOR_REAL(a, b)
// FUNC, FUNC_D and FUNC_ND build a full entry-point name by prepending a
// prefix to `name`. FUNC and FUNC_D use the same prefix; FUNC_ND uses the
// prefix that additionally contains `nd_`.
#define FUNC(name) CONCAT_STR(tvm_hexagon_remote_, name)
#define FUNC_D(name) CONCAT_STR(tvm_hexagon_remote_, name)
#define FUNC_ND(name) CONCAT_STR(tvm_hexagon_remote_nd_, name)
#define FUNC(name) CONCAT_STR(tvm_remote_, name)
#define FUNC_D(name) CONCAT_STR(tvm_remote_, name)
#define FUNC_ND(name) CONCAT_STR(tvm_remote_nd_, name)
// PTRNAME(fn) yields the identifier p<fn>_ (a leading `p` and a trailing
// underscore), e.g. PTRNAME(tvm_remote_open) is ptvm_remote_open_.
#define PTRNAME(fn) CONCAT_STR(p, CONCAT_STR(fn, _))
#define DECLFUNC(name) \
......@@ -254,11 +254,11 @@ class StubAPI {
#undef DECLSFUNC
#undef DECLFUNC_D
// Selects the open entry point based on enable_domains_ and returns its
// result. When enable_domains_ is set, `uri` and `handle` are forwarded to
// the function pointer for the two-argument open. Otherwise the function
// pointer for nd_open is called with no arguments, and `uri` and `handle`
// are not used.
int tvm_hexagon_remote_open(const char* uri, remote_handle64* handle) const {
int tvm_remote_open(const char* uri, remote_handle64* handle) const {
if (enable_domains_) {
return PTRNAME(tvm_hexagon_remote_open)(uri, handle);
return PTRNAME(tvm_remote_open)(uri, handle);
}
return PTRNAME(tvm_hexagon_remote_nd_open)();
return PTRNAME(tvm_remote_nd_open)();
}
static const StubAPI* Global();
......@@ -268,23 +268,23 @@ class StubAPI {
// NOTE(review): presumably the handle of the dynamically loaded stub
// library; the code that assigns it is outside this excerpt -- confirm.
void* lib_handle_ = nullptr;
// DECLPTR(fn) declares a data member named PTRNAME(fn) (i.e. p<fn>_) of
// type `fn_t*`, initialised to nullptr.
#define DECLPTR(fn) fn##_t* PTRNAME(fn) = nullptr
// Function pointers for the entry points named tvm_hexagon_remote_*.
DECLPTR(tvm_hexagon_remote_load_library);
DECLPTR(tvm_hexagon_remote_release_library);
DECLPTR(tvm_hexagon_remote_get_symbol);
DECLPTR(tvm_hexagon_remote_kernel);
DECLPTR(tvm_hexagon_remote_open);
DECLPTR(tvm_hexagon_remote_close);
DECLPTR(tvm_hexagon_remote_alloc_vtcm);
DECLPTR(tvm_hexagon_remote_free_vtcm);
DECLPTR(tvm_hexagon_remote_call_mmap64);
DECLPTR(tvm_hexagon_remote_nd_load_library);
DECLPTR(tvm_hexagon_remote_nd_release_library);
DECLPTR(tvm_hexagon_remote_nd_get_symbol);
DECLPTR(tvm_hexagon_remote_nd_kernel);
DECLPTR(tvm_hexagon_remote_nd_open);
DECLPTR(tvm_hexagon_remote_nd_close);
DECLPTR(tvm_hexagon_remote_nd_call_mmap64);
// Function pointers for the entry points named tvm_remote_*.
DECLPTR(tvm_remote_load_library);
DECLPTR(tvm_remote_release_library);
DECLPTR(tvm_remote_get_symbol);
DECLPTR(tvm_remote_kernel);
DECLPTR(tvm_remote_open);
DECLPTR(tvm_remote_close);
DECLPTR(tvm_remote_alloc_vtcm);
DECLPTR(tvm_remote_free_vtcm);
DECLPTR(tvm_remote_call_mmap64);
DECLPTR(tvm_remote_nd_load_library);
DECLPTR(tvm_remote_nd_release_library);
DECLPTR(tvm_remote_nd_get_symbol);
DECLPTR(tvm_remote_nd_kernel);
DECLPTR(tvm_remote_nd_open);
DECLPTR(tvm_remote_nd_close);
DECLPTR(tvm_remote_nd_call_mmap64);
// The helper macro is only needed for the member declarations above.
#undef DECLPTR
// "System" functions.
......
......@@ -73,7 +73,9 @@ ALLOW_EXTENSION = {
"sdc",
# generated parser
"interp",
"tokens"
"tokens",
# interface definition
"idl",
}
# List of file names allowed
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment