Commit 734df8d5 by Liangfu Chen Committed by Thierry Moreau

[VTA] de10-nano driver (#3394)

* rework;

* `de10-nano` -> `de10nano`;

* fix compilation error;

* bug fix;

* Update install.md

* Update install.md

* Update install.md

* update with current runtime;

* add debug messages;

* bug fix in cma kernel module;
parent 66235d1c
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build script for the out-of-tree "cma" kernel module.
#
# The file is read twice: once when invoked directly (KERNELRELEASE is empty),
# where it re-invokes make inside the kernel source tree, and once by the
# kernel build system itself, where it only declares the module objects and
# the compile flags.
PATH_SETTINGS?=$(PWD)/settings.mk
# Driver configuration file
include $(PATH_SETTINGS)
ifeq ($(KERNELRELEASE),)
# Invoked directly: delegate to the kernel build system.
# $(MAKE) is used instead of a bare "make" so that command-line flags and the
# jobserver are propagated to the sub-make.
.PHONY: default clean
default:
	$(MAKE) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C $(KSOURCE_DIR) M=$(CURDIR) modules
clean:
	$(MAKE) -C $(KSOURCE_DIR) M=$(CURDIR) clean
else
# run from Kernel Makefile
obj-m := cma.o
# CMA_IOCTL_MAGIC is the macro name tested by cma.h; CMA_IOC_MAGIC is kept as
# well so that any source still relying on the old name keeps building.
ccflags-y := -DDRIVER_NODE_NAME="\"$(DRIVER_NODE_NAME)\"" \
	-DCMA_DEBUG=$(CMA_DEBUG) \
	-DCMA_IOC_MAGIC=$(CMA_IOC_MAGIC) \
	-DCMA_IOCTL_MAGIC=$(CMA_IOC_MAGIC)
endif
/* cma.h
*
* The MIT License (MIT)
*
* COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
* AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
#define VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
/*
 * ioctl magic number of the CMA driver.
 *
 * The value should come from the settings.mk file. The build passes it as
 * CMA_IOC_MAGIC, so that name is honoured here as well; 0xf2 is used only
 * when neither macro is defined.
 */
#ifndef CMA_IOCTL_MAGIC
#ifdef CMA_IOC_MAGIC
#define CMA_IOCTL_MAGIC CMA_IOC_MAGIC
#else
#define CMA_IOCTL_MAGIC 0xf2
#endif
#endif
/*
 * ioctl commands. Each one is built with _IOC(direction, magic, number, size)
 * and carries a 4-byte argument; the command numbers run from 1 to
 * CMA_IOCTL_MAXNR.
 */
#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)
/* Highest command number defined above */
#define CMA_IOCTL_MAXNR 5
#endif // VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
# ==================== COMPILATION RELATED SETTINGS ====================
# Path to the kernel sources (from "./driver", if relative path is used)
# This is the directory handed to "make -C" by the driver Makefile.
KSOURCE_DIR=/opt/intel/linux-socfpga-rel_socfpga-4.9.78-ltsi_18.08.02_pr
# Cross compiler "prepend" string
CROSS_COMPILE=arm-linux-gnueabihf-
# Architecture
ARCH=arm
# Compile with debug information
# Assigned with "?=", so a value already set by the caller takes precedence.
CMA_DEBUG?=0
# ==================== DRIVER RELATED SETTINGS ====================
# Node name used in "/dev" folder
# The value includes its double quotes; the driver Makefile wraps it in escaped
# quotes again when passing it as -DDRIVER_NODE_NAME.
DRIVER_NODE_NAME="cma"
# Unique (across system) ioctl magic number. Every ioctl interface should have one.
# 0xf2 is also the fallback value hard-coded in the C sources.
CMA_IOC_MAGIC=0xf2
......@@ -82,7 +82,11 @@ elseif(PYTHON)
# Rules for Zynq-class FPGAs with pynq OS support (see pynq.io)
if(${VTA_TARGET} STREQUAL "pynq" OR
${VTA_TARGET} STREQUAL "ultra96")
list(APPEND FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
file(GLOB FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
# Rules for Pynq v2.4
# Locate the cma library. PATHS (not PATH) is the find_library keyword that
# introduces additional search directories; a bare PATH would be parsed as
# one more candidate library name.
find_library(__cma_lib NAMES cma PATHS /usr/lib)
elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules
file(GLOB FPGA_RUNTIME_SRCS vta/src/de10nano/*.cc vta/src/*.cc)
endif()
# Target lib: vta
add_library(vta SHARED ${FPGA_RUNTIME_SRCS})
......@@ -91,11 +95,17 @@ elseif(PYTHON)
string(SUBSTRING ${__def} 3 -1 __strip_def)
target_compile_definitions(vta PUBLIC ${__strip_def})
endforeach()
# Rules for Pynq v2.4
find_library(__cma_lib NAMES cma PATH /usr/lib)
target_link_libraries(vta ${__cma_lib})
# Board-specific link and compile settings for the vta library.
# The expansions are quoted so that an empty or unset VTA_TARGET compares as
# an empty string instead of producing a malformed if() expression.
if("${VTA_TARGET}" STREQUAL "pynq" OR
   "${VTA_TARGET}" STREQUAL "ultra96")
  # Link against the library stored in __cma_lib. The plain (keyword-less)
  # signature is kept on purpose: CMake rejects mixing keyword and plain
  # signatures on one target, and other calls on vta are not visible here.
  target_link_libraries(vta ${__cma_lib})
elseif("${VTA_TARGET}" STREQUAL "de10nano")  # DE10-Nano rules
  target_compile_definitions(vta PUBLIC VTA_MAX_XFER=2097152)  # (1<<21)
  # NOTE(review): absolute path to a specific toolchain install; builds on
  # machines with a different install location will not find these headers.
  target_include_directories(vta PUBLIC
    "/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include")
endif()
endif()
else()
message(STATUS "Cannot found python in env, VTA build is skipped..")
endif()
......@@ -20,8 +20,9 @@ VTA Installation Guide
We present three installation guides, each extending on the previous one:
1. [Simulator installation](#vta-simulator-installation)
2. [Hardware test setup](#vta-pynq-based-test-setup)
3. [FPGA toolchain installation](#vta-fpga-toolchain-installation)
2. [PYNQ-based test setup](#vta-pynq-based-test-setup)
3. [Custom test setup for Intel FPGA](#vta-custom-test-setup-for-intel-fpga)
4. [FPGA toolchain installation](#vta-fpga-toolchain-installation)
## VTA Simulator Installation
......@@ -182,9 +183,80 @@ The performance metrics measured on the Pynq board will be reported for each con
You can also try out our [VTA programming tutorials](https://docs.tvm.ai/vta/tutorials/index.html).
## VTA Custom Test Setup for Intel FPGA
Similar to the PYNQ-side setup steps, this third guide details how to set up the Linux environment for Intel FPGA boards such as the DE10-Nano.
In terms of hardware components, you will need the [DE10-Nano Development Kit](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&No=1046), which can be acquired for $130, or $100 for academics, from [Terasic](https://www.terasic.com.tw/). A microSD card is delivered with the kit, and power and USB cables are included as well. However, an additional Ethernet cable is needed to connect the board to the LAN.
The rest part of this guide would provide the steps to
* Flash the microSD card with latest Angstrom Linux image
* Cross compilation setup
* Device-side RPC server setup and deployment
### DE10-Nano Board Setup
Before powering up the device, we need to flash the microSD card with the latest Angstrom Linux image.
#### Flash SD Card and Boot Angstrom Linux
To flash SD card and boot Linux on DE10-Nano, it is recommended to navigate to the [Resource](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&CategoryNo=167&No=1046&PartNo=4) tab of the DE10-Nano product page from Terasic Inc.
After registration and login on the webpage, the prebuilt Angstrom Linux image will be available for downloading and flashing.
Specifically, to flash the downloaded Linux SD card image into your physical SD card:
First, extract the gzipped archive file.
``` bash
tar xf de10-nano-image-Angstrom-v2016.12.socfpga-sdimg.2017.03.31.tgz
```
This would produce a single SD card image named `de10-nano-image-Angstrom-v2016.12.socfpga-sdimg` (approx. 2.4 GB), it contains all the file systems to boot Angstrom Linux.
Second, plug an SD card that is ready to flash into your PC, and identify the device id for the disk with `fdisk -l`, or with `gparted` if you prefer a GUI. The typical device id for your disk is likely to be `/dev/sdb`.
Then, flash the disk image into your physical SD card with the following command:
``` bash
# NOTE: root privilege is typically required to run the following command.
dd if=de10-nano-image-Angstrom-v2016.12.socfpga-sdimg of=/dev/sdb status=progress
```
This would take a few minutes for your PC to write the whole file systems into the SD card.
After this process completes, you are ready to unmount the SD card and insert it into your DE10-Nano board.
Now you can connect the power cable and serial port to boot the Angstrom Linux.
> Note: When booting from the microSD card, you might notice an incompatibility of the Linux kernel `zImage` on the microSD card.
> In this case, you might need to build the `zImage` file of your own from [socfpga-4.9.78-ltsi](https://github.com/altera-opensource/linux-socfpga/tree/socfpga-4.9.78-ltsi) branch of the [linux-socfpga](https://github.com/altera-opensource/linux-socfpga) repository.
> For a quick fix, you can also download a prebuilt version of the `zImage` file [here](https://raw.githubusercontent.com/liangfu/de10-nano-supplement/master/zImage).
After connecting the USB cables to the DE10-Nano board, power on the board by connecting the power cable. You may then connect to the serial port of the device by using `minicom` on your host PC:
``` bash
# NOTE: root privilege is typically required to run the following command.
minicom -D /dev/ttyUSB0
```
The default user name for the device would be `root`, and the password is empty for the default user.
You may now start to install supporting Python3 packages (TVM has dropped the support for Python2), specifically, they are `numpy`, `attrs` and `decorator`.
> Note: You might fail to install `numpy` by using `pip3` on the DE10-Nano device.
> In that case, you have the option to either build your own filesystem image for the board from [meta-de10-nano](https://github.com/intel/meta-de10-nano) repository;
> an alternative option is to download prebuilt packages from existing Linux distributions, e.g. Debian.
> For a quick fix, we have concatenated the supplementary binary files [here](https://raw.githubusercontent.com/liangfu/de10-nano-supplement/master/rootfs_supplement.tgz), and you can extract the files into the root filesystem.
#### Install Required Python Packages
After accessing bash terminal from the serial port, we need to install required Python packages before building and installing TVM and VTA programs.
#### Build Additional Components to Use VTA Bitstream
To use the above built bitstream on DE10-Nano hardware, several additional components need to be compiled for the system.
Specifically, to compile application executables for the system, you need to download and install [SoCEDS](http://fpgasoftware.intel.com/soceds/18.1/?edition=standard&download_manager=dlm3&platform=linux) (recommended), or alternatively install the `g++-arm-linux-gnueabihf` package on your host machine. You will also need a `cma` kernel module to allocate contiguous memory, and a driver for communicating with the VTA subsystem.
## VTA FPGA Toolchain Installation
This third and last guide allows users to generate custom VTA bitstreams using free-to-use Xilinx or Intel compilation toolchains.
This last guide allows users to generate custom VTA bitstreams using free-to-use Xilinx or Intel compilation toolchains.
### Xilinx Toolchain Installation
......@@ -323,50 +395,6 @@ This process might be a bit lengthy, and might take up to half an hour to comple
Once the compilation completes, the generated bitstream can be found under `<tvmroot>/vta/build/hardware/intel/quartus/<configuration>/export/vta.rbf`. You can also open the Quartus project file (.qpf) available at `<tvmroot>/vta/build/hardware/intel/quartus/<configuration>/de10_nano_top.qpf` to look around the generated reports.
#### Flash SD Card and Boot Angstrom Linux
To flash SD card and boot Linux on DE10-Nano, it is recommended to navigate to the [Resource](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&CategoryNo=167&No=1046&PartNo=4) tab of the DE10-Nano product page from Terasic Inc.
After registeration and login on the webpage, the prebuild Angstrom Linux image would be available for downloading and flashing.
Specifically, to flash the downloaded Linux SD card image into your physical SD card:
First, extract the gzipped archive file.
``` bash
tar xf de10-nano-image-Angstrom-v2016.12.socfpga-sdimg.2017.03.31.tgz
```
This would produce a single SD card image named `de10-nano-image-Angstrom-v2016.12.socfpga-sdimg` (approx. 2.4 GB), it contains all the file systems to boot Angstrom Linux.
Second, plugin a SD card that is ready to flash in your PC, and identify the device id for the disk with `fdisk -l`, or `gparted` if you feel better to use GUI. The typical device id for your disk would likely to be `/dev/sdb`.
Then, flash the disk image into your physical SD card with the following command:
``` bash
# NOTE: root privilege is typically required to run the following command.
dd if=de10-nano-image-Angstrom-v2016.12.socfpga-sdimg of=/dev/sdb status=progress
```
This would take a few minutes for your PC to write the whole file systems into the SD card.
After this process completes, you are ready to unmount the SD card and insert it into your DE10-Nano board.
Now you can connect the power cable and serial port to boot the Angstrom Linux.
#### Build Additional Components to Use VTA Bitstream
To use the above built bitstream on DE10-Nano hardware, several additional components need to be compiled for the system.
Specifically, to compile application executables for the system, you need to download and install [SoCEDS](http://fpgasoftware.intel.com/soceds/18.1/?edition=standard&download_manager=dlm3&platform=linux), or alternatively install the `g++-arm-linux-gnueabihf` package on your host machine. You would also need a `cma` kernel module to allocate contigous memory, and a driver for communicating with the VTA subsystem.
For easier program debugging (e.g. `metal_test` program at `vta/tests/hardware/metal_test`), it is also recommended to install `gdbserver` on you device. For instance, you can start your program on the device by runninng:
``` bash
gdbserver localhost:4444 ./metal_test
```
, and then you can set break points and print values of desired varilables on the host:
``` bash
gdb-multiarch --fullname metal_test
(gdb) target remote <device-ip>:4444
```
In addition, to enable fully featured VTA for DE10-Nano, you would also need `python3-numpy`, `python3-decorate`, `python3-attrs` to be cross-compiled.
### Use the Custom Bitstream
We can program the new VTA FPGA bitstream by setting the bitstream path of the `vta.program_fpga()` function in the tutorial examples, or in the `test_program_rpc.py` script.
......
......@@ -133,7 +133,9 @@ def main():
cflags_str = " ".join(pkg.cflags)
if pkg.TARGET == "pynq":
cflags_str += " -DVTA_TARGET_PYNQ"
if pkg.TARGET == "ultra96":
elif cfg.TARGET == "de10nano":
cflags_str += " -DVTA_TARGET_DE10_NANO"
elif pkg.TARGET == "ultra96":
cflags_str += " -DVTA_TARGET_ULTRA96"
print(cflags_str)
......
......@@ -229,11 +229,11 @@ class Environment(object):
@property
def target_host(self):
"""The target host"""
if self.TARGET == "pynq":
if self.TARGET in ["pynq", "de10nano"]:
return "llvm -target=armv7-none-linux-gnueabihf"
if self.TARGET == "ultra96":
elif self.TARGET == "ultra96":
return "llvm -target=aarch64-linux-gnu"
if self.TARGET == "sim" or self.TARGET == "tsim":
elif self.TARGET in ["sim", "tsim"]:
return "llvm"
raise ValueError("Unknown target %s" % self.TARGET)
......
......@@ -52,7 +52,7 @@ def run(run_func):
assert simulator.enabled()
run_func(env, rpc.LocalSession())
elif env.TARGET in ["pynq", "ultra96"]:
elif env.TARGET in ["pynq", "ultra96", "de10nano"]:
# The environment variables below should be set if we are using
# a tracker to obtain a remote for a test device
tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* The MIT License (MIT)
*
* COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
* AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/
/*!
* Copyright (c) 2018 by Contributors
* \file cma_api.cc
* \brief Application layer implementation for contiguous memory allocation.
*/
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "cma_api.h"
/* ioctl magic number; 0xf2 is used unless the build already defines it */
#ifndef CMA_IOCTL_MAGIC
#define CMA_IOCTL_MAGIC 0xf2
#endif
/*
 * ioctl commands, built with _IOC(direction, magic, number, size).
 * Every command carries a 4-byte argument; numbers run from 1 to
 * CMA_IOCTL_MAXNR.
 */
#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)
#define CMA_IOCTL_MAXNR 5
/* Debug output is disabled unless the build sets CMA_DEBUG to 1 */
#ifndef CMA_DEBUG
#define CMA_DEBUG 0
#endif
/* Name of the device node under "/dev" */
#ifndef DRIVER_NODE_NAME
#define DRIVER_NODE_NAME "cma"
#endif
/* __DEBUG prints a prefixed message via printf when CMA_DEBUG == 1 and
 * expands to nothing otherwise */
#if CMA_DEBUG == 1
#define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
#else
#define __DEBUG(fmt, args...)
#endif
/* Round N up to the next multiple of S (integer arithmetic) */
#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
/* Shared implementation behind cma_alloc_cached() and cma_alloc_noncached();
 * not declared in cma_api.h */
void *cma_alloc(size_t size, unsigned ioctl_cmd);
/* File descriptor of the CMA device node; assigned by cma_init() and used by
 * every other call in this file */
int cma_fd = 0;
/*
 * Open the CMA device node for reading and writing and store the resulting
 * descriptor in cma_fd.
 *
 * Returns 0 when open() succeeded, -1 otherwise.
 */
int cma_init(void) {
  __DEBUG("Opening \"/dev/" DRIVER_NODE_NAME "\" file\n");
  cma_fd = open("/dev/" DRIVER_NODE_NAME, O_RDWR);
  if (cma_fd != -1) {
    return 0;
  }
  /* open() failed: report the reason when debugging is enabled */
  __DEBUG("Failed to initialize api - \"%s\"\n", strerror(errno));
  return -1;
}
/*
 * Close the descriptor held in cma_fd.
 *
 * Returns 0 when close() succeeded, -1 otherwise.
 */
int cma_release(void) {
  __DEBUG("Closing \"/dev/" DRIVER_NODE_NAME "\" file\n");
  const int rc = close(cma_fd);
  if (rc != -1) {
    return 0;
  }
  /* close() failed: report the reason when debugging is enabled */
  __DEBUG("Failed to finilize api - \"%s\"\n", strerror(errno));
  return -1;
}
/*
 * Allocate `size` bytes through cma_alloc() using the CMA_ALLOC_CACHED
 * ioctl command. Returns whatever cma_alloc() returns (NULL on failure).
 */
void *cma_alloc_cached(size_t size) {
  void *buffer = cma_alloc(size, CMA_ALLOC_CACHED);
  return buffer;
}
/*
 * Allocate `size` bytes through cma_alloc() using the CMA_ALLOC_NONCACHED
 * ioctl command. Returns whatever cma_alloc() returns (NULL on failure).
 */
void *cma_alloc_noncached(size_t size) {
  void *buffer = cma_alloc(size, CMA_ALLOC_NONCACHED);
  return buffer;
}
/*
 * Unmap a buffer obtained from cma_alloc() and release its driver entry.
 *
 * The buffer size is first queried with CMA_GET_SIZE, the mapping is removed
 * with munmap(), and finally CMA_FREE is issued with the user-space address.
 *
 * Returns 0 on success, -1 when either ioctl fails.
 */
int cma_free(void *mem) {
  /* Two copies of the user-space address: the first is overwritten with the
   * buffer size by CMA_GET_SIZE, the second is passed to CMA_FREE. */
  unsigned data = (unsigned)mem;
  unsigned v_addr = (unsigned)mem;
  __DEBUG("Releasing contigous memory from 0x%x\n", (unsigned)mem);

  const int size_rc = ioctl(cma_fd, CMA_GET_SIZE, &data);
  if (size_rc == -1) {
    __DEBUG("cma_free - ioctl command unsuccsessful - 0\n");
    return -1;
  }

  /* data now holds the size of the mapping */
  munmap(mem, data);

  const int free_rc = ioctl(cma_fd, CMA_FREE, &v_addr);
  if (free_rc == -1) {
    __DEBUG("cma_free - ioctl command unsuccsessful - 1\n");
    return -1;
  }
  return 0;
}
/*
 * Ask the driver for the physical address behind a user-space buffer.
 *
 * The user-space address is handed to the CMA_GET_PHY_ADDR ioctl in `data`,
 * and the driver writes the physical address back into the same variable.
 *
 * Returns the physical address, or 0 when the ioctl fails.
 */
unsigned cma_get_phy_addr(void *mem) {
  unsigned data;
  __DEBUG("Getting physical address from 0x%x\n", (unsigned)mem);
  /* save user space pointer value */
  data = (unsigned)mem;
  /* get physical address */
  if ( ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1 ) {
    /* Name this function in the message (it used to say cma_free) */
    __DEBUG("cma_get_phy_addr - ioctl command unsuccsessful\n");
    return 0;
  }
  /* data now contains physical address */
  return data;
}
/*
 * Allocate and map a contiguous buffer.
 *
 * `size` is rounded up to a multiple of the page size and passed to the
 * driver with `ioctl_cmd`; the value the driver writes back is then used as
 * the mmap() offset on the device descriptor.
 *
 * Returns the mapped user-space pointer, or NULL when the ioctl or the
 * mmap() fails.
 */
void *cma_alloc(size_t size, unsigned ioctl_cmd) {
  unsigned data;
  void *mem;
  /* %zx is the conversion that matches a size_t argument */
  __DEBUG("Allocating 0x%zx bytes of contigous memory\n", size);
  /* Page align size */
  size = ROUND_UP(size, getpagesize());
  /* ioctl cmd to allocate contigous memory */
  data = (unsigned)size;
  if ( ioctl(cma_fd, ioctl_cmd, &data) == -1 ) {
    __DEBUG("cma_alloc - ioctl command unsuccsessful\n");
    return NULL;
  }
  /* at this point phy_addr is written to data */
  /* mmap memory */
  mem = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, cma_fd, data);
  if (mem == MAP_FAILED) {
    /* NOTE(review): the driver-side allocation made by the ioctl above is
     * not released on this path - confirm whether the driver needs an
     * explicit free here. */
    __DEBUG("cma_alloc - mmap unsuccsessful\n");
    return NULL;
  }
  return mem;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* \file cma_api.h
* \brief API for contiguous memory allocation driver.
*/
#ifndef VTA_DE10NANO_CMA_API_H_
#define VTA_DE10NANO_CMA_API_H_
/* size_t is used in the prototypes below, so the header pulls in its
 * definition itself instead of relying on the including file. */
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * \brief Initialize CMA api (basically perform open() syscall).
 *
 * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
 * accordingly.
 */
int cma_init(void);
/**
 * \brief Release CMA api (basically perform close() syscall).
 *
 * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
 * accordingly.
 */
int cma_release(void);
/**
 * \brief Allocate cached, physically contiguous memory.
 *
 * \param size Size in bytes.
 *
 * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
 * memory.
 */
void *cma_alloc_cached(size_t size);
/**
 * \brief Allocate noncached, physically contiguous memory.
 *
 * \param size Size in bytes.
 *
 * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
 * memory.
 */
void *cma_alloc_noncached(size_t size);
/**
 * \brief Release physically contiguous memory.
 *
 * \param mem Pointer to previously allocated contiguous memory.
 *
 * \return Returns 0 on SUCCESS, -1 on FAILURE.
 */
int cma_free(void *mem);
/**
 * \brief Get physical address of cma memory block (should be used for DMA).
 *
 * \param mem Pointer to previously allocated contiguous memory.
 *
 * \return Returns address on SUCCESS, 0 on FAILURE.
 */
unsigned cma_get_phy_addr(void *mem);
#ifdef __cplusplus
}
#endif
#endif // VTA_DE10NANO_CMA_API_H_
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* \file de10nano_driver.cc
* \brief VTA driver for DE10_Nano board.
*/
#include "de10nano_driver.h"
#include <string.h>
#include <vta/driver.h>
#include <dmlc/logging.h>
#include <thread>
#include "cma_api.h"
/*!
 * \brief Allocate a buffer through the CMA api.
 * \param size Requested size in bytes.
 * \param cached Non-zero selects cma_alloc_cached(), zero selects
 *        cma_alloc_noncached().
 * \return The pointer returned by the selected allocator.
 */
void* VTAMemAlloc(size_t size, int cached) {
  // Function-local static: cma_init() runs once, on the first allocation.
  // Its result is not inspected.
  static int init_status = cma_init();
  (void)init_status;
  return cached ? cma_alloc_cached(size) : cma_alloc_noncached(size);
}
/*!
 * \brief Release a buffer through cma_free().
 * \param buf Buffer to release; the status returned by cma_free() is dropped.
 */
void VTAMemFree(void* buf) {
  (void)cma_free(buf);
}
/*!
 * \brief Translate a buffer pointer into the address handed to the device.
 * \param buf Buffer previously obtained from the CMA api.
 * \return The value of cma_get_phy_addr(buf) plus 0x80000000.
 */
vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
  // NOTE(review): the 0x80000000 offset is presumably how the FPGA side sees
  // system memory - confirm against the hardware address map.
  const unsigned cma_addr = cma_get_phy_addr(buf);
  return cma_addr + 0x80000000;
}
/*!
 * \brief Copy `size` bytes from `src` to `dst`.
 * \param dst Destination buffer.
 * \param src Source buffer.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
  // Memory is shared between the CPU and the FPGA on this SoC, so a plain
  // memcpy() is all that is needed.
  (void)memcpy(dst, src, size);
}
/*!
 * \brief Copy `size` bytes from `src` to `dst`.
 * \param dst Destination buffer.
 * \param src Source buffer.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
  // Memory is shared between the CPU and the FPGA on this SoC, so a plain
  // memcpy() is all that is needed.
  (void)memcpy(dst, src, size);
}
/*!
 * \brief Cache flush entry point; not implemented for this board.
 *
 * Every call fails the check below. The printf that used to follow the check
 * was removed because a failed CHECK is fatal and the line could never run.
 *
 * \param offset Unused.
 * \param buf Unused.
 * \param size Unused.
 */
void VTAFlushCache(void * offset, vta_phy_addr_t buf, int size) {
  CHECK(false) << "VTAFlushCache not implemented for de10nano";
}
/*!
 * \brief Cache invalidate entry point; not implemented for this board.
 *
 * Every call fails the check below. The printf that used to follow the check
 * was removed because a failed CHECK is fatal and the line could never run.
 *
 * \param offset Unused.
 * \param buf Unused.
 * \param size Unused.
 */
void VTAInvalidateCache(void * offset, vta_phy_addr_t buf, int size) {
  CHECK(false) << "VTAInvalidateCache not implemented for de10nano";
}
/*!
 * \brief Map a physical register range into user space through /dev/mem.
 *
 * The mapping starts at the page containing `addr` and covers
 * VTA_IP_REG_MAP_RANGE bytes plus the distance of `addr` from that page start.
 * The returned pointer is the start of the mapping (the page base), which is
 * the same as `addr` only when `addr` is page aligned.
 *
 * \param addr Physical address of the register range.
 * \return Pointer to the mapping, or MAP_FAILED when /dev/mem cannot be
 *         opened or mmap() fails.
 */
void *VTAMapRegister(uint32_t addr) {
  // Align the base address with the pages
  uint32_t virt_base = addr & ~(getpagesize() - 1);
  // Calculate base address offset w.r.t the base address
  uint32_t virt_offset = addr - virt_base;
  // open() returns a signed descriptor, -1 on failure
  int mmap_file = open("/dev/mem", O_RDWR|O_SYNC);
  if (mmap_file < 0) {
    return MAP_FAILED;
  }
  void *mapping = mmap(NULL,
                       (VTA_IP_REG_MAP_RANGE + virt_offset),
                       PROT_READ|PROT_WRITE,
                       MAP_SHARED,
                       mmap_file,
                       virt_base);
  // A mapping stays valid after its descriptor is closed, so the descriptor
  // is released here instead of being leaked on every call.
  close(mmap_file);
  return mapping;
}
/*!
 * \brief Remove a register mapping created by VTAMapRegister().
 * \param vta Start address of the mapping; VTA_IP_REG_MAP_RANGE bytes are
 *        unmapped from there.
 */
void VTAUnmapRegister(void *vta) {
  const int rc = munmap(vta, VTA_IP_REG_MAP_RANGE);
  // The result is verified only in builds where assert() is active
  assert(rc == 0);
  (void)rc;
}
/*!
 * \brief Store a 32-bit value at a byte offset from a mapped base address.
 * \param base_addr Start of the mapped register range.
 * \param offset Byte offset of the register from base_addr.
 * \param val Value to store.
 */
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val) {
  char* reg_bytes = reinterpret_cast<char*>(base_addr) + offset;
  // volatile access so the store is actually performed
  volatile uint32_t* reg = reinterpret_cast<volatile uint32_t*>(reg_bytes);
  *reg = val;
}
/*!
 * \brief Load a 32-bit value from a byte offset of a mapped base address.
 * \param base_addr Start of the mapped register range.
 * \param offset Byte offset of the register from base_addr.
 * \return The value read.
 */
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
  char* reg_bytes = reinterpret_cast<char*>(base_addr) + offset;
  // volatile access so the load is actually performed
  volatile uint32_t* reg = reinterpret_cast<volatile uint32_t*>(reg_bytes);
  return *reg;
}
/*!
 * \brief Owner of the register mapping used to drive the VTA.
 *
 * The mapping of VTA_HOST_ADDR is created on construction and removed on
 * destruction.
 */
class VTADevice {
 public:
  /*! \brief Map the register range at VTA_HOST_ADDR. */
  VTADevice() : vta_host_handle_(VTAMapRegister(VTA_HOST_ADDR)) {}

  /*! \brief Unmap the register range. */
  ~VTADevice() {
    VTAUnmapRegister(vta_host_handle_);
  }

  /*!
   * \brief Start the VTA and poll for completion.
   * \param insn_phy_addr Value written to register offset 0x0c.
   * \param insn_count Value written to register offset 0x08.
   * \param wait_cycles Maximum number of polls of register offset 0x00.
   * \return 0 when bit 0x2 of register 0x00 was observed within wait_cycles
   *         polls, 1 on timeout.
   */
  int Run(vta_phy_addr_t insn_phy_addr,
          uint32_t insn_count,
          uint32_t wait_cycles) {
    // Program the run parameters, then write VTA_START to offset 0x0
    VTAWriteMappedReg(vta_host_handle_, 0x04, 0);
    VTAWriteMappedReg(vta_host_handle_, 0x08, insn_count);
    VTAWriteMappedReg(vta_host_handle_, 0x0c, insn_phy_addr);
    VTAWriteMappedReg(vta_host_handle_, 0x0, VTA_START);

    // Poll register 0x00, yielding the thread between reads
    for (unsigned poll = 0; poll < wait_cycles; ++poll) {
      const unsigned status = VTAReadMappedReg(vta_host_handle_, 0x00);
      if ((status & 0x2) == 0x2) {
        return 0;
      }
      std::this_thread::yield();
    }
    // The bit never showed up within wait_cycles polls
    return 1;
  }

 private:
  /*! \brief Start of the mapped register range. */
  void* vta_host_handle_{nullptr};
};
/*!
 * \brief Create a VTADevice on the heap.
 * \return The new device as an opaque handle; release it with VTADeviceFree().
 */
VTADeviceHandle VTADeviceAlloc() {
  VTADevice* device = new VTADevice();
  return device;
}
/*!
 * \brief Destroy a device created by VTADeviceAlloc().
 * \param handle Handle of the device to delete.
 */
void VTADeviceFree(VTADeviceHandle handle) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  delete device;
}
/*!
 * \brief Forward a run request to the VTADevice behind a handle.
 * \param handle Handle returned by VTADeviceAlloc().
 * \param insn_phy_addr Passed through to VTADevice::Run().
 * \param insn_count Passed through to VTADevice::Run().
 * \param wait_cycles Passed through to VTADevice::Run().
 * \return The result of VTADevice::Run().
 */
int VTADeviceRun(VTADeviceHandle handle,
                 vta_phy_addr_t insn_phy_addr,
                 uint32_t insn_count,
                 uint32_t wait_cycles) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  return device->Run(insn_phy_addr, insn_count, wait_cycles);
}
/*!
 * \brief Bitstream programming entry point; not implemented for this board.
 * \param bitstream Unused; every call fails the check below.
 */
void VTAProgram(const char* bitstream) {
CHECK(false) << "VTAProgram not implemented for de10nano";
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* \file de10nano_driver.h
* \brief VTA driver for DE10_Nano board.
*/
#ifndef VTA_DE10NANO_DE10NANO_DRIVER_H_
#define VTA_DE10NANO_DE10NANO_DRIVER_H_
/* System headers are included before the extern "C" block is opened, so the
 * linkage specification wraps only the declarations made in this file. */
#include <assert.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#ifdef __cplusplus
extern "C" {
#endif
/*! \brief Map the register range at physical address addr into user space */
void *VTAMapRegister(uint32_t addr);
/*! \brief Remove a mapping created by VTAMapRegister */
void VTAUnmapRegister(void *vta);
/*! \brief Write a 32-bit value at a byte offset from a mapped base address */
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
/*! \brief Read a 32-bit value at a byte offset from a mapped base address */
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
/*! \brief Program the FPGA with the given bitstream */
void VTAProgram(const char* bitstream);
/*! \brief VTA configuration register address range */
#define VTA_RANGE 0x400
/*! \brief VTA configuration register start value */
#define VTA_START 0x1
/*! \brief VTA configuration register auto-restart value */
#define VTA_AUTORESTART 0x81
/*! \brief VTA configuration register done value */
#define VTA_DONE 0x2
/*! \brief VTA fetch stage configuration register address
*/
#define VTA_HOST_ADDR 0xFF220000
#ifdef __cplusplus
}
#endif
#endif // VTA_DE10NANO_DE10NANO_DRIVER_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment