/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * * \file pynq_driver.c * \brief VTA driver for Zynq SoC boards with Pynq support (see pynq.io). */ #include <vta/driver.h> #include <thread> #include "pynq_driver.h" void* VTAMemAlloc(size_t size, int cached) { assert(size <= VTA_MAX_XFER); // Rely on the pynq-specific cma library return cma_alloc(size, cached); } void VTAMemFree(void* buf) { // Rely on the pynq-specific cma library cma_free(buf); } vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { return cma_get_phy_addr(buf); } void VTAMemCopyFromHost(void* dst, const void* src, size_t size) { // For SoC-based FPGAs that used shared memory with the CPU, use memcopy() memcpy(dst, src, size); } void VTAMemCopyToHost(void* dst, const void* src, size_t size) { // For SoC-based FPGAs that used shared memory with the CPU, use memcopy() memcpy(dst, src, size); } void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) { // Call the cma_flush_cache on the CMA buffer // so that the FPGA can read the buffer data. cma_flush_cache(vir_addr, phy_addr, size); } void VTAInvalidateCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) { // Call the cma_invalidate_cache on the CMA buffer // so that the host needs to read the buffer data. cma_invalidate_cache(vir_addr, phy_addr, size); } void *VTAMapRegister(uint32_t addr) { // Align the base address with the pages uint32_t virt_base = addr & ~(getpagesize() - 1); // Calculate base address offset w.r.t the base address uint32_t virt_offset = addr - virt_base; // Open file and mmap uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC); return mmap(NULL, (VTA_IP_REG_MAP_RANGE + virt_offset), PROT_READ|PROT_WRITE, MAP_SHARED, mmap_file, virt_base); } void VTAUnmapRegister(void *vta) { // Unmap memory int status = munmap(vta, VTA_IP_REG_MAP_RANGE); assert(status == 0); } void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val) { *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset)) = val; } uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) { return *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset)); } class VTADevice { public: VTADevice() { // VTA stage handles vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR); vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR); vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR); vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR); } ~VTADevice() { // Close VTA stage handle VTAUnmapRegister(vta_fetch_handle_); VTAUnmapRegister(vta_load_handle_); VTAUnmapRegister(vta_compute_handle_); VTAUnmapRegister(vta_store_handle_); } int Run(vta_phy_addr_t insn_phy_addr, uint32_t insn_count, uint32_t wait_cycles) { VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_COUNT_OFFSET, insn_count); VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy_addr); VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_INP_ADDR_OFFSET, 0); VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_WGT_ADDR_OFFSET, 0); VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_UOP_ADDR_OFFSET, 0); VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_BIAS_ADDR_OFFSET, 0); VTAWriteMappedReg(vta_store_handle_, VTA_STORE_OUT_ADDR_OFFSET, 0); // VTA start VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START); VTAWriteMappedReg(vta_load_handle_, 0x0, VTA_AUTORESTART); VTAWriteMappedReg(vta_compute_handle_, 0x0, VTA_AUTORESTART); VTAWriteMappedReg(vta_store_handle_, 0x0, VTA_AUTORESTART); // Loop until the VTA is done unsigned t, flag = 0; for (t = 0; t < wait_cycles; ++t) { flag = VTAReadMappedReg(vta_compute_handle_, VTA_COMPUTE_DONE_RD_OFFSET); if (flag == VTA_DONE) break; std::this_thread::yield(); } // Report error if timeout return t < wait_cycles ? 0 : 1; } private: // VTA handles (register maps) void* vta_fetch_handle_{nullptr}; void* vta_load_handle_{nullptr}; void* vta_compute_handle_{nullptr}; void* vta_store_handle_{nullptr}; }; VTADeviceHandle VTADeviceAlloc() { return new VTADevice(); } void VTADeviceFree(VTADeviceHandle handle) { delete static_cast<VTADevice*>(handle); } int VTADeviceRun(VTADeviceHandle handle, vta_phy_addr_t insn_phy_addr, uint32_t insn_count, uint32_t wait_cycles) { return static_cast<VTADevice*>(handle)->Run( insn_phy_addr, insn_count, wait_cycles); }