Commit 9d64d321 by Thierry Moreau Committed by Tianqi Chen

[VTA] Runtime refactor to allow for non-shared memory FPGAs (e.g. F1) (#3554)

* updated runtime to support non-shared memory FPGAs for instruction and micro-op kernels

* adding driver-defined memcpy function to handle F1 cases

* refactor to include flush/invalidate in memcpy driver function

* update tsim driver

* bug fixes

* cleanup

* pre-allocate fpga readable buffers to improve perf

* fix

* remove instruction stream address rewrite pass for micro op kernels

* fix:

* white spaces

* fix lint

* avoid signed/unsigned compilation warning

* avoid signed/unsigned compilation warning

* fix

* fix

* addressing comments

* whitespace

* moving flush/invalidate out of memmove

* cleanup

* fix

* cosmetic

* rename API

* comment fix
parent 4d314833
...@@ -98,7 +98,7 @@ int VTADeviceRun(VTADeviceHandle device, ...@@ -98,7 +98,7 @@ int VTADeviceRun(VTADeviceHandle device,
#endif #endif
/*! /*!
* \brief Allocates physically contiguous region in memory (limited by MAX_XFER). * \brief Allocates physically contiguous region in memory readable/writeable by FPGA.
* \param size Size of the region in Bytes. * \param size Size of the region in Bytes.
* \param cached Region can be set to not cached (write-back) if set to 0. * \param cached Region can be set to not cached (write-back) if set to 0.
* \return A pointer to the allocated region. * \return A pointer to the allocated region.
...@@ -106,7 +106,7 @@ int VTADeviceRun(VTADeviceHandle device, ...@@ -106,7 +106,7 @@ int VTADeviceRun(VTADeviceHandle device,
void* VTAMemAlloc(size_t size, int cached); void* VTAMemAlloc(size_t size, int cached);
/*! /*!
* \brief Frees a physically contiguous region in memory. * \brief Frees a physically contiguous region in memory readable/writeable by FPGA.
* \param buf Buffer to free. * \param buf Buffer to free.
*/ */
void VTAMemFree(void* buf); void VTAMemFree(void* buf);
...@@ -119,6 +119,22 @@ void VTAMemFree(void* buf); ...@@ -119,6 +119,22 @@ void VTAMemFree(void* buf);
vta_phy_addr_t VTAMemGetPhyAddr(void* buf); vta_phy_addr_t VTAMemGetPhyAddr(void* buf);
/*! /*!
* \brief Performs a copy operation from host memory to buffer allocated with VTAMemAlloc.
* \param dst The destination buffer in FPGA-accessible memory. Has to be allocated with VTAMemAlloc.
* \param src The source buffer in host memory.
* \param size Size of the region in Bytes.
*/
void VTAMemCopyFromHost(void* dst, const void* src, size_t size);
/*!
* \brief Performs a copy operation from buffer allocated with VTAMemAlloc to host memory.
* \param dst The destination buffer in host memory.
* \param src The source buffer in FPGA-accessible memory. Has to be allocated with VTAMemAlloc.
* \param size Size of the region in Bytes.
*/
void VTAMemCopyToHost(void* dst, const void* src, size_t size);
/*!
* \brief Flushes the region of memory out of the CPU cache to DRAM. * \brief Flushes the region of memory out of the CPU cache to DRAM.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed. * \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed.
* This needs to be the physical address. * This needs to be the physical address.
......
...@@ -105,6 +105,7 @@ TVM_DLL void VTAWriteBarrier(VTACommandHandle cmd, ...@@ -105,6 +105,7 @@ TVM_DLL void VTAWriteBarrier(VTACommandHandle cmd,
uint32_t elem_bits, uint32_t elem_bits,
uint32_t start, uint32_t start,
uint32_t extent); uint32_t extent);
/*! /*!
* \brief Perform a read barrier to a memory region visible to VTA. * \brief Perform a read barrier to a memory region visible to VTA.
* \param cmd The VTA command handle. * \param cmd The VTA command handle.
......
...@@ -29,10 +29,13 @@ ...@@ -29,10 +29,13 @@
void* VTAMemAlloc(size_t size, int cached) { void* VTAMemAlloc(size_t size, int cached) {
assert(size <= VTA_MAX_XFER);
// Rely on the pynq-specific cma library
return cma_alloc(size, cached); return cma_alloc(size, cached);
} }
void VTAMemFree(void* buf) { void VTAMemFree(void* buf) {
// Rely on the pynq-specific cma library
cma_free(buf); cma_free(buf);
} }
...@@ -40,11 +43,25 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { ...@@ -40,11 +43,25 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
return cma_get_phy_addr(buf); return cma_get_phy_addr(buf);
} }
/*!
 * \brief Copies data from host memory into a buffer obtained from VTAMemAlloc.
 * \param dst Destination buffer; has to be allocated with VTAMemAlloc.
 * \param src Source buffer in host memory.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
  // SoC-based FPGAs share memory with the CPU, so a plain memcpy() is enough.
  memcpy(dst, src, size);
}
/*!
 * \brief Copies data from a buffer obtained from VTAMemAlloc back into host memory.
 * \param dst Destination buffer in host memory.
 * \param src Source buffer; has to be allocated with VTAMemAlloc.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
  // SoC-based FPGAs share memory with the CPU, so a plain memcpy() is enough.
  memcpy(dst, src, size);
}
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(vta_phy_addr_t buf, int size) {
// Call the xlnkFlushCache on the CMA buffer
// so that the FPGA can read the buffer data.
xlnkFlushCache(reinterpret_cast<void*>(buf), size); xlnkFlushCache(reinterpret_cast<void*>(buf), size);
} }
void VTAInvalidateCache(vta_phy_addr_t buf, int size) { void VTAInvalidateCache(vta_phy_addr_t buf, int size) {
// Call the xlnkInvalidateCache on the CMA buffer
// so that the host can read the buffer data.
xlnkInvalidateCache(reinterpret_cast<void*>(buf), size); xlnkInvalidateCache(reinterpret_cast<void*>(buf), size);
} }
...@@ -54,7 +71,7 @@ void *VTAMapRegister(uint32_t addr, size_t length) { ...@@ -54,7 +71,7 @@ void *VTAMapRegister(uint32_t addr, size_t length) {
// Calculate base address offset w.r.t the base address // Calculate base address offset w.r.t the base address
uint32_t virt_offset = addr - virt_base; uint32_t virt_offset = addr - virt_base;
// Open file and mmap // Open file and mmap
uint32_t mmap_file = open(VTA_PYNQ_DEV_MEM_PATH, O_RDWR|O_SYNC); uint32_t mmap_file = open("/dev/mem", O_RDWR|O_SYNC);
return mmap(NULL, return mmap(NULL,
(length+virt_offset), (length+virt_offset),
PROT_READ|PROT_WRITE, PROT_READ|PROT_WRITE,
......
...@@ -56,13 +56,6 @@ void VTAUnmapRegister(void *vta, size_t length); ...@@ -56,13 +56,6 @@ void VTAUnmapRegister(void *vta, size_t length);
void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val); void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset); uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
/*! \brief (Pynq only) Path to /dev/mem */
#define VTA_PYNQ_DEV_MEM_PATH "/dev/mem"
/*! \brief (Pynq only) MMIO driver constant */
#define VTA_PYNQ_MMIO_WORD_LENGTH 4
/*! \brief (Pynq only) MMIO driver constant */
#define VTA_PYNQ_MMIO_WORD_MASK (~(MMIO_WORD_LENGTH - 1))
/*! \brief VTA configuration register address range */ /*! \brief VTA configuration register address range */
#define VTA_RANGE 0x100 #define VTA_RANGE 0x100
/*! \brief VTA configuration register start value */ /*! \brief VTA configuration register start value */
......
...@@ -607,6 +607,14 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { ...@@ -607,6 +607,14 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
return vta::sim::DRAM::Global()->GetPhyAddr(buf); return vta::sim::DRAM::Global()->GetPhyAddr(buf);
} }
/*!
 * \brief Copies size bytes from the host buffer src into dst.
 * \param dst Destination buffer.
 * \param src Source buffer in host memory.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
  // This driver implements the transfer as a direct memcpy().
  memcpy(dst, src, size);
}
/*!
 * \brief Copies size bytes from src into the host buffer dst.
 * \param dst Destination buffer in host memory.
 * \param src Source buffer.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
  // This driver implements the transfer as a direct memcpy().
  memcpy(dst, src, size);
}
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(vta_phy_addr_t buf, int size) {
} }
......
...@@ -220,6 +220,14 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { ...@@ -220,6 +220,14 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
return reinterpret_cast<uint64_t>(reinterpret_cast<uint64_t*>(buf)); return reinterpret_cast<uint64_t>(reinterpret_cast<uint64_t*>(buf));
} }
/*!
 * \brief Copies size bytes from the host buffer src into dst.
 * \param dst Destination buffer.
 * \param src Source buffer in host memory.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
  // This driver implements the transfer as a direct memcpy().
  memcpy(dst, src, size);
}
/*!
 * \brief Copies size bytes from src into the host buffer dst.
 * \param dst Destination buffer in host memory.
 * \param src Source buffer.
 * \param size Number of bytes to copy.
 */
void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
  // This driver implements the transfer as a direct memcpy().
  memcpy(dst, src, size);
}
void VTAFlushCache(vta_phy_addr_t buf, int size) { void VTAFlushCache(vta_phy_addr_t buf, int size) {
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment