Commit 92b6ca71 by Liangfu Chen Committed by Thierry Moreau

[VTA][TSIM] Introduce Virtual Memory for TSIM Driver (#3686)

* initial virtual memory;

* initial integration;

* include the header file in cmake;

* implement allocation with virtual to logical address mapping;

* virtual memory for tsim_driver;

* implement the missing memory release function;

* readability improvement;

* readability improvement;

* address review comments;

* improved robustness in virtual memory allocation;

* remove VTA_TSIM_USE_VIRTUAL_MEMORY macro and use virtual memory for tsim by default;

* link tvm against vta library;

* merge with master

* build virtual memory system without linking tvm against vta;

* minor change;

* reuse VTA_PAGE_BYTES;

* using DRAM class from sim_driver as VirtualMemoryManager;

* satisfy linter;

* add comments in code;

* undo changes to Makefile

* undo changes to Makefile

* retrigger ci;

* retrigger ci;

* directly call into VirtualMemoryManager::Global()
parent 70f0a169
...@@ -42,6 +42,7 @@ elseif(PYTHON) ...@@ -42,6 +42,7 @@ elseif(PYTHON)
# Add fsim driver sources # Add fsim driver sources
file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc) file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc) list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/vmem/virtual_memory.cc vta/src/vmem/virtual_memory.h)
# Target lib: vta_fsim # Target lib: vta_fsim
add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS}) add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS})
target_include_directories(vta_fsim PUBLIC vta/include) target_include_directories(vta_fsim PUBLIC vta/include)
...@@ -61,6 +62,7 @@ elseif(PYTHON) ...@@ -61,6 +62,7 @@ elseif(PYTHON)
file(GLOB TSIM_RUNTIME_SRCS vta/src/*.cc) file(GLOB TSIM_RUNTIME_SRCS vta/src/*.cc)
list(APPEND TSIM_RUNTIME_SRCS vta/src/tsim/tsim_driver.cc) list(APPEND TSIM_RUNTIME_SRCS vta/src/tsim/tsim_driver.cc)
list(APPEND TSIM_RUNTIME_SRCS vta/src/dpi/module.cc) list(APPEND TSIM_RUNTIME_SRCS vta/src/dpi/module.cc)
list(APPEND TSIM_RUNTIME_SRCS vta/src/vmem/virtual_memory.cc vta/src/vmem/virtual_memory.h)
# Target lib: vta_tsim # Target lib: vta_tsim
add_library(vta_tsim SHARED ${TSIM_RUNTIME_SRCS}) add_library(vta_tsim SHARED ${TSIM_RUNTIME_SRCS})
target_include_directories(vta_tsim PUBLIC vta/include) target_include_directories(vta_tsim PUBLIC vta/include)
......
...@@ -33,6 +33,9 @@ ...@@ -33,6 +33,9 @@
#include <queue> #include <queue>
#include <thread> #include <thread>
#include <condition_variable> #include <condition_variable>
#include <fstream>
#include "../vmem/virtual_memory.h"
namespace vta { namespace vta {
namespace dpi { namespace dpi {
...@@ -179,12 +182,14 @@ void HostDevice::WaitPopResponse(HostResponse* r) { ...@@ -179,12 +182,14 @@ void HostDevice::WaitPopResponse(HostResponse* r) {
void MemDevice::SetRequest(uint8_t opcode, uint64_t addr, uint32_t len) { void MemDevice::SetRequest(uint8_t opcode, uint64_t addr, uint32_t len) {
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
void * vaddr = vta::vmem::VirtualMemoryManager::Global()->GetAddr(addr);
if (opcode == 1) { if (opcode == 1) {
wlen_ = len + 1; wlen_ = len + 1;
waddr_ = reinterpret_cast<uint64_t*>(addr); waddr_ = reinterpret_cast<uint64_t*>(vaddr);
} else { } else {
rlen_ = len + 1; rlen_ = len + 1;
raddr_ = reinterpret_cast<uint64_t*>(addr); raddr_ = reinterpret_cast<uint64_t*>(vaddr);
} }
} }
......
...@@ -32,6 +32,8 @@ ...@@ -32,6 +32,8 @@
#include <cstring> #include <cstring>
#include <sstream> #include <sstream>
#include "../vmem/virtual_memory.h"
namespace vta { namespace vta {
namespace sim { namespace sim {
...@@ -125,113 +127,7 @@ class BitPacker { ...@@ -125,113 +127,7 @@ class BitPacker {
* \brief DRAM memory manager * \brief DRAM memory manager
* Implements simple paging to allow physical address translation. * Implements simple paging to allow physical address translation.
*/ */
class DRAM { using DRAM = ::vta::vmem::VirtualMemoryManager;
public:
/*!
* \brief Get virtual address given physical address.
* \param phy_addr The simulator phyiscal address.
* \return The true virtual address;
*/
void* GetAddr(uint64_t phy_addr) {
CHECK_NE(phy_addr, 0)
<< "trying to get address that is nullptr";
std::lock_guard<std::mutex> lock(mutex_);
uint64_t loc = (phy_addr >> kPageBits) - 1;
CHECK_LT(loc, ptable_.size())
<< "phy_addr=" << phy_addr;
Page* p = ptable_[loc];
CHECK(p != nullptr);
size_t offset = (loc - p->ptable_begin) << kPageBits;
offset += phy_addr & (kPageSize - 1);
return reinterpret_cast<char*>(p->data) + offset;
}
/*!
* \brief Get physical address
* \param buf The virtual address.
* \return The true physical address;
*/
vta_phy_addr_t GetPhyAddr(void* buf) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = pmap_.find(buf);
CHECK(it != pmap_.end());
Page* p = it->second.get();
return (p->ptable_begin + 1) << kPageBits;
}
/*!
* \brief Allocate memory from manager
* \param size The size of memory
* \return The virtual address
*/
void* Alloc(size_t size) {
std::lock_guard<std::mutex> lock(mutex_);
size_t npage = (size + kPageSize - 1) / kPageSize;
auto it = free_map_.lower_bound(npage);
if (it != free_map_.end()) {
Page* p = it->second;
free_map_.erase(it);
return p->data;
}
size_t start = ptable_.size();
std::unique_ptr<Page> p(new Page(start, npage));
// insert page entry
ptable_.resize(start + npage, p.get());
void* data = p->data;
pmap_[data] = std::move(p);
return data;
}
/*!
* \brief Free the memory.
* \param size The size of memory
* \return The virtual address
*/
void Free(void* data) {
std::lock_guard<std::mutex> lock(mutex_);
if (pmap_.size() == 0) return;
auto it = pmap_.find(data);
CHECK(it != pmap_.end());
Page* p = it->second.get();
free_map_.insert(std::make_pair(p->num_pages, p));
}
static DRAM* Global() {
static DRAM inst;
return &inst;
}
private:
// The bits in page table
static constexpr vta_phy_addr_t kPageBits = VTA_PAGE_BITS;
// page size, also the maximum allocable size 16 K
static constexpr vta_phy_addr_t kPageSize = VTA_PAGE_BYTES;
/*! \brief A page in the DRAM */
struct Page {
/*! \brief Data Type */
using DType = typename std::aligned_storage<kPageSize, 256>::type;
/*! \brief Start location in page table */
size_t ptable_begin;
/*! \brief The total number of pages */
size_t num_pages;
/*! \brief Data */
DType* data{nullptr};
// construct a new page
explicit Page(size_t ptable_begin, size_t num_pages)
: ptable_begin(ptable_begin), num_pages(num_pages) {
data = new DType[num_pages];
}
~Page() {
delete [] data;
}
};
// Internal lock
std::mutex mutex_;
// Physical address -> page
std::vector<Page*> ptable_;
// virtual addres -> page
std::unordered_map<void*, std::unique_ptr<Page> > pmap_;
// Free map
std::multimap<size_t, Page*> free_map_;
};
/*! /*!
* \brief Register file. * \brief Register file.
......
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
#include <vta/driver.h> #include <vta/driver.h>
#include <vta/dpi/module.h> #include <vta/dpi/module.h>
#include "../vmem/virtual_memory.h"
namespace vta { namespace vta {
namespace tsim { namespace tsim {
...@@ -208,12 +210,13 @@ TVM_REGISTER_GLOBAL("vta.tsim.profiler_status") ...@@ -208,12 +210,13 @@ TVM_REGISTER_GLOBAL("vta.tsim.profiler_status")
} // namespace vta } // namespace vta
void* VTAMemAlloc(size_t size, int cached) { void* VTAMemAlloc(size_t size, int cached) {
void *p = malloc(size); void * addr = vta::vmem::VirtualMemoryManager::Global()->Alloc(size);
return p; return reinterpret_cast<void*>(vta::vmem::VirtualMemoryManager::Global()->GetPhyAddr(addr));
} }
void VTAMemFree(void* buf) { void VTAMemFree(void* buf) {
free(buf); void * addr = vta::vmem::VirtualMemoryManager::Global()->GetAddr(reinterpret_cast<uint64_t>(buf));
vta::vmem::VirtualMemoryManager::Global()->Free(addr);
} }
vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
...@@ -221,11 +224,11 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) { ...@@ -221,11 +224,11 @@ vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
} }
void VTAMemCopyFromHost(void* dst, const void* src, size_t size) { void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
memcpy(dst, src, size); vta::vmem::VirtualMemoryManager::Global()->MemCopyFromHost(dst, src, size);
} }
void VTAMemCopyToHost(void* dst, const void* src, size_t size) { void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
memcpy(dst, src, size); vta::vmem::VirtualMemoryManager::Global()->MemCopyToHost(dst, src, size);
} }
void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) { void VTAFlushCache(void* vir_addr, vta_phy_addr_t phy_addr, int size) {
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2019 by Contributors
* \file virtual_memory.cc
* \brief Thread-safe virtual memory manager
*/
#include "virtual_memory.h"
#include <dmlc/logging.h>
#include <vta/driver.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <list>
#include <utility>
#include <iterator>
#include <unordered_map>
#include <map>
#include <mutex>
namespace vta {
namespace vmem {
/*!
 * \brief Translate a simulator physical address into a host pointer.
 * \param phy_addr The simulator physical address; must be non-zero and
 *  fall inside the page table.
 * \return The host (virtual) address corresponding to phy_addr.
 */
void* VirtualMemoryManager::GetAddr(uint64_t phy_addr) {
  CHECK_NE(phy_addr, 0)
      << "trying to get address that is nullptr";
  std::lock_guard<std::mutex> guard(mutex_);
  // Physical page numbers are offset by one (see GetPhyAddr), hence the -1.
  uint64_t loc = (phy_addr >> kPageBits) - 1;
  CHECK_LT(loc, ptable_.size())
      << "phy_addr=" << phy_addr;
  Page* p = ptable_[loc];
  CHECK(p != nullptr);
  // Bytes contributed by whole pages between the block start and this page.
  const size_t block_bytes = (loc - p->ptable_begin) << kPageBits;
  // Bytes contributed by the position inside the addressed page.
  const size_t page_bytes = phy_addr & (kPageSize - 1);
  char* const base = reinterpret_cast<char*>(p->data);
  return base + (block_bytes + page_bytes);
}
/*!
 * \brief Look up the simulator physical address of an allocated buffer.
 * \param buf The host (virtual) address; it must be a key of the
 *  allocation map, i.e. the start address of a block.
 * \return The physical address of the first page of that block.
 */
vta_phy_addr_t VirtualMemoryManager::GetPhyAddr(void* buf) {
  std::lock_guard<std::mutex> guard(mutex_);
  auto it = pmap_.find(buf);
  CHECK(it != pmap_.end());
  const Page& block = *it->second;
  // Page numbers are shifted up by one so that no block maps to address 0.
  return (block.ptable_begin + 1) << kPageBits;
}
/*!
 * \brief Allocate memory from manager
 *  The request is rounded up to a whole number of pages. A previously
 *  freed block with at least that many pages is reused when available;
 *  otherwise a new block is appended to the page table.
 * \param size The size of memory in bytes. A zero-byte request still
 *  reserves one page so that the returned buffer owns a distinct page
 *  table entry and can be translated by GetPhyAddr / GetAddr.
 * \return The virtual address
 */
void* VirtualMemoryManager::Alloc(size_t size) {
  std::lock_guard<std::mutex> lock(mutex_);
  // Round up without computing size + kPageSize - 1, which can wrap around
  // for very large requests.
  size_t npage = size / kPageSize + (size % kPageSize != 0 ? 1 : 0);
  // A block with zero pages would occupy no slot in ptable_, so its physical
  // address would alias the next allocation or fail the bounds check.
  if (npage == 0) npage = 1;
  // Smallest freed block that is large enough, if any.
  auto it = free_map_.lower_bound(npage);
  if (it != free_map_.end()) {
    Page* p = it->second;
    free_map_.erase(it);
    return p->data;
  }
  size_t start = ptable_.size();
  std::unique_ptr<Page> p(new Page(start, npage));
  // insert page entry: every page of the block points back to the same Page
  ptable_.resize(start + npage, p.get());
  void* data = p->data;
  // pmap_ takes ownership of the block, keyed by its start address
  pmap_[data] = std::move(p);
  return data;
}
/*!
* \brief Free the memory.
* \param size The size of memory
* \return The virtual address
*/
void VirtualMemoryManager::Free(void* data) {
std::lock_guard<std::mutex> lock(mutex_);
if (pmap_.size() == 0) return;
auto it = pmap_.find(data);
CHECK(it != pmap_.end());
Page* p = it->second.get();
free_map_.insert(std::make_pair(p->num_pages, p));
}
/*!
 * \brief Copy from the host memory to device memory.
 * \param dst The device memory address; its value is passed to GetAddr to
 *  obtain the host pointer that is actually written.
 * \param src The host memory address
 * \param size The number of bytes to copy
 */
void VirtualMemoryManager::MemCopyFromHost(void* dst, const void * src, size_t size) {
  const uint64_t device_addr = reinterpret_cast<uint64_t>(dst);
  void* const target = this->GetAddr(device_addr);
  memcpy(target, src, size);
}
/*!
 * \brief Copy from the device memory to host memory.
 * \param dst The host memory address
 * \param src The device memory address; its value is passed to GetAddr to
 *  obtain the host pointer that is actually read.
 * \param size The number of bytes to copy
 */
void VirtualMemoryManager::MemCopyToHost(void* dst, const void * src, size_t size) {
  const uint64_t device_addr = reinterpret_cast<uint64_t>(src);
  void* const source = this->GetAddr(device_addr);
  memcpy(dst, source, size);
}
/*!
 * \brief Get the shared manager instance.
 * \return Pointer to a function-local static VirtualMemoryManager.
 */
VirtualMemoryManager* VirtualMemoryManager::Global() {
  static VirtualMemoryManager instance;
  return &instance;
}
} // namespace vmem
} // namespace vta
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2019 by Contributors
* \file virtual_memory.h
* \brief The virtual memory manager for device simulation
*/
#ifndef VTA_VMEM_VIRTUAL_MEMORY_H_
#define VTA_VMEM_VIRTUAL_MEMORY_H_
#include <vta/driver.h>
#include <cstdint>
#include <type_traits>
#include <mutex>
#include <vector>
#include <map>
#include <unordered_map>
#include <memory>
/*!
 * \brief Tags naming the two copy directions (from host / to host).
 *  NOTE(review): no use of this enum appears in the visible sources, and it
 *  is declared outside the vta::vmem namespace - confirm both are intended.
 */
enum VMemCopyType {
kVirtualMemCopyFromHost = 0,
kVirtualMemCopyToHost = 1
};
namespace vta {
namespace vmem {
/*!
 * \brief DRAM memory manager
 *  Implements simple paging to allow physical address translation.
 *  The member functions that touch the tables take an internal mutex.
 */
class VirtualMemoryManager {
 public:
  /*!
   * \brief Get virtual address given physical address.
   * \param phy_addr The simulator physical address.
   * \return The true virtual address;
   */
  void* GetAddr(uint64_t phy_addr);
  /*!
   * \brief Get physical address
   * \param buf The virtual address.
   * \return The true physical address;
   */
  vta_phy_addr_t GetPhyAddr(void* buf);
  /*!
   * \brief Allocate memory from manager
   * \param size The size of memory
   * \return The virtual address
   */
  void* Alloc(size_t size);
  /*!
   * \brief Free the memory.
   * \param data The virtual address of the block to release.
   */
  void Free(void* data);
  /*!
   * \brief Copy from the host memory to device memory (virtual).
   * \param dst The device memory address (virtual)
   * \param src The host memory address
   * \param size The size of memory
   */
  void MemCopyFromHost(void* dst, const void * src, size_t size);
  /*!
   * \brief Copy from the device memory (virtual) to host memory.
   * \param dst The host memory address
   * \param src The device memory address (virtual)
   * \param size The size of memory
   */
  void MemCopyToHost(void* dst, const void * src, size_t size);
  /*!
   * \brief Get the global manager instance.
   * \return Pointer to the shared VirtualMemoryManager.
   */
  static VirtualMemoryManager* Global();

 private:
  // The bits in page table
  static constexpr vta_phy_addr_t kPageBits = VTA_PAGE_BITS;
  // Size of one page in bytes; a block may span several pages.
  static constexpr vta_phy_addr_t kPageSize = VTA_PAGE_BYTES;
  /*! \brief A block of one or more contiguous pages in the DRAM */
  struct Page {
    /*! \brief Data Type: raw storage for one page, aligned to 256 bytes */
    using DType = typename std::aligned_storage<kPageSize, 256>::type;
    /*! \brief Start location in page table */
    size_t ptable_begin;
    /*! \brief The total number of pages */
    size_t num_pages;
    /*! \brief Data */
    DType* data{nullptr};
    // construct a new block and allocate its backing storage
    explicit Page(size_t ptable_begin, size_t num_pages)
        : ptable_begin(ptable_begin), num_pages(num_pages) {
      data = new DType[num_pages];
    }
    // The block owns `data`; a copy would release the same array twice.
    Page(const Page&) = delete;
    Page& operator=(const Page&) = delete;
    ~Page() {
      delete [] data;
    }
  };
  // Internal lock
  std::mutex mutex_;
  // Physical page index -> block containing that page (non-owning)
  std::vector<Page*> ptable_;
  // Virtual start address -> block (owning)
  std::unordered_map<void*, std::unique_ptr<Page> > pmap_;
  // Free map: number of pages -> freed block available for reuse
  std::multimap<size_t, Page*> free_map_;
};
} // namespace vmem
} // namespace vta
#endif // VTA_VMEM_VIRTUAL_MEMORY_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment