Commit 4434a89c by Luis Vega Committed by Thierry Moreau

[VTA][Chisel] rename USE_TSIM macro with USE_VTA64 and cleanup runtime (#3872)

parent 4b827593
......@@ -71,8 +71,6 @@ elseif(PYTHON)
target_compile_definitions(vta_tsim PUBLIC ${__strip_def})
endforeach()
include_directories("vta/include")
# Set USE_TSIM macro
target_compile_definitions(vta_tsim PUBLIC USE_TSIM)
if(APPLE)
set_target_properties(vta_tsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif(APPLE)
......
......@@ -32,7 +32,7 @@ ifeq (, $(VERILATOR_INC_DIR))
endif
endif
CONFIG = DefaultF1Config
CONFIG = DefaultPynqConfig
TOP = VTA
TOP_TEST = Test
BUILD_NAME = build
......
......@@ -53,7 +53,7 @@ extern "C" {
typedef void * VTADeviceHandle;
/*! \brief physical address */
#ifdef USE_TSIM
#ifdef USE_VTA64
typedef uint64_t vta_phy_addr_t;
#else
typedef uint32_t vta_phy_addr_t;
......@@ -80,22 +80,10 @@ void VTADeviceFree(VTADeviceHandle handle);
*
* \return 0 if running is successful, 1 if timeout.
*/
#ifdef USE_TSIM
int VTADeviceRun(VTADeviceHandle device,
vta_phy_addr_t insn_phy_addr,
vta_phy_addr_t uop_phy_addr,
vta_phy_addr_t inp_phy_addr,
vta_phy_addr_t wgt_phy_addr,
vta_phy_addr_t acc_phy_addr,
vta_phy_addr_t out_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles);
#else
int VTADeviceRun(VTADeviceHandle device,
vta_phy_addr_t insn_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles);
#endif
/*!
* \brief Allocates physically contiguous region in memory readable/writeable by FPGA.
......
......@@ -1129,24 +1129,11 @@ class CommandQueue {
// Make sure that we don't exceed contiguous physical memory limits
CHECK(insn_queue_.count() * sizeof(VTAGenericInsn) < VTA_MAX_XFER);
#ifdef USE_TSIM
int timeout = VTADeviceRun(
device_,
insn_queue_.dram_phy_addr(),
uop_queue_.dram_phy_addr(),
inp_phy_addr_,
wgt_phy_addr_,
acc_phy_addr_,
out_phy_addr_,
insn_queue_.count(),
wait_cycles);
#else
int timeout = VTADeviceRun(
device_,
insn_queue_.dram_phy_addr(),
insn_queue_.count(),
wait_cycles);
#endif
CHECK_EQ(timeout, 0);
// Reset buffers
uop_queue_.Reset();
......@@ -1221,18 +1208,6 @@ class CommandQueue {
ThreadLocal().reset();
}
#ifdef USE_TSIM
/*!
 * \brief Record the physical address of the buffer associated with a memory type.
 * \param type Memory type id. VTA_MEM_ID_INP, VTA_MEM_ID_WGT, VTA_MEM_ID_ACC and
 *             VTA_MEM_ID_OUT are handled; any other value is ignored.
 * \param addr Physical address to store for that memory type.
 */
void SetBufPhyAddr(uint32_t type, vta_phy_addr_t addr) {
switch (type) {
// Every case must end in break: without it control falls through and the
// address is also written into each of the slots listed after the match.
case VTA_MEM_ID_INP: inp_phy_addr_ = addr; break;
case VTA_MEM_ID_WGT: wgt_phy_addr_ = addr; break;
case VTA_MEM_ID_ACC: acc_phy_addr_ = addr; break;
case VTA_MEM_ID_OUT: out_phy_addr_ = addr; break;
default: break;
}
}
#endif
private:
// Push GEMM uop to the command buffer
void PushGEMMOp(UopKernel* kernel) {
......@@ -1337,16 +1312,6 @@ class CommandQueue {
InsnQueue<VTA_MAX_XFER, kBufferCoherent, kAlwaysCache> insn_queue_;
// Device handle
VTADeviceHandle device_{nullptr};
#ifdef USE_TSIM
// Input phy addr
vta_phy_addr_t inp_phy_addr_{0};
// Weight phy addr
vta_phy_addr_t wgt_phy_addr_{0};
// Accumulator phy addr
vta_phy_addr_t acc_phy_addr_{0};
// Output phy addr
vta_phy_addr_t out_phy_addr_{0};
#endif
};
} // namespace vta
......@@ -1439,10 +1404,6 @@ void VTALoadBuffer2D(VTACommandHandle cmd,
uint32_t y_pad_after,
uint32_t dst_sram_index,
uint32_t dst_memory_type) {
#ifdef USE_TSIM
vta::DataBuffer* src = vta::DataBuffer::FromHandle(src_dram_addr);
static_cast<vta::CommandQueue*>(cmd)->SetBufPhyAddr(dst_memory_type, src->phy_addr());
#endif
static_cast<vta::CommandQueue*>(cmd)->
LoadBuffer2D(src_dram_addr, src_elem_offset,
x_size, y_size, x_stride,
......@@ -1459,10 +1420,6 @@ void VTAStoreBuffer2D(VTACommandHandle cmd,
uint32_t x_size,
uint32_t y_size,
uint32_t x_stride) {
#ifdef USE_TSIM
vta::DataBuffer* dst = vta::DataBuffer::FromHandle(dst_dram_addr);
static_cast<vta::CommandQueue*>(cmd)->SetBufPhyAddr(src_memory_type, dst->phy_addr());
#endif
static_cast<vta::CommandQueue*>(cmd)->
StoreBuffer2D(src_sram_index, src_memory_type,
dst_dram_addr, dst_elem_offset,
......
......@@ -116,20 +116,10 @@ class Device {
}
int Run(vta_phy_addr_t insn_phy_addr,
vta_phy_addr_t uop_phy_addr,
vta_phy_addr_t inp_phy_addr,
vta_phy_addr_t wgt_phy_addr,
vta_phy_addr_t acc_phy_addr,
vta_phy_addr_t out_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles) {
this->Init();
this->Launch(insn_phy_addr,
uop_phy_addr,
inp_phy_addr,
wgt_phy_addr,
acc_phy_addr,
out_phy_addr,
insn_count,
wait_cycles);
this->WaitForCompletion(wait_cycles);
......@@ -143,27 +133,15 @@ class Device {
}
/*!
 * \brief Program the device registers through dpi_->WriteReg and start a run.
 *
 * NOTE(review): this listing writes offsets 0x10, 0x18 and 0x20 twice each, and
 * the later write (0) is the one that sticks. These pairs look like the
 * before/after lines of a diff whose markers were lost - confirm against the
 * real source file before relying on this body.
 *
 * \param insn_phy_addr Written to offset 0x0c; its value shifted right by 32 is written to 0x10.
 * \param uop_phy_addr Shifted right by 32 and written to offset 0x18.
 * \param inp_phy_addr Shifted right by 32 and written to offset 0x20.
 * \param wgt_phy_addr Shifted right by 32 and written to offset 0x28.
 * \param acc_phy_addr Shifted right by 32 and written to offset 0x30.
 * \param out_phy_addr Shifted right by 32 and written to offset 0x38.
 * \param insn_count Written to offset 0x08.
 * \param wait_cycles Not referenced in this body.
 */
void Launch(vta_phy_addr_t insn_phy_addr,
vta_phy_addr_t uop_phy_addr,
vta_phy_addr_t inp_phy_addr,
vta_phy_addr_t wgt_phy_addr,
vta_phy_addr_t acc_phy_addr,
vta_phy_addr_t out_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles) {
dpi_->WriteReg(0x04, 0);
dpi_->WriteReg(0x08, insn_count);
dpi_->WriteReg(0x0c, insn_phy_addr);
// 0x10 is written twice; the zero write below overrides the shifted address.
dpi_->WriteReg(0x10, insn_phy_addr >> 32);
dpi_->WriteReg(0x10, 0);
dpi_->WriteReg(0x14, 0);
// 0x18 is written twice; the zero write below overrides the shifted address.
dpi_->WriteReg(0x18, uop_phy_addr >> 32);
dpi_->WriteReg(0x18, 0);
dpi_->WriteReg(0x1c, 0);
// 0x20 receives the shifted input address here and is zeroed again further down.
dpi_->WriteReg(0x20, inp_phy_addr >> 32);
dpi_->WriteReg(0x24, 0);
dpi_->WriteReg(0x28, wgt_phy_addr >> 32);
dpi_->WriteReg(0x2c, 0);
dpi_->WriteReg(0x30, acc_phy_addr >> 32);
dpi_->WriteReg(0x34, 0);
dpi_->WriteReg(0x38, out_phy_addr >> 32);
dpi_->WriteReg(0x20, 0);
// start: 0x1 is written to offset 0x00 last, after every other register write
dpi_->WriteReg(0x00, 0x1);
}
......@@ -247,20 +225,10 @@ void VTADeviceFree(VTADeviceHandle handle) {
/*!
 * \brief Run entry point that forwards to vta::tsim::Device::Run.
 *
 * The handle is cast to vta::tsim::Device* and every remaining argument is
 * passed through unchanged, in the same order; the value returned by Run is
 * returned to the caller.
 *
 * NOTE(review): the parameter list shown here may mix pre- and post-change
 * lines of a diff - confirm the actual signature against the header.
 *
 * \param handle Device handle; cast to vta::tsim::Device* without any check.
 * \param insn_phy_addr Forwarded to Run.
 * \param uop_phy_addr Forwarded to Run.
 * \param inp_phy_addr Forwarded to Run.
 * \param wgt_phy_addr Forwarded to Run.
 * \param acc_phy_addr Forwarded to Run.
 * \param out_phy_addr Forwarded to Run.
 * \param insn_count Forwarded to Run.
 * \param wait_cycles Forwarded to Run.
 * \return Whatever Device::Run returns.
 */
int VTADeviceRun(VTADeviceHandle handle,
vta_phy_addr_t insn_phy_addr,
vta_phy_addr_t uop_phy_addr,
vta_phy_addr_t inp_phy_addr,
vta_phy_addr_t wgt_phy_addr,
vta_phy_addr_t acc_phy_addr,
vta_phy_addr_t out_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles) {
// static_cast performs no runtime validation of the handle.
return static_cast<vta::tsim::Device*>(handle)->Run(
insn_phy_addr,
uop_phy_addr,
inp_phy_addr,
wgt_phy_addr,
acc_phy_addr,
out_phy_addr,
insn_count,
wait_cycles);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment