Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
0faefafc
Commit
0faefafc
authored
Apr 11, 2018
by
Tianqi Chen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DRIVER][RUNTIME] Make runtime fully device agnostic (#23)
parent
dea167a8
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
188 additions
and
150 deletions
+188
-150
vta/include/vta/driver.h
+47
-40
vta/src/data_buffer.h
+2
-2
vta/src/pynq/pynq_driver.cc
+110
-30
vta/src/pynq/pynq_driver.h
+5
-3
vta/src/runtime.cc
+24
-71
vta/src/tvm/vta_device_api.cc
+0
-4
No files found.
vta/include/vta/driver.h
View file @
0faefafc
/*!
* Copyright (c) 2018 by Contributors
* \file vta_driver.h
* \brief General driver interface.
* \brief Driver interface that is used by runtime.
*
* Driver's implementation is device specific.
*/
#ifndef VTA_DRIVER_H_
...
...
@@ -11,16 +13,50 @@
extern
"C"
{
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdlib.h>
/*! \brief Memory management constants */
/*! \brief Memory management constants
for cached memory
*/
#define VTA_CACHED 1
/*! \brief Memory management constants */
/*! \brief Memory management constants
for non-cached memory
*/
#define VTA_NOT_CACHED 0
/*! \brief VTA command handle */
typedef
void
*
VTAHandle
;
/*! \brief Physically contiguous buffer size limit */
#ifndef VTA_MAX_XFER
#define VTA_MAX_XFER (1<<22)
#endif
/*! \brief Device resource context */
typedef
void
*
VTADeviceHandle
;
/*! \brief physical address */
typedef
uint32_t
vta_phy_addr_t
;
/*!
* \brief Allocate a device resource handle
* \return The device handle.
*/
VTADeviceHandle
VTADeviceAlloc
();
/*!
* \brief Free a device handle
* \param handle The device handle to be freed.
*/
void
VTADeviceFree
(
VTADeviceHandle
handle
);
/*!
* \brief Launch the instructions block until done.
* \param device The device handle.
* \param insn_phy_addr The physical address of instruction stream.
* \param insn_count Instruction count.
* \param wait_cycles The maximum number of cycles to wait.
*
* \return 0 if running is successful, 1 if timeout.
*/
int
VTADeviceRun
(
VTADeviceHandle
device
,
vta_phy_addr_t
insn_phy_addr
,
uint32_t
insn_count
,
uint32_t
wait_cycles
);
/*!
* \brief Allocates physically contiguous region in memory (limited by MAX_XFER).
...
...
@@ -41,52 +77,23 @@ void VTAMemFree(void* buf);
* \param buf Pointer to memory region allocated with VTAMemAlloc.
* \return The physical address of the memory region.
*/
uint32
_t
VTAGetMemPhysAddr
(
void
*
buf
);
vta_phy_addr
_t
VTAGetMemPhysAddr
(
void
*
buf
);
/*!
* \brief Flushes the region of memory out of the CPU cache to DRAM.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be flushed.
* This needs to be the physical address.
* \param size Size of the region to flush in Bytes.
*/
void
VTAFlushCache
(
v
oid
*
buf
,
int
size
);
void
VTAFlushCache
(
v
ta_phy_addr_t
buf
,
int
size
);
/*!
* \brief Invalidates the region of memory that is cached.
* \param buf Pointer to memory region allocated with VTAMemAlloc to be invalidated.
* This needs to be the physical address.
* \param size Size of the region to invalidate in Bytes.
*/
void
VTAInvalidateCache
(
void
*
buf
,
int
size
);
/*!
* \brief Returns a memory map to FPGA configuration registers.
* \param addr The base physical address of the configuration registers.
* \param length The size of the memory mapped region in bytes.
* \return A pointer to the memory mapped region.
*/
void
*
VTAMapRegister
(
unsigned
addr
,
size_t
length
);
/*!
* \brief Deletes the configuration register memory map.
* \param vta The memory mapped region.
* \param length The size of the memory mapped region in bytes.
*/
void
VTAUnmapRegister
(
void
*
vta
,
size_t
length
);
/*!
* \brief Writes to a memory mapped configuration register.
* \param vta_base The handle to the memory mapped configuration registers.
* \param offset The offset of the register to write to.
* \param val The value to be written to the memory mapped register.
*/
void
VTAWriteMappedReg
(
VTAHandle
vta_base
,
unsigned
offset
,
unsigned
val
);
/*!
* \brief Reads from the memory mapped configuration register.
* \param vta_base The handle to the memory mapped configuration registers.
* \param offset The offset of the register to read from.
* \return The value read from the memory mapped register.
*/
unsigned
VTAReadMappedReg
(
VTAHandle
vta_base
,
unsigned
offset
);
void
VTAInvalidateCache
(
vta_phy_addr_t
buf
,
int
size
);
/*!
* \brief Programming the bit stream on the FPGA.
...
...
vta/src/data_buffer.h
View file @
0faefafc
...
...
@@ -35,7 +35,7 @@ struct DataBuffer {
*/
void InvalidateCache(size_t offset, size_t size) {
  // Nothing to do when the buffer is flagged as coherent.
  if (!kBufferCoherent) {
    // Pass the address as a plain integer (phy_addr_ + offset); the stale
    // call that first cast it to void* has been dropped, so the range is
    // invalidated exactly once.
    VTAInvalidateCache(phy_addr_ + offset, size);
  }
}
/*!
...
...
@@ -45,7 +45,7 @@ struct DataBuffer {
*/
void FlushCache(size_t offset, size_t size) {
  // Nothing to do when the buffer is flagged as coherent.
  if (!kBufferCoherent) {
    // Pass the address as a plain integer (phy_addr_ + offset); the stale
    // call that first cast it to void* has been dropped, so the range is
    // flushed exactly once.
    VTAFlushCache(phy_addr_ + offset, size);
  }
}
/*!
...
...
vta/src/pynq/pynq_driver.cc
View file @
0faefafc
...
...
@@ -5,6 +5,7 @@
*/
#include <vta/driver.h>
#include <thread>
#include "./pynq_driver.h"
...
...
@@ -16,16 +17,16 @@ void VTAMemFree(void* buf) {
cma_free
(
buf
);
}
uint32
_t
VTAGetMemPhysAddr
(
void
*
buf
)
{
vta_phy_addr
_t
VTAGetMemPhysAddr
(
void
*
buf
)
{
return
cma_get_phy_addr
(
buf
);
}
void
VTAFlushCache
(
v
oid
*
buf
,
int
size
)
{
xlnkFlushCache
(
buf
,
size
);
void
VTAFlushCache
(
v
ta_phy_addr_t
buf
,
int
size
)
{
xlnkFlushCache
(
reinterpret_cast
<
void
*>
(
buf
)
,
size
);
}
void
VTAInvalidateCache
(
v
oid
*
buf
,
int
size
)
{
xlnkInvalidateCache
(
buf
,
size
);
void
VTAInvalidateCache
(
v
ta_phy_addr_t
buf
,
int
size
)
{
xlnkInvalidateCache
(
reinterpret_cast
<
void
*>
(
buf
)
,
size
);
}
void
*
VTAMapRegister
(
uint32_t
addr
,
size_t
length
)
{
...
...
@@ -57,33 +58,112 @@ uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
return
*
((
volatile
uint32_t
*
)
(
reinterpret_cast
<
char
*>
(
base_addr
)
+
offset
));
}
/*!
 * \brief Holds the register maps of the four VTA stages (fetch, load,
 *  compute, store) and launches an instruction stream on them.
 *
 * The constructor maps one VTA_RANGE-sized register window per stage and the
 * destructor unmaps them, so an instance must not be copied.
 */
class VTADevice {
 public:
  /*! \brief Map the register window of every VTA stage. */
  VTADevice() {
    // VTA stage handles
    vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR, VTA_RANGE);
    vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR, VTA_RANGE);
    vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR, VTA_RANGE);
    vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR, VTA_RANGE);
  }

  /*! \brief Unmap every register window mapped by the constructor. */
  ~VTADevice() {
    // Close VTA stage handle
    VTAUnmapRegister(vta_fetch_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_load_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_compute_handle_, VTA_RANGE);
    VTAUnmapRegister(vta_store_handle_, VTA_RANGE);
  }

  // A copy would share the same mapped windows and unmap them a second time
  // when destroyed, so copying is disabled.
  VTADevice(const VTADevice&) = delete;
  VTADevice& operator=(const VTADevice&) = delete;

  /*!
   * \brief Program the stage registers, start the VTA and poll for completion.
   * \param insn_phy_addr Value written to the FETCH insns register.
   * \param insn_count Value written to the FETCH insn_count register.
   * \param wait_cycles Maximum number of polls of the done flag.
   * \return 0 if the done flag was observed within wait_cycles polls,
   *  1 otherwise (including when wait_cycles is 0).
   */
  int Run(vta_phy_addr_t insn_phy_addr,
          uint32_t insn_count,
          uint32_t wait_cycles) {
    // NOTE: Register address map is derived from the auto-generated
    // driver files available under hardware/build/vivado/<design>/export/driver
    // FETCH @ 0x10 : Data signal of insn_count_V
    VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count);
    // FETCH @ 0x18 : Data signal of insns_V
    VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr);
    // LOAD @ 0x10 : Data signal of inputs_V
    VTAWriteMappedReg(vta_load_handle_, 0x10, 0);
    // LOAD @ 0x18 : Data signal of weight_V
    VTAWriteMappedReg(vta_load_handle_, 0x18, 0);
    // COMPUTE @ 0x20 : Data signal of uops_V
    VTAWriteMappedReg(vta_compute_handle_, 0x20, 0);
    // COMPUTE @ 0x28 : Data signal of biases_V
    VTAWriteMappedReg(vta_compute_handle_, 0x28, 0);
    // STORE @ 0x10 : Data signal of outputs_V
    VTAWriteMappedReg(vta_store_handle_, 0x10, 0);

    // VTA start
    VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
    VTAWriteMappedReg(vta_load_handle_, 0x0, VTA_AUTORESTART);
    VTAWriteMappedReg(vta_compute_handle_, 0x0, VTA_AUTORESTART);
    VTAWriteMappedReg(vta_store_handle_, 0x0, VTA_AUTORESTART);

    // Loop until the VTA is done: poll COMPUTE @ 0x18 and give up the
    // time slice between polls.
    uint32_t polls = 0;
    for (; polls < wait_cycles; ++polls) {
      const uint32_t flag = VTAReadMappedReg(vta_compute_handle_, 0x18);
      if (flag == VTA_DONE) break;
      std::this_thread::yield();
    }
    // Report error if timeout
    return polls < wait_cycles ? 0 : 1;
  }

 private:
  // VTA handles (register maps)
  void* vta_fetch_handle_{nullptr};
  void* vta_load_handle_{nullptr};
  void* vta_compute_handle_{nullptr};
  void* vta_store_handle_{nullptr};
};
/*! \brief Heap-allocate a VTADevice and return it as an opaque handle. */
VTADeviceHandle VTADeviceAlloc() {
  VTADevice* device = new VTADevice();
  return device;
}
/*! \brief Destroy the VTADevice behind a handle. */
void VTADeviceFree(VTADeviceHandle handle) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  delete device;
}
/*!
 * \brief Forward a run request to the VTADevice behind the handle.
 * \return Whatever VTADevice::Run returns for the given arguments.
 */
int VTADeviceRun(VTADeviceHandle handle,
                 vta_phy_addr_t insn_phy_addr,
                 uint32_t insn_count,
                 uint32_t wait_cycles) {
  VTADevice* device = static_cast<VTADevice*>(handle);
  const int status = device->Run(insn_phy_addr, insn_count, wait_cycles);
  return status;
}
/*!
 * \brief Copy a bitstream file byte-for-byte into the device configuration file.
 * \param bitstream Path of the bitstream file to read.
 *
 * Writes '0' to VTA_PYNQ_BS_IS_PARTIAL first, then streams `bitstream` into
 * VTA_PYNQ_BS_XDEVCFG. Prints a message and calls exit(1) if any of the
 * three files cannot be opened.
 */
void VTAProgram(const char* bitstream) {
  // Write '0' to the partial-bitstream status file.
  // NOTE(review): presumably this marks the bitstream as a full one - confirm.
  FILE* partial = fopen(VTA_PYNQ_BS_IS_PARTIAL, "w");
  if (partial == nullptr) {
    printf("Cannot open partial config file %s\n", VTA_PYNQ_BS_IS_PARTIAL);
    // No fclose() here: the stream was never opened.
    exit(1);
  }
  fputc('0', partial);
  fclose(partial);

  FILE* src = fopen(bitstream, "rb");
  if (src == nullptr) {
    printf("Cannot open bitstream %s\n", bitstream);
    exit(1);
  }

  FILE* dst = fopen(VTA_PYNQ_BS_XDEVCFG, "wb");
  if (dst == nullptr) {
    printf("Cannot open device file %s\n", VTA_PYNQ_BS_XDEVCFG);
    // Close the stream that did open; dst itself is null and must not be closed.
    fclose(src);
    exit(1);
  }

  // Byte-wise copy until end of file (or read error) on the source.
  int elem = fgetc(src);
  while (elem != EOF) {
    fputc(elem, dst);
    elem = fgetc(src);
  }

  fclose(src);
  fclose(dst);
}
vta/src/pynq/pynq_driver.h
View file @
0faefafc
...
...
@@ -32,6 +32,11 @@ void xlnkFlushCache(void* buf, int size);
void
xlnkInvalidateCache
(
void
*
buf
,
int
size
);
#endif
void
*
VTAMapRegister
(
uint32_t
addr
,
size_t
length
);
void
VTAUnmapRegister
(
void
*
vta
,
size_t
length
);
void
VTAWriteMappedReg
(
void
*
base_addr
,
uint32_t
offset
,
uint32_t
val
);
uint32_t
VTAReadMappedReg
(
void
*
base_addr
,
uint32_t
offset
);
/*! \brief (Pynq only) Partial bitstream status file path */
#define VTA_PYNQ_BS_IS_PARTIAL "/sys/devices/soc0/amba/f8007000.devcfg/is_partial_bitstream"
/*! \brief (Pynq only) Bitstream destination file path */
...
...
@@ -44,9 +49,6 @@ void xlnkInvalidateCache(void* buf, int size);
/*! \brief (Pynq only) MMIO driver constant */
#define VTA_PYNQ_MMIO_WORD_MASK (~(MMIO_WORD_LENGTH - 1))
/*! \brief Physically contiguous buffer size limit */
#define VTA_MAX_XFER (1<<22)
/*! \brief VTA configuration register address range */
#define VTA_RANGE 0x100
/*! \brief VTA configuration register start value */
...
...
vta/src/runtime.cc
View file @
0faefafc
/*!
* Copyright (c) 2018 by Contributors
* \file runtime.cc
* \brief VTA runtime for PYNQ in C++11
* \brief Generic VTA runtime in C++11.
*
* The runtime depends on specific instruction
* stream spec as specified in hw_spec.h
* It is intended to be used as a dynamic library
* to enable hot swapping of hardware configurations.
*/
#ifdef VTA_PYNQ_TARGET
#include "./pynq/pynq_driver.h"
#endif // VTA_PYNQ_TARGET
#include <vta/driver.h>
#include <vta/hw_spec.h>
#include <vta/runtime.h>
...
...
@@ -245,8 +245,8 @@ class BaseQueue {
if
(
!
coherent_
&&
always_cache_
&&
dram_extent
!=
0
)
{
dram_begin
=
dram_begin
*
elem_bits
/
8
;
dram_extent
=
dram_extent
*
elem_bits
/
8
;
VTAFlushCache
(
reinterpret_cast
<
void
*>
(
dram_phy_addr_
+
dram_begin
)
,
dram_extent
);
VTAFlushCache
(
dram_phy_addr_
+
dram_begin
,
dram_extent
);
}
}
/*! \brief Read barrier to make sure that data written by VTA is visible to CPU. */
...
...
@@ -254,8 +254,8 @@ class BaseQueue {
if
(
!
coherent_
&&
always_cache_
&&
dram_extent
!=
0
)
{
dram_begin
=
dram_begin
*
elem_bits
/
8
;
dram_extent
=
dram_extent
*
elem_bits
/
8
;
VTAInvalidateCache
(
reinterpret_cast
<
void
*>
(
dram_phy_addr_
+
dram_begin
)
,
dram_extent
);
VTAInvalidateCache
(
dram_phy_addr_
+
dram_begin
,
dram_extent
);
}
}
...
...
@@ -818,20 +818,13 @@ class CommandQueue {
void InitSpace() {
  // Prepare the micro-op and instruction queues.
  uop_queue_.InitSpace();
  insn_queue_.InitSpace();

  // Map the register window of each VTA stage.
  vta_fetch_handle_ = VTAMapRegister(VTA_FETCH_ADDR, VTA_RANGE);
  vta_load_handle_ = VTAMapRegister(VTA_LOAD_ADDR, VTA_RANGE);
  vta_compute_handle_ = VTAMapRegister(VTA_COMPUTE_ADDR, VTA_RANGE);
  vta_store_handle_ = VTAMapRegister(VTA_STORE_ADDR, VTA_RANGE);

  // Acquire the device handle; a null handle is treated as a fatal error.
  device_ = VTADeviceAlloc();
  assert(device_ != nullptr);

  printf("Initialize VTACommandHandle...\n");
}
~CommandQueue() {
  // Release the per-stage register windows.
  VTAUnmapRegister(vta_fetch_handle_, VTA_RANGE);
  VTAUnmapRegister(vta_load_handle_, VTA_RANGE);
  VTAUnmapRegister(vta_compute_handle_, VTA_RANGE);
  VTAUnmapRegister(vta_store_handle_, VTA_RANGE);

  // Release the device handle.
  VTADeviceFree(device_);

  printf("Close VTACommandhandle...\n");
}
...
...
@@ -951,44 +944,14 @@ class CommandQueue {
assert
(
reinterpret_cast
<
VTAMemInsn
*>
(
insn_queue_
.
data
())[
insn_queue_
.
count
()
-
1
].
opcode
==
VTA_OPCODE_FINISH
);
#ifdef VTA_PYNQ_TARGET
// Make sure that we don't exceed contiguous physical memory limits
assert
(
insn_queue_
.
count
()
<
VTA_MAX_XFER
);
// NOTE: Register address map is derived from the auto-generated
// driver files available under hardware/build/vivado/<design>/export/driver
// FETCH @ 0x10 : Data signal of insn_count_V
VTAWriteMappedReg
(
vta_fetch_handle_
,
0x10
,
insn_queue_
.
count
());
// FETCH @ 0x18 : Data signal of insns_V
VTAWriteMappedReg
(
vta_fetch_handle_
,
0x18
,
insn_queue_
.
dram_phy_addr
());
// LOAD @ 0x10 : Data signal of inputs_V
VTAWriteMappedReg
(
vta_load_handle_
,
0x10
,
0
);
// LOAD @ 0x18 : Data signal of weight_V
VTAWriteMappedReg
(
vta_load_handle_
,
0x18
,
0
);
// COMPUTE @ 0x20 : Data signal of uops_V
VTAWriteMappedReg
(
vta_compute_handle_
,
0x20
,
0
);
// COMPUTE @ 0x28 : Data signal of biases_V
VTAWriteMappedReg
(
vta_compute_handle_
,
0x28
,
0
);
// STORE @ 0x10 : Data signal of outputs_V
VTAWriteMappedReg
(
vta_store_handle_
,
0x10
,
0
);
// VTA start
VTAWriteMappedReg
(
vta_fetch_handle_
,
0x0
,
VTA_START
);
VTAWriteMappedReg
(
vta_load_handle_
,
0x0
,
VTA_AUTORESTART
);
VTAWriteMappedReg
(
vta_compute_handle_
,
0x0
,
VTA_AUTORESTART
);
VTAWriteMappedReg
(
vta_store_handle_
,
0x0
,
VTA_AUTORESTART
);
// Loop until the VTA is done
unsigned
t
,
flag
=
0
;
for
(
t
=
0
;
t
<
wait_cycles
;
++
t
)
{
flag
=
VTAReadMappedReg
(
vta_compute_handle_
,
0x18
);
if
(
flag
==
VTA_DONE
)
break
;
std
::
this_thread
::
yield
();
}
// Report error if timeout
assert
(
t
<
wait_cycles
);
#endif // VTA_PYNQ_TARGET
assert
(
insn_queue_
.
count
()
*
sizeof
(
VTAGenericInsn
)
<
VTA_MAX_XFER
);
int
timeout
=
VTADeviceRun
(
device_
,
insn_queue_
.
dram_phy_addr
(),
insn_queue_
.
count
(),
wait_cycles
);
assert
(
timeout
==
0
);
// Reset buffers
uop_queue_
.
Reset
();
insn_queue_
.
Reset
();
...
...
@@ -1147,7 +1110,7 @@ class CommandQueue {
void CheckInsnOverFlow() {
  // At each API call, we can at most commit:
  // one pending store, one pending load, and one uop
  //
  // VTA_MAX_XFER is a size in bytes, so compare the queue's byte size (with
  // headroom for four more instructions) rather than its raw instruction
  // count. The leftover count-only test in front of this one was removed.
  if ((insn_queue_.count() + 4) * sizeof(VTAGenericInsn) >= VTA_MAX_XFER) {
    this->AutoSync();
  }
}
...
...
@@ -1155,11 +1118,7 @@ class CommandQueue {
void AutoSync() {
  // Synchronize with a wait budget of 2^31. The literal is unsigned so the
  // shift does not move a bit into the sign position of a signed int.
  this->Synchronize(1u << 31);
}
// VTA handles (register maps)
VTAHandle
vta_fetch_handle_
{
nullptr
};
VTAHandle
vta_load_handle_
{
nullptr
};
VTAHandle
vta_compute_handle_
{
nullptr
};
VTAHandle
vta_store_handle_
{
nullptr
};
// Internal debug flag
int
debug_flag_
{
0
};
// The kernel we currently recording
...
...
@@ -1168,6 +1127,8 @@ class CommandQueue {
UopQueue
<
VTA_MAX_XFER
,
true
,
true
>
uop_queue_
;
// instruction queue
InsnQueue
<
VTA_MAX_XFER
,
true
,
true
>
insn_queue_
;
// Device handle
VTADeviceHandle
device_
{
nullptr
};
};
}
// namespace vta
...
...
@@ -1302,11 +1263,3 @@ void VTASynchronize(VTACommandHandle cmd, uint32_t wait_cycles) {
static_cast
<
vta
::
CommandQueue
*>
(
cmd
)
->
Synchronize
(
wait_cycles
);
}
/*!
 * \brief Report the value of VTA_DYNAMIC_MAGIC.
 * \return VTA_DYNAMIC_MAGIC when that macro is defined at build time, 0 otherwise.
 */
extern "C" int VTARuntimeDynamicMagic() {
#ifndef VTA_DYNAMIC_MAGIC
  return 0;
#else
  return VTA_DYNAMIC_MAGIC;
#endif
}
vta/src/tvm/vta_device_api.cc
View file @
0faefafc
...
...
@@ -11,10 +11,6 @@
#include "../../nnvm/tvm/src/runtime/workspace_pool.h"
extern
"C"
{
typedef
void
(
*
FShutdown
)();
typedef
int
(
*
FDynamicMagic
)();
}
namespace
tvm
{
namespace
runtime
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment