Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
dae77cdb
Commit
dae77cdb
authored
May 02, 2018
by
Thierry Moreau
Committed by
Tianqi Chen
Jul 11, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[HARDWARE, TEST] Fixed hardware generation flow (#34)
parent
9f0e8ffe
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
217 additions
and
197 deletions
+217
-197
vta/apps/pynq_rpc/start_rpc_server.sh
+1
-1
vta/hardware/xilinx/Makefile
+46
-25
vta/hardware/xilinx/scripts/hls.tcl
+45
-38
vta/hardware/xilinx/scripts/vivado.tcl
+9
-9
vta/hardware/xilinx/sim/vta_test.cc
+3
-0
vta/make/config.json
+2
-2
vta/make/sim_sample.json
+2
-2
vta/make/vta_config.py
+79
-1
vta/python/vta/environment.py
+5
-1
vta/python/vta/ir_pass.py
+4
-4
vta/tests/hardware/common/test_lib.cc
+0
-0
vta/tests/hardware/common/test_lib.h
+13
-3
vta/tests/hardware/pynq/Makefile
+7
-12
vta/tests/hardware/pynq/metal_test.cc
+1
-99
No files found.
vta/apps/pynq_rpc/start_rpc_server.sh
View file @
dae77cdb
#!/bin/bash
#!/bin/bash
export
PYTHONPATH
=
${
PYTHONPATH
}
:/home/xilinx/tvm/python:/home/xilinx/vta/python
export
PYTHONPATH
=
${
PYTHONPATH
}
:/home/xilinx/
vta/nnvm/
tvm/python:/home/xilinx/vta/python
export
LD_LIBRARY_PATH
=
${
LD_LIBRARY_PATH
}
:/opt/python3.6/lib/python3.6/site-packages/pynq/drivers/
export
LD_LIBRARY_PATH
=
${
LD_LIBRARY_PATH
}
:/opt/python3.6/lib/python3.6/site-packages/pynq/drivers/
python
-m
vta.exec.rpc_server
python
-m
vta.exec.rpc_server
vta/hardware/xilinx/Makefile
View file @
dae77cdb
...
@@ -13,8 +13,10 @@ VIVADO_HLS = vivado_hls
...
@@ -13,8 +13,10 @@ VIVADO_HLS = vivado_hls
VIVADO
=
vivado
VIVADO
=
vivado
HSI
=
hsi
HSI
=
hsi
# HLS Mode
# HLS mode
MODE
=
all
MODE
=
skip_sim
# Debug flag
DEBUG
=
false
# SLURM
# SLURM
SLURM
=
false
SLURM
=
false
# Prevent generation of DSP
# Prevent generation of DSP
...
@@ -22,15 +24,26 @@ NO_DSP = false
...
@@ -22,15 +24,26 @@ NO_DSP = false
# Prevent generation of ALU
# Prevent generation of ALU
NO_ALU
=
false
NO_ALU
=
false
# Include top-level config file
# Process VTA JSON config
ifndef
config
VTA_CONFIG
=
python
$(CURDIR)
/../../make/vta_config.py
ifneq
(
"$(wildcard ../../config.mk)"
,
""
)
CFLAGS
:=
$(
shell
${
VTA_CONFIG
}
--cflags
)
config
=
../../config.mk
VTA_TARGET
:=
$(
shell
${
VTA_CONFIG
}
--target
)
else
config
=
../../make/config.mk
#---------------------
endif
# VTA Parameters
endif
#--------------------
include
$(config)
VTA_INP_WIDTH
:=
$(
shell
${
VTA_CONFIG
}
--get-inpwidth
)
VTA_WGT_WIDTH
:=
$(
shell
${
VTA_CONFIG
}
--get-wgtwidth
)
VTA_ACC_WIDTH
:=
$(
shell
${
VTA_CONFIG
}
--get-accwidth
)
VTA_OUT_WIDTH
:=
$(
shell
${
VTA_CONFIG
}
--get-outwidth
)
VTA_BATCH
:=
$(
shell
${
VTA_CONFIG
}
--get-batch
)
VTA_IN_BLOCK
:=
$(
shell
${
VTA_CONFIG
}
--get-blockin
)
VTA_OUT_BLOCK
:=
$(
shell
${
VTA_CONFIG
}
--get-blockout
)
VTA_UOP_BUFF_SIZE
:=
$(
shell
${
VTA_CONFIG
}
--get-uopbuffsize
)
VTA_INP_BUFF_SIZE
:=
$(
shell
${
VTA_CONFIG
}
--get-inpbuffsize
)
VTA_WGT_BUFF_SIZE
:=
$(
shell
${
VTA_CONFIG
}
--get-wgtbuffsize
)
VTA_ACC_BUFF_SIZE
:=
$(
shell
${
VTA_CONFIG
}
--get-accbuffsize
)
VTA_OUT_BUFF_SIZE
:=
$(
shell
${
VTA_CONFIG
}
--get-outbuffsize
)
#---------------------
#---------------------
# Compilation parameters
# Compilation parameters
...
@@ -50,8 +63,8 @@ TARGET_PER = \
...
@@ -50,8 +63,8 @@ TARGET_PER = \
$(
shell
echo
"
$$
(( (1000 +
$(VTA_HW_COMP_CLOCK_FREQ)
- 1
)
/
$(VTA_HW_COMP_CLOCK_FREQ)
-
$(VTA_HW_COMP_TIMING_COMP)))
"
)
$(
shell
echo
"
$$
(( (1000 +
$(VTA_HW_COMP_CLOCK_FREQ)
- 1
)
/
$(VTA_HW_COMP_CLOCK_FREQ)
-
$(VTA_HW_COMP_TIMING_COMP)))
"
)
# Derive config name
# Derive config name
CONF
=
\
CONF
_ROOT
=
$(
shell
${
VTA_CONFIG
}
--cfg-str
)
$(VTA_BATCH)
x
$(VTA_IN_BLOCK)
x
$(VTA_OUT_BLOCK)
_
$(VTA_INP_WIDTH)
bx
$(VTA_WGT_WIDTH)
b_
$(VTA_LOG_UOP_BUFF_SIZE)
_
$(VTA_LOG_INP_BUFF_SIZE)
_
$(VTA_LOG_WGT_BUFF_SIZE)
_
$(VTA_LOG_ACC_BUFF_SIZE
)
_
$(VTA_HW_COMP_CLOCK_FREQ)
MHz_
$(TARGET_PER)
ns
CONF
=
$(CONF_ROOT
)
_
$(VTA_HW_COMP_CLOCK_FREQ)
MHz_
$(TARGET_PER)
ns
IP_BUILD_PATH
=
$(BUILD_DIR)
/hls/
$(CONF)
IP_BUILD_PATH
=
$(BUILD_DIR)
/hls/
$(CONF)
HW_BUILD_PATH
=
$(BUILD_DIR)
/vivado/
$(CONF)
HW_BUILD_PATH
=
$(BUILD_DIR)
/vivado/
$(CONF)
...
@@ -60,26 +73,34 @@ ifeq ($(SLURM), true)
...
@@ -60,26 +73,34 @@ ifeq ($(SLURM), true)
HW_BUILD_PATH
=
/scratch/vivado/
$(CONF)
HW_BUILD_PATH
=
/scratch/vivado/
$(CONF)
endif
endif
.PHONY
:
all ip bit driver clean clean_all
# IP file path
IP_PATH
=
$(BUILD_DIR)
/hls/
$(CONF)
/solution0/impl/ip/xilinx_com_hls_vta_1_0.zip
# Bitstream file path
BIT_PATH
=
$(BUILD_DIR)
/vivado/
$(CONF)
/export/
$(CONF)
.bit
all
:
bit
.PHONY
:
all ip bit bsp clean clean_all
ip
:
all
:
bsp
ip
:
$(IP_PATH)
bit
:
$(BIT_PATH)
$(IP_PATH)
:
$(SRC_DIR)/*
mkdir
-p
$(IP_BUILD_PATH)
mkdir
-p
$(IP_BUILD_PATH)
cd
$(IP_BUILD_PATH)
&&
\
cd
$(IP_BUILD_PATH)
&&
\
$(VIVADO_HLS)
-f
$(SCRIPT_DIR)
/hls.tcl
\
$(VIVADO_HLS)
-f
$(SCRIPT_DIR)
/hls.tcl
\
-tclargs
$(SRC_DIR)
$(SIM_DIR)
$(TEST_DIR)
$(INCLUDE_DIR)
$(TARGET_PE
R)
\
-tclargs
$(SRC_DIR)
$(SIM_DIR)
$(TEST_DIR)
$(INCLUDE_DI
R)
\
$(VTA_LOG_INP_WIDTH)
$(VTA_LOG_WGT_WIDTH)
$(VTA_LOG_ACC_WIDTH)
$(VTA_LOG_OUT_WIDTH
)
\
$(MODE)
$(DEBUG)
$(NO_DSP)
$(NO_ALU)
$(TARGET_PER
)
\
$(VTA_LOG_BATCH)
$(VTA_LOG_BLOCK_OUT)
$(VTA_LOG_BLOCK_IN
)
\
$(VTA_INP_WIDTH)
$(VTA_WGT_WIDTH)
$(VTA_ACC_WIDTH)
$(VTA_OUT_WIDTH
)
\
$(VTA_LOG_UOP_BUFF_SIZE)
$(VTA_LOG_INP_BUFF_SIZE)
$(VTA_LOG_WGT_BUFF_SIZE
)
\
$(VTA_BATCH)
$(VTA_IN_BLOCK)
$(VTA_OUT_BLOCK
)
\
$(VTA_LOG_ACC_BUFF_SIZE)
$(VTA_LOG_OU
T_BUFF_SIZE)
\
$(VTA_UOP_BUFF_SIZE)
$(VTA_INP_BUFF_SIZE)
$(VTA_WG
T_BUFF_SIZE)
\
$(MODE)
$(NO_DSP)
$(NO_ALU
)
$(VTA_ACC_BUFF_SIZE)
$(VTA_OUT_BUFF_SIZE
)
ifeq
($(SLURM),
true)
ifeq
($(SLURM),
true)
mkdir
-p
$(BUILD_DIR)/hls
mkdir
-p
$(BUILD_DIR)/hls
mv
$(IP_BUILD_PATH)
$(BUILD_DIR)/hls/.
mv
$(IP_BUILD_PATH)
$(BUILD_DIR)/hls/.
endif
endif
bit
:
ip
$(BIT_PATH)
:
$(IP_PATH)
mkdir
-p
$(HW_BUILD_PATH)
mkdir
-p
$(HW_BUILD_PATH)
cd
$(HW_BUILD_PATH)
&&
\
cd
$(HW_BUILD_PATH)
&&
\
$(VIVADO)
-mode
tcl
-source
$(SCRIPT_DIR)
/vivado.tcl
\
$(VIVADO)
-mode
tcl
-source
$(SCRIPT_DIR)
/vivado.tcl
\
...
@@ -92,12 +113,12 @@ ifeq ($(SLURM), true)
...
@@ -92,12 +113,12 @@ ifeq ($(SLURM), true)
mv
$(HW_BUILD_PATH)
$(BUILD_DIR)/vivado/.
mv
$(HW_BUILD_PATH)
$(BUILD_DIR)/vivado/.
endif
endif
driver
:
bit
bsp
:
$(BIT_PATH)
cd
$(HW_BUILD_PATH)
&&
$(HSI)
-mode
tcl
-source
$(SCRIPT_DIR)
/hsi.tcl
-nojournal
-nolog
cd
$(HW_BUILD_PATH)
&&
$(HSI)
-mode
tcl
-source
$(SCRIPT_DIR)
/hsi.tcl
-nojournal
-nolog
cd
$(HW_BUILD_PATH)
/bsp
&&
make
cd
$(HW_BUILD_PATH)
/bsp
&&
make
clean
:
clean
:
rm
-rf
*
.out
*
.log
*
.sb figures
rm
-rf
*
.out
*
.log
*
.sb figures
clean
_
all
:
clean
cleanall
:
clean
rm
-rf
$(BUILD_DIR)
rm
-rf
$(BUILD_DIR)
vta/hardware/xilinx/scripts/hls.tcl
View file @
dae77cdb
...
@@ -9,65 +9,69 @@
...
@@ -9,65 +9,69 @@
# Arg 2: path to sim sources
# Arg 2: path to sim sources
# Arg 3: path to test sources
# Arg 3: path to test sources
# Arg 4: path to include sources
# Arg 4: path to include sources
# Arg 5: target clock period
# Arg 5: mode
# Arg 6: input type width
(
log
)
# Arg 6: debug
# Arg 7: weight type width
(
log
)
# Arg 7: no_dsp
# Arg 8: accum type width
(
log
)
# Arg 8: no_alu
# Arg 9: output type width
(
log
)
# Arg 9: target clock period
# Arg 10: batch size
(
log
)
# Arg 10: input type width
(
log
)
# Arg 11: in block size
(
log
)
# Arg 11: weight type width
(
log
)
# Arg 12: out block size
(
log
)
# Arg 12: accum type width
(
log
)
# Arg 13: uop buffer size in B
(
log
)
# Arg 13: output type width
(
log
)
# Arg 14: inp buffer size in B
(
log
)
# Arg 14: batch size
(
log
)
# Arg 15: wgt buffer size in B
(
log
)
# Arg 15: in block size
(
log
)
# Arg 16: acc buffer size in B
(
log
)
# Arg 16: out block size
(
log
)
# Arg 17: out buffer size in B
(
log
)
# Arg 17: uop buffer size in B
(
log
)
# Arg 18: mode
# Arg 18: inp buffer size in B
(
log
)
# Arg 19: no_dsp
# Arg 19: wgt buffer size in B
(
log
)
# Arg 20: no_alu
# Arg 20: acc buffer size in B
(
log
)
# Arg 21: out buffer size in B
(
log
)
if
{
[
llength
$argv
]
eq 2
2
}
{
if
{
[
llength
$argv
]
eq 2
3
}
{
set src_dir
[
lindex
$argv
2
]
set src_dir
[
lindex
$argv
2
]
set sim_dir
[
lindex
$argv
3
]
set sim_dir
[
lindex
$argv
3
]
set test_dir
[
lindex
$argv
4
]
set test_dir
[
lindex
$argv
4
]
set include_dir
[
lindex
$argv
5
]
set include_dir
[
lindex
$argv
5
]
set target_period
[
lindex
$argv
6
]
set mode
[
lindex
$argv
6
]
set inp_width
[
lindex
$argv
7
]
set debug
[
lindex
$argv
7
]
set wgt_width
[
lindex
$argv
8
]
set no_dsp
[
lindex
$argv
8
]
set acc_width
[
lindex
$argv
9
]
set no_alu
[
lindex
$argv
9
]
set out_width
[
lindex
$argv
10
]
set target_period
[
lindex
$argv
10
]
set batch
[
lindex
$argv
11
]
set inp_width
[
lindex
$argv
11
]
set block_in
[
lindex
$argv
12
]
set wgt_width
[
lindex
$argv
12
]
set block_out
[
lindex
$argv
13
]
set acc_width
[
lindex
$argv
13
]
set uop_buff_size
[
lindex
$argv
14
]
set out_width
[
lindex
$argv
14
]
set inp_buff_size
[
lindex
$argv
15
]
set batch
[
lindex
$argv
15
]
set wgt_buff_size
[
lindex
$argv
16
]
set block_in
[
lindex
$argv
16
]
set acc_buff_size
[
lindex
$argv
17
]
set block_out
[
lindex
$argv
17
]
set out_buff_size
[
lindex
$argv
18
]
set uop_buff_size
[
lindex
$argv
18
]
set mode
[
lindex
$argv
19
]
set inp_buff_size
[
lindex
$argv
19
]
set no_dsp
[
lindex
$argv
20
]
set wgt_buff_size
[
lindex
$argv
20
]
set no_alu
[
lindex
$argv
21
]
set acc_buff_size
[
lindex
$argv
21
]
set out_buff_size
[
lindex
$argv
22
]
}
else
{
}
else
{
set src_dir
"../src"
set src_dir
"../src"
set sim_dir
"../sim"
set sim_dir
"../sim"
set test_dir
"../../src/test"
set test_dir
"../../src/test"
set include_dir
"../../include"
set include_dir
"../../include"
set mode
"all"
set debug
"false"
set no_dsp
"true"
set no_alu
"false"
set target_period 10
set target_period 10
set inp_width 3
set inp_width 3
set wgt_width 3
set wgt_width 3
set acc_width 5
set acc_width 5
set out_width 3
set out_width 3
set batch 1
set batch 1
set block_out 4
set block_in 4
set block_in 4
set block_out 4
set uop_buff_size 15
set uop_buff_size 15
set inp_buff_size 15
set inp_buff_size 15
set wgt_buff_size 15
set wgt_buff_size 15
set acc_buff_size 17
set acc_buff_size 17
set out_buff_size 15
set out_buff_size 15
set mode
"all"
exit
set no_dsp
"true"
set no_alu
"false"
}
}
# Initializes the HLS design and sets HLS pragmas for memory partitioning.
# Initializes the HLS design and sets HLS pragmas for memory partitioning.
...
@@ -124,12 +128,15 @@ proc init_design {per inp_width wgt_width out_width batch block_in block_out} {
...
@@ -124,12 +128,15 @@ proc init_design {per inp_width wgt_width out_width batch block_in block_out} {
# C define flags to pass to compiler
# C define flags to pass to compiler
set cflags
"-I
$include
_dir -I
$src
_dir -I
$test
_dir
\
set cflags
"-I
$include
_dir -I
$src
_dir -I
$test
_dir
\
-DVTA_
DEBUG=0 -DVTA_
LOG_WGT_WIDTH=
$wgt
_width -DVTA_LOG_INP_WIDTH=
$inp
_width
\
-DVTA_LOG_WGT_WIDTH=
$wgt
_width -DVTA_LOG_INP_WIDTH=
$inp
_width
\
-DVTA_LOG_ACC_WIDTH=
$acc
_width -DVTA_LOG_OUT_WIDTH=
$out
_width
\
-DVTA_LOG_ACC_WIDTH=
$acc
_width -DVTA_LOG_OUT_WIDTH=
$out
_width
\
-DVTA_LOG_BATCH=
$batch
-DVTA_LOG_BLOCK_OUT=
$block
_out -DVTA_LOG_BLOCK_IN=
$block
_in
\
-DVTA_LOG_BATCH=
$batch
-DVTA_LOG_BLOCK_OUT=
$block
_out -DVTA_LOG_BLOCK_IN=
$block
_in
\
-DVTA_LOG_UOP_BUFF_SIZE=
$uop
_buff_size -DVTA_LOG_INP_BUFF_SIZE=
$inp
_buff_size
\
-DVTA_LOG_UOP_BUFF_SIZE=
$uop
_buff_size -DVTA_LOG_INP_BUFF_SIZE=
$inp
_buff_size
\
-DVTA_LOG_WGT_BUFF_SIZE=
$wgt
_buff_size -DVTA_LOG_ACC_BUFF_SIZE=
$acc
_buff_size
\
-DVTA_LOG_WGT_BUFF_SIZE=
$wgt
_buff_size -DVTA_LOG_ACC_BUFF_SIZE=
$acc
_buff_size
\
-DVTA_LOG_OUT_BUFF_SIZE=
$out
_buff_size"
-DVTA_LOG_OUT_BUFF_SIZE=
$out
_buff_size"
if
{
$debug
==
"true"
}
{
append cflags
" -DVTA_DEBUG=1"
}
if
{
$no
_dsp==
"true"
}
{
if
{
$no
_dsp==
"true"
}
{
append cflags
" -DNO_DSP"
append cflags
" -DNO_DSP"
}
}
...
...
vta/hardware/xilinx/scripts/vivado.tcl
View file @
dae77cdb
...
@@ -26,15 +26,15 @@ if { [llength $argv] eq 12 } {
...
@@ -26,15 +26,15 @@ if { [llength $argv] eq 12 } {
set ip_path
[
lindex
$argv
0
]
set ip_path
[
lindex
$argv
0
]
set num_threads
[
lindex
$argv
1
]
set num_threads
[
lindex
$argv
1
]
set clock_freq
[
lindex
$argv
2
]
set clock_freq
[
lindex
$argv
2
]
set inp_width
[
lindex
$argv
3
]
set inp_width
[
expr
1 <<
[
lindex
$argv
3
]
]
set wgt_width
[
lindex
$argv
4
]
set wgt_width
[
expr
1 <<
[
lindex
$argv
4
]
]
set out_width
[
lindex
$argv
5
]
set out_width
[
expr
1 <<
[
lindex
$argv
5
]
]
set batch
[
lindex
$argv
6
]
set batch
[
expr
1 <<
[
lindex
$argv
6
]
]
set out_block
[
lindex
$argv
7
]
set out_block
[
expr
1 <<
[
lindex
$argv
7
]
]
set in_block
[
lindex
$argv
8
]
set in_block
[
expr
1 <<
[
lindex
$argv
8
]
]
set inp_mem_size
[
lindex
$argv
9
]
set inp_mem_size
[
expr
1 <<
[
lindex
$argv
9
]
]
set wgt_mem_size
[
lindex
$argv
10
]
set wgt_mem_size
[
expr
1 <<
[
lindex
$argv
10
]
]
set out_mem_size
[
lindex
$argv
11
]
set out_mem_size
[
expr
1 <<
[
lindex
$argv
11
]
]
if
{
$clock
_freq eq 100
}
{
if
{
$clock
_freq eq 100
}
{
set clock_id 0
set clock_id 0
puts
"Setting clock frequency to 100MHz"
puts
"Setting clock frequency to 100MHz"
...
...
vta/hardware/xilinx/sim/vta_test.cc
View file @
dae77cdb
...
@@ -53,5 +53,8 @@ int main(void) {
...
@@ -53,5 +53,8 @@ int main(void) {
status
|=
blocked_gemm_test
(
256
,
256
,
VTA_BLOCK_OUT
*
4
,
true
,
1
);
status
|=
blocked_gemm_test
(
256
,
256
,
VTA_BLOCK_OUT
*
4
,
true
,
1
);
status
|=
blocked_gemm_test
(
256
,
256
,
VTA_BLOCK_OUT
*
4
,
false
,
1
);
status
|=
blocked_gemm_test
(
256
,
256
,
VTA_BLOCK_OUT
*
4
,
false
,
1
);
// Simple GEMM unit test
status
|=
gemm_test
(
64
,
64
,
64
,
true
);
return
status
;
return
status
;
}
}
vta/make/config.json
View file @
dae77cdb
...
@@ -7,8 +7,8 @@
...
@@ -7,8 +7,8 @@
"LOG_BATCH"
:
0
,
"LOG_BATCH"
:
0
,
"LOG_BLOCK_IN"
:
4
,
"LOG_BLOCK_IN"
:
4
,
"LOG_BLOCK_OUT"
:
4
,
"LOG_BLOCK_OUT"
:
4
,
"LOG_UOP_BUFF_SIZE"
:
1
5
,
"LOG_UOP_BUFF_SIZE"
:
1
4
,
"LOG_INP_BUFF_SIZE"
:
15
,
"LOG_INP_BUFF_SIZE"
:
15
,
"LOG_WGT_BUFF_SIZE"
:
1
5
,
"LOG_WGT_BUFF_SIZE"
:
1
8
,
"LOG_ACC_BUFF_SIZE"
:
17
"LOG_ACC_BUFF_SIZE"
:
17
}
}
vta/make/sim_sample.json
View file @
dae77cdb
...
@@ -7,8 +7,8 @@
...
@@ -7,8 +7,8 @@
"LOG_BATCH"
:
0
,
"LOG_BATCH"
:
0
,
"LOG_BLOCK_IN"
:
4
,
"LOG_BLOCK_IN"
:
4
,
"LOG_BLOCK_OUT"
:
4
,
"LOG_BLOCK_OUT"
:
4
,
"LOG_UOP_BUFF_SIZE"
:
1
5
,
"LOG_UOP_BUFF_SIZE"
:
1
4
,
"LOG_INP_BUFF_SIZE"
:
15
,
"LOG_INP_BUFF_SIZE"
:
15
,
"LOG_WGT_BUFF_SIZE"
:
1
5
,
"LOG_WGT_BUFF_SIZE"
:
1
8
,
"LOG_ACC_BUFF_SIZE"
:
17
"LOG_ACC_BUFF_SIZE"
:
17
}
}
vta/make/vta_config.py
View file @
dae77cdb
...
@@ -28,6 +28,32 @@ def main():
...
@@ -28,6 +28,32 @@ def main():
help
=
"print all the config json"
)
help
=
"print all the config json"
)
parser
.
add_argument
(
"--target"
,
action
=
"store_true"
,
parser
.
add_argument
(
"--target"
,
action
=
"store_true"
,
help
=
"print the target"
)
help
=
"print the target"
)
parser
.
add_argument
(
"--cfg-str"
,
action
=
"store_true"
,
help
=
"print the configuration string"
)
parser
.
add_argument
(
"--get-inpwidth"
,
action
=
"store_true"
,
help
=
"returns log of input bitwidth"
)
parser
.
add_argument
(
"--get-wgtwidth"
,
action
=
"store_true"
,
help
=
"returns log of weight bitwidth"
)
parser
.
add_argument
(
"--get-accwidth"
,
action
=
"store_true"
,
help
=
"returns log of accum bitwidth"
)
parser
.
add_argument
(
"--get-outwidth"
,
action
=
"store_true"
,
help
=
"returns log of output bitwidth"
)
parser
.
add_argument
(
"--get-batch"
,
action
=
"store_true"
,
help
=
"returns log of tensor batch dimension"
)
parser
.
add_argument
(
"--get-blockin"
,
action
=
"store_true"
,
help
=
"returns log of tensor block in dimension"
)
parser
.
add_argument
(
"--get-blockout"
,
action
=
"store_true"
,
help
=
"returns log of tensor block out dimension"
)
parser
.
add_argument
(
"--get-uopbuffsize"
,
action
=
"store_true"
,
help
=
"returns log of micro-op buffer size in B"
)
parser
.
add_argument
(
"--get-inpbuffsize"
,
action
=
"store_true"
,
help
=
"returns log of input buffer size in B"
)
parser
.
add_argument
(
"--get-wgtbuffsize"
,
action
=
"store_true"
,
help
=
"returns log of weight buffer size in B"
)
parser
.
add_argument
(
"--get-accbuffsize"
,
action
=
"store_true"
,
help
=
"returns log of accum buffer size in B"
)
parser
.
add_argument
(
"--get-outbuffsize"
,
action
=
"store_true"
,
help
=
"returns log of output buffer size in B"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
len
(
sys
.
argv
)
==
1
:
if
len
(
sys
.
argv
)
==
1
:
...
@@ -46,13 +72,17 @@ def main():
...
@@ -46,13 +72,17 @@ def main():
raise
RuntimeError
(
"Cannot find config in
%
s"
%
str
(
path_list
))
raise
RuntimeError
(
"Cannot find config in
%
s"
%
str
(
path_list
))
cfg
=
json
.
load
(
open
(
ok_path_list
[
0
]))
cfg
=
json
.
load
(
open
(
ok_path_list
[
0
]))
cfg
[
"LOG_OUT_WIDTH"
]
=
cfg
[
"LOG_INP_WIDTH"
]
cfg
[
"LOG_OUT_WIDTH"
]
=
cfg
[
"LOG_INP_WIDTH"
]
cfg
[
"LOG_OUT_BUFF_SIZE"
]
=
cfg
[
"LOG_ACC_BUFF_SIZE"
]
+
cfg
[
"LOG_ACC_WIDTH"
]
-
cfg
[
"LOG_OUT_WIDTH"
]
pkg
=
get_pkg_config
(
cfg
)
pkg
=
get_pkg_config
(
cfg
)
if
args
.
target
:
if
args
.
target
:
print
(
pkg
.
target
)
print
(
pkg
.
target
)
if
args
.
cflags
:
if
args
.
cflags
:
print
(
" "
.
join
(
pkg
.
cflags
))
cflags_str
=
" "
.
join
(
pkg
.
cflags
)
if
cfg
[
"TARGET"
]
==
"pynq"
:
cflags_str
+=
" -DVTA_TARGET_PYNQ"
print
(
cflags_str
)
if
args
.
ldflags
:
if
args
.
ldflags
:
print
(
" "
.
join
(
pkg
.
ldflags
))
print
(
" "
.
join
(
pkg
.
ldflags
))
...
@@ -60,6 +90,54 @@ def main():
...
@@ -60,6 +90,54 @@ def main():
if
args
.
cfg_json
:
if
args
.
cfg_json
:
print
(
pkg
.
cfg_json
)
print
(
pkg
.
cfg_json
)
if
args
.
cfg_str
:
cfg_str
=
"{}x{}x{}_{}bx{}b_{}_{}_{}_{}"
.
format
(
(
1
<<
cfg
[
"LOG_BATCH"
]),
(
1
<<
cfg
[
"LOG_BLOCK_IN"
]),
(
1
<<
cfg
[
"LOG_BLOCK_OUT"
]),
(
1
<<
cfg
[
"LOG_INP_WIDTH"
]),
(
1
<<
cfg
[
"LOG_WGT_WIDTH"
]),
cfg
[
"LOG_UOP_BUFF_SIZE"
],
cfg
[
"LOG_INP_BUFF_SIZE"
],
cfg
[
"LOG_WGT_BUFF_SIZE"
],
cfg
[
"LOG_ACC_BUFF_SIZE"
])
print
cfg_str
if
args
.
get_inpwidth
:
print
(
cfg
[
"LOG_INP_WIDTH"
])
if
args
.
get_wgtwidth
:
print
(
cfg
[
"LOG_WGT_WIDTH"
])
if
args
.
get_accwidth
:
print
(
cfg
[
"LOG_ACC_WIDTH"
])
if
args
.
get_outwidth
:
print
(
cfg
[
"LOG_OUT_WIDTH"
])
if
args
.
get_batch
:
print
(
cfg
[
"LOG_BATCH"
])
if
args
.
get_blockin
:
print
(
cfg
[
"LOG_BLOCK_IN"
])
if
args
.
get_blockout
:
print
(
cfg
[
"LOG_BLOCK_OUT"
])
if
args
.
get_uopbuffsize
:
print
(
cfg
[
"LOG_UOP_BUFF_SIZE"
])
if
args
.
get_inpbuffsize
:
print
(
cfg
[
"LOG_INP_BUFF_SIZE"
])
if
args
.
get_wgtbuffsize
:
print
(
cfg
[
"LOG_WGT_BUFF_SIZE"
])
if
args
.
get_outbuffsize
:
print
(
cfg
[
"LOG_OUT_BUFF_SIZE"
])
if
args
.
get_accbuffsize
:
print
(
cfg
[
"LOG_ACC_BUFF_SIZE"
])
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
main
()
main
()
vta/python/vta/environment.py
View file @
dae77cdb
...
@@ -130,11 +130,15 @@ class Environment(object):
...
@@ -130,11 +130,15 @@ class Environment(object):
self
.
BLOCK_IN
*
self
.
BLOCK_IN
*
self
.
WGT_WIDTH
)
self
.
WGT_WIDTH
)
self
.
ACC_ELEM_BITS
=
(
self
.
BATCH
*
self
.
ACC_ELEM_BITS
=
(
self
.
BATCH
*
self
.
BLOCK_
IN
*
self
.
BLOCK_
OUT
*
self
.
ACC_WIDTH
)
self
.
ACC_WIDTH
)
self
.
OUT_ELEM_BITS
=
(
self
.
BATCH
*
self
.
BLOCK_OUT
*
self
.
OUT_WIDTH
)
self
.
INP_ELEM_BYTES
=
self
.
INP_ELEM_BITS
//
8
self
.
INP_ELEM_BYTES
=
self
.
INP_ELEM_BITS
//
8
self
.
WGT_ELEM_BYTES
=
self
.
WGT_ELEM_BITS
//
8
self
.
WGT_ELEM_BYTES
=
self
.
WGT_ELEM_BITS
//
8
self
.
ACC_ELEM_BYTES
=
self
.
ACC_ELEM_BITS
//
8
self
.
ACC_ELEM_BYTES
=
self
.
ACC_ELEM_BITS
//
8
self
.
OUT_ELEM_BYTES
=
self
.
OUT_ELEM_BITS
//
8
# dtypes
# dtypes
self
.
acc_dtype
=
"int
%
d"
%
self
.
ACC_WIDTH
self
.
acc_dtype
=
"int
%
d"
%
self
.
ACC_WIDTH
self
.
inp_dtype
=
"int
%
d"
%
self
.
INP_WIDTH
self
.
inp_dtype
=
"int
%
d"
%
self
.
INP_WIDTH
...
...
vta/python/vta/ir_pass.py
View file @
dae77cdb
...
@@ -339,7 +339,7 @@ def inject_dma_intrin(stmt_in):
...
@@ -339,7 +339,7 @@ def inject_dma_intrin(stmt_in):
base
=
0
base
=
0
for
i
in
range
(
1
,
ndim
+
1
):
for
i
in
range
(
1
,
ndim
+
1
):
if
not
util
.
equal_const_int
(
buf
.
strides
[
ndim
-
i
]
-
x_size
,
0
):
if
not
util
.
equal_const_int
(
buf
.
strides
[
ndim
-
i
]
-
x_size
,
0
):
raise
RuntimeError
(
"scope
%
s need
need
to have block=
%
d"
%
(
scope
,
elem_block
))
raise
RuntimeError
(
"scope
%
s need
s
to have block=
%
d"
%
(
scope
,
elem_block
))
x_size
=
x_size
*
buf
.
shape
[
ndim
-
i
]
x_size
=
x_size
*
buf
.
shape
[
ndim
-
i
]
if
util
.
equal_const_int
(
x_size
-
elem_block
,
0
):
if
util
.
equal_const_int
(
x_size
-
elem_block
,
0
):
base
=
i
+
1
base
=
i
+
1
...
@@ -469,10 +469,10 @@ def inject_dma_intrin(stmt_in):
...
@@ -469,10 +469,10 @@ def inject_dma_intrin(stmt_in):
if
pad_before
or
pad_after
:
if
pad_before
or
pad_after
:
raise
RuntimeError
(
"Do not support copy into DRAM with pad"
)
raise
RuntimeError
(
"Do not support copy into DRAM with pad"
)
if
src
.
scope
==
env
.
acc_scope
:
if
src
.
scope
==
env
.
acc_scope
:
elem_width
=
env
.
INP_WIDTH
# output compression to inp type
elem_width
=
env
.
OUT_WIDTH
elem_bytes
=
env
.
INP_ELEM_BYTES
# output compression to inp type
elem_bytes
=
env
.
OUT_ELEM_BYTES
mem_type
=
env
.
dev
.
MEM_ID_OUT
mem_type
=
env
.
dev
.
MEM_ID_OUT
data_type
=
"int
%
d"
%
env
.
INP
_WIDTH
data_type
=
"int
%
d"
%
env
.
OUT
_WIDTH
task_qid
=
env
.
dev
.
QID_STORE_OUT
task_qid
=
env
.
dev
.
QID_STORE_OUT
else
:
else
:
raise
RuntimeError
(
"Do not support copy
%
s->dram"
%
(
src
.
scope
))
raise
RuntimeError
(
"Do not support copy
%
s->dram"
%
(
src
.
scope
))
...
...
vta/tests/hardware/common/test_lib.cc
View file @
dae77cdb
This diff is collapsed.
Click to expand it.
vta/tests/hardware/common/test_lib.h
View file @
dae77cdb
/*!
/*!
* Copyright (c) 2018 by Contributors
* Copyright (c) 2018 by Contributors
* \file
vta_
test_lib.cpp
* \file test_lib.cpp
* \brief Test library for the VTA design simulation and driver tests.
* \brief Test library for the VTA design simulation and driver tests.
*/
*/
...
@@ -17,9 +17,9 @@
...
@@ -17,9 +17,9 @@
#include <vta/driver.h>
#include <vta/driver.h>
#ifdef VTA_
PYNQ_TARGET
#ifdef VTA_
TARGET_PYNQ
#include "../../../src/pynq/pynq_driver.h"
#include "../../../src/pynq/pynq_driver.h"
#endif // VTA_
PYNQ_TARGET
#endif // VTA_
TARGET_PYNQ
typedef
uint64_t
axi_T
;
typedef
uint64_t
axi_T
;
typedef
uint32_t
uop_T
;
typedef
uint32_t
uop_T
;
...
@@ -300,4 +300,14 @@ int alu_test(int opcode, bool use_imm, int batch, int vector_size, bool uop_comp
...
@@ -300,4 +300,14 @@ int alu_test(int opcode, bool use_imm, int batch, int vector_size, bool uop_comp
int
blocked_gemm_test
(
int
batch
,
int
channels
,
int
block
,
bool
uop_compression
,
int
blocked_gemm_test
(
int
batch
,
int
channels
,
int
block
,
bool
uop_compression
,
int
virtual_threads
);
int
virtual_threads
);
/*!
* \brief VTA GEMM unit test.
* \param batch Batch size.
* \param in_channels Input channels.
* \param out_channels Output channels.
* \param uop_compression Apply micro-op compression.
* \return Number of errors from the test run.
*/
int
gemm_test
(
int
batch
,
int
in_channels
,
int
out_channels
,
bool
uop_compression
);
#endif // TESTS_HARDWARE_COMMON_TEST_LIB_H_
#endif // TESTS_HARDWARE_COMMON_TEST_LIB_H_
vta/tests/hardware/pynq/Makefile
View file @
dae77cdb
CC
?=
g++
CC
?=
g++
CFLAGS
=
-Wall
-O3
-std
=
c++11
-I
/usr/include
CFLAGS
=
-Wall
-O3
-std
=
c++11
-I
/usr/include
LDFLAGS
=
-L
/usr/lib
-L
/opt/python3.6/lib/python3.6/site-packages/pynq/lib/
LDFLAGS
=
-L
/usr/lib
-L
/opt/python3.6/lib/python3.6/site-packages/pynq/lib/
LIBS
=
-l
:libsds_lib.so
-l
:libdma.so
LIBS
=
-l
:libsds_lib.so
-l
:libdma.so
-lstdc
++
INCLUDE_DIR
=
../../../include
INCLUDE_DIR
=
../../../include
DRIVER_DIR
=
../../../src/pynq
DRIVER_DIR
=
../../../src/pynq
TESTLIB_DIR
=
../common
TESTLIB_DIR
=
../common
...
@@ -10,19 +10,14 @@ SOURCES = pynq_driver.cc test_lib.cc
...
@@ -10,19 +10,14 @@ SOURCES = pynq_driver.cc test_lib.cc
OBJECTS
=
pynq_driver.o test_lib.o metal_test.o
OBJECTS
=
pynq_driver.o test_lib.o metal_test.o
EXECUTABLE
=
vta
EXECUTABLE
=
vta
# Include top-level config file
# Include VTA config
ifndef
config
VTA_CONFIG
=
python ../../../make/vta_config.py
ifneq
(
"$(wildcard ../../../config.mk)"
,
""
)
CFLAGS
+=
`
${
VTA_CONFIG
}
--cflags
`
config
=
../../../config.mk
LDFLAGS
+=
`
${
VTA_CONFIG
}
--ldflags
`
else
VTA_TARGET
:=
$(
shell
${
VTA_CONFIG
}
--target
)
config
=
../../../make/config.mk
endif
endif
include
$(config)
# Define flags
# Define flags
CFLAGS
+=
-I
$(INCLUDE_DIR)
-DNO_SIM
-DDEBUG
=
0
CFLAGS
+=
-I
$(INCLUDE_DIR)
-DNO_SIM
-DVTA_DEBUG
=
0
CFLAGS
+=
$(ADD_CFLAGS)
# All Target
# All Target
all
:
$(EXECUTABLE)
all
:
$(EXECUTABLE)
...
...
vta/tests/hardware/pynq/metal_test.cc
View file @
dae77cdb
/*!
/*!
* Copyright (c) 2018 by Contributors
* Copyright (c) 2018 by Contributors
* \file
driver
_test.cpp
* \file
metal
_test.cpp
* \brief Bare-metal test to test driver and VTA design.
* \brief Bare-metal test to test driver and VTA design.
*/
*/
...
@@ -13,104 +13,6 @@
...
@@ -13,104 +13,6 @@
#include "../../../src/pynq/pynq_driver.h"
#include "../../../src/pynq/pynq_driver.h"
#include "../common/test_lib.h"
#include "../common/test_lib.h"
// VTA invocation (present the same abstraction as in the simulation tests)
uint64_t
vta
(
uint32_t
insn_count
,
VTAGenericInsn
*
insns
,
VTAUop
*
uops
,
inp_T
*
inputs
,
wgt_T
*
weights
,
acc_T
*
biases
,
inp_T
*
outputs
)
{
// Performance counter variables
uint64_t
t_fpga
;
struct
timespec
start
,
stop
;
// Derive bitstream file
char
bitstream
[
128
];
char
str_batch_size
[
4
];
char
str_block_out_size
[
4
];
char
str_block_in_size
[
4
];
char
str_block_bit_width
[
4
];
snprintf
(
str_batch_size
,
sizeof
(
str_batch_size
),
"%d"
,
VTA_BATCH
);
snprintf
(
str_block_out_size
,
sizeof
(
str_block_out_size
),
"%d"
,
VTA_BLOCK_OUT
);
snprintf
(
str_block_in_size
,
sizeof
(
str_block_in_size
),
"%d"
,
VTA_BLOCK_IN
);
snprintf
(
str_block_bit_width
,
sizeof
(
str_block_bit_width
),
"%d"
,
VTA_WGT_WIDTH
);
snprintf
(
bitstream
,
sizeof
(
bitstream
),
"%s"
,
"vta.bit"
);
#if VTA_DEBUG == 1
printf
(
"INFO - Programming FPGA: %s!
\n
"
,
bitstream
);
#endif
// Program VTA
VTAProgram
(
bitstream
);
// Get VTA handles
VTAHandle
vta_fetch_handle
=
VTAMapRegister
(
VTA_FETCH_ADDR
,
VTA_RANGE
);
VTAHandle
vta_load_handle
=
VTAMapRegister
(
VTA_LOAD_ADDR
,
VTA_RANGE
);
VTAHandle
vta_compute_handle
=
VTAMapRegister
(
VTA_COMPUTE_ADDR
,
VTA_RANGE
);
VTAHandle
vta_store_handle
=
VTAMapRegister
(
VTA_STORE_ADDR
,
VTA_RANGE
);
// Physical address pointers
uint32_t
insn_phy
=
insns
?
cma_get_phy_addr
(
insns
)
:
0
;
uint32_t
uop_phy
=
uops
?
cma_get_phy_addr
(
uops
)
:
0
;
uint32_t
input_phy
=
inputs
?
cma_get_phy_addr
(
inputs
)
:
0
;
uint32_t
weight_phy
=
weights
?
cma_get_phy_addr
(
weights
)
:
0
;
uint32_t
bias_phy
=
biases
?
cma_get_phy_addr
(
biases
)
:
0
;
uint32_t
output_phy
=
outputs
?
cma_get_phy_addr
(
outputs
)
:
0
;
#if VTA_DEBUG == 1
printf
(
"INFO - Starting FPGA!
\n
"
);
#endif
clock_gettime
(
CLOCK_REALTIME
,
&
start
);
// FETCH @ 0x10 : Data signal of insn_count_V
VTAWriteMappedReg
(
vta_fetch_handle
,
0x10
,
insn_count
);
// FETCH @ 0x18 : Data signal of insns_V
if
(
insns
)
VTAWriteMappedReg
(
vta_fetch_handle
,
0x18
,
insn_phy
);
// LOAD @ 0x10 : Data signal of inputs_V
if
(
inputs
)
VTAWriteMappedReg
(
vta_load_handle
,
0x10
,
input_phy
);
// LOAD @ 0x18 : Data signal of weight_V
if
(
weights
)
VTAWriteMappedReg
(
vta_load_handle
,
0x18
,
weight_phy
);
// COMPUTE @ 0x20 : Data signal of uops_V
if
(
uops
)
VTAWriteMappedReg
(
vta_compute_handle
,
0x20
,
uop_phy
);
// COMPUTE @ 0x28 : Data signal of biases_V
if
(
biases
)
VTAWriteMappedReg
(
vta_compute_handle
,
0x28
,
bias_phy
);
// STORE @ 0x10 : Data signal of outputs_V
if
(
outputs
)
VTAWriteMappedReg
(
vta_store_handle
,
0x10
,
output_phy
);
// VTA start
VTAWriteMappedReg
(
vta_fetch_handle
,
0x0
,
0x1
);
VTAWriteMappedReg
(
vta_load_handle
,
0x0
,
0x81
);
VTAWriteMappedReg
(
vta_compute_handle
,
0x0
,
0x81
);
VTAWriteMappedReg
(
vta_store_handle
,
0x0
,
0x81
);
int
flag
=
0
,
t
=
0
;
for
(
t
=
0
;
t
<
10000000
;
++
t
)
{
flag
=
VTAReadMappedReg
(
vta_compute_handle
,
0x18
);
if
(
flag
&
VTA_DONE
)
break
;
}
if
(
t
==
10000000
)
{
printf
(
"
\t
WARNING: VTA TIMEOUT!!!!
\n
"
);
#if VTA_DEBUG == 1
}
else
{
printf
(
"INFO - FPGA Finished!
\n
"
);
#endif
}
clock_gettime
(
CLOCK_REALTIME
,
&
stop
);
t_fpga
=
1000000000ULL
*
(
stop
.
tv_sec
-
start
.
tv_sec
)
+
(
stop
.
tv_nsec
-
start
.
tv_nsec
);
// Unmap VTA register
VTAUnmapRegister
(
vta_fetch_handle
,
VTA_RANGE
);
VTAUnmapRegister
(
vta_load_handle
,
VTA_RANGE
);
VTAUnmapRegister
(
vta_compute_handle
,
VTA_RANGE
);
VTAUnmapRegister
(
vta_store_handle
,
VTA_RANGE
);
return
t_fpga
;
}
int
main
(
void
)
{
int
main
(
void
)
{
#if VTA_DEBUG == 1
#if VTA_DEBUG == 1
printParameters
();
printParameters
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment