Commit 8b01540d by Pariksheet Pinjari Committed by Tianqi Chen

[FRONTEND][DARKNET] YOLO V3 model support (#1734)

parent 585bd1ca
......@@ -440,11 +440,13 @@ class GraphProto(object):
self._state_ctr['cell_state'] = 0
self._state_ctr['gru'] = 0
def _read_memory_buffer(self, shape, data):
def _read_memory_buffer(self, shape, data, dtype=None):
if dtype is None:
dtype = self.dtype
length = 1
for x in shape:
length *= x
data_np = np.zeros(length, dtype=self.dtype)
data_np = np.zeros(length, dtype=dtype)
for i in range(length):
data_np[i] = data[i]
return data_np.reshape(shape)
......@@ -493,6 +495,31 @@ class GraphProto(object):
k = self._get_tvm_params_name(opname[0], 'bias')
self._tvmparams[k] = tvm.nd.array(biases)
def _get_region_weights(self, layer, opname):
    """Parse the biases for region layer.

    Stores the layer's anchor biases plus a packed int32 attribute
    vector (n, out_c, out_h, out_w, classes, coords, background) into
    self._tvmparams under names derived from ``opname``.
    """
    biases = self._read_memory_buffer((layer.n*2, ), layer.biases)
    attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w,
                           layer.classes, layer.coords, layer.background],
                          dtype=np.int32)
    k = self._get_tvm_params_name(opname, 'bias')
    self._tvmparams[k] = tvm.nd.array(biases)
    k = self._get_tvm_params_name(opname, 'attr')
    self._tvmparams[k] = tvm.nd.array(attributes)
def _get_yolo_weights(self, layer, opname):
    """Parse the biases and mask for yolo layer.

    Stores the anchor biases, the int32 anchor mask, and a packed int32
    attribute vector (n, out_c, out_h, out_w, classes, total) into
    self._tvmparams under names derived from ``opname``.
    """
    biases = self._read_memory_buffer((layer.total*2, ), layer.biases)
    # The mask selects which of the `total` anchors this layer uses.
    mask = self._read_memory_buffer((layer.n, ), layer.mask, dtype='int32')
    attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w,
                           layer.classes, layer.total],
                          dtype=np.int32)
    k = self._get_tvm_params_name(opname, 'bias')
    self._tvmparams[k] = tvm.nd.array(biases)
    k = self._get_tvm_params_name(opname, 'mask')
    self._tvmparams[k] = tvm.nd.array(mask)
    k = self._get_tvm_params_name(opname, 'attr')
    self._tvmparams[k] = tvm.nd.array(attributes)
def _get_batchnorm_weights(self, layer, opname, size):
"""Parse the weights for batchnorm, which includes, scales, moving mean
and moving variances."""
......@@ -621,6 +648,11 @@ class GraphProto(object):
elif LAYERTYPE.CONNECTED == layer.type:
self._get_connected_weights(layer, opname)
elif LAYERTYPE.REGION == layer.type:
self._get_region_weights(layer, opname)
elif LAYERTYPE.YOLO == layer.type:
self._get_yolo_weights(layer, opname)
def _preproc_layer(self, layer, layer_num):
"""To preprocess each darknet layer, some layer doesnt need processing."""
if layer_num == 0:
......@@ -850,6 +882,27 @@ class GraphProto(object):
return processed, sym
def _make_outlist(self, sym, op_name, layer, layer_num):
    """For detection layers (REGION/YOLO), prepend their helper params
    (attr, bias[, mask]) — and, for non-final layers, the layer output
    itself — to self._outs so they are exposed as extra graph outputs.
    """
    if layer.type == LAYERTYPE.REGION:
        k = self._get_tvm_params_name(op_name, 'attr')
        self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
        k = self._get_tvm_params_name(op_name, 'bias')
        self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
        # The final layer's own output is added by the caller; only
        # intermediate detection outputs need to be inserted here.
        if layer_num != self.net.n-1:
            self._outs.insert(0, sym)
    elif layer.type == LAYERTYPE.YOLO:
        k = self._get_tvm_params_name(op_name, 'attr')
        self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
        k = self._get_tvm_params_name(op_name, 'bias')
        self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
        k = self._get_tvm_params_name(op_name, 'mask')
        self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy()))
        if layer_num != self.net.n-1:
            self._outs.insert(0, sym)
    return
def from_darknet(self):
"""To convert the darknet symbol to nnvm symbols."""
for i in range(self.net.n):
......@@ -867,6 +920,8 @@ class GraphProto(object):
layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr)
self._get_darknet_params(self.net.layers[i], layer_name)
self._sym_array[i] = sym
self._make_outlist(sym, layer_name, layer, i)
self._outs = _as_list(sym) + self._outs
if isinstance(self._outs, list):
sym = _sym.Group(self._outs)
......
......@@ -13,5 +13,5 @@ from . import squeezenet
from . import inception_v3
from . import dcgan
from . import dqn
from . import yolo2_detection
from . import yolo_detection
from . import check_computation
......@@ -55,10 +55,10 @@ def _letterbox_image(img, w_in, h_in):
imc, imh, imw = img.shape
if (w_in / imw) < (h_in / imh):
new_w = w_in
new_h = imh * w_in / imw
new_h = imh * w_in // imw
else:
new_h = h_in
new_w = imw * h_in/imh
new_w = imw * h_in // imh
resized = _resize_image(img, new_w, new_h)
boxed = np.full((imc, h_in, w_in), 0.5, dtype=float)
_, resizedh, resizedw = resized.shape
......@@ -511,6 +511,7 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int
layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize);
layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam);
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam);
layer make_upsample_layer(int batch, int w, int h, int c, int stride);
void free_network(network *net);
"""
)
# pylint: disable=invalid-name, unused-variable, unused-argument, no-init
"""
Yolo detection boxes helper functions
====================
DarkNet helper functions for yolo and image loading.
These functions will not be loaded by default.
These are utility functions used for testing and tutorial file.
"""
from __future__ import division
import math
from collections import namedtuple
import numpy as np
def _entry_index(batch, w, h, outputs, classes, coords, location, entry):
n = int(location/(w*h))
loc = location%(w*h)
return batch*outputs + n*w*h*(coords+classes+1) + entry*w*h + loc
Box = namedtuple('Box', ['x', 'y', 'w', 'h'])
def _get_region_box(x, biases, n, index, i, j, w, h, stride):
    """Decode one predicted box from the flat output ``x`` at ``index``.

    x/y are offset by the grid cell (i, j) and normalized by the grid
    size; w/h use the exponential anchor-bias parameterization.
    """
    b = Box(0, 0, 0, 0)
    b = b._replace(x=(i + x[index + 0*stride]) / w)
    b = b._replace(y=(j + x[index + 1*stride]) / h)
    b = b._replace(w=np.exp(x[index + 2*stride]) * biases[2*n] / w)
    b = b._replace(h=np.exp(x[index + 3*stride]) * biases[2*n+1] / h)
    return b
def _correct_region_boxes(boxes, n, w, h, netw, neth, relative):
new_w, new_h = (netw, (h*netw)/w) if (netw/w < neth/h) else ((w*neth/h), neth)
for i in range(n):
b = boxes[i]
b = boxes[i]
b = b._replace(x=(b.x - (netw - new_w)/2/netw) / (new_w/netw))
b = b._replace(y=(b.y - (neth - new_h)/2/neth) / (new_h/neth))
b = b._replace(w=b.w * netw/new_w)
b = b._replace(h=b.h * neth/new_h)
if not relative:
b = b._replace(x=b.x * w)
b = b._replace(w=b.w * w)
b = b._replace(y=b.y * h)
b = b._replace(h=b.h * h)
boxes[i] = b
def _overlap(x1, w1, x2, w2):
l1 = x1 - w1/2
l2 = x2 - w2/2
left = l1 if l1 > l2 else l2
r1 = x1 + w1/2
r2 = x2 + w2/2
right = r1 if r1 < r2 else r2
return right - left
def _box_intersection(a, b):
    """Intersection area of two center-format boxes (0 when disjoint)."""
    ow = _overlap(a.x, a.w, b.x, b.w)
    oh = _overlap(a.y, a.h, b.y, b.h)
    if ow < 0 or oh < 0:
        return 0
    return ow * oh
def _box_union(a, b):
    """Union area of two boxes: sum of areas minus their intersection."""
    inter = _box_intersection(a, b)
    return a.w * a.h + b.w * b.h - inter
def _box_iou(a, b):
    """Intersection-over-union of two center-format boxes."""
    inter = _box_intersection(a, b)
    return inter / _box_union(a, b)
def get_region_boxes(layer_in, imw, imh, netw, neth, thresh, probs,
                     boxes, relative, tvm_out):
    "To get the boxes for the image based on the prediction"
    # NOTE(review): the incoming `probs`/`boxes` arguments are ignored —
    # both are rebuilt locally and returned.
    lw = layer_in.w
    lh = layer_in.h
    # One slot per (anchor, cell); the extra class column holds the max.
    probs = [[0 for i in range(layer_in.classes + 1)] for y in range(lw*lh*layer_in.n)]
    boxes = [Box(0, 0, 0, 0) for i in range(lw*lh*layer_in.n)]
    for i in range(lw*lh):
        row = int(i / lw)
        col = int(i % lw)
        for n in range(layer_in.n):
            index = n*lw*lh + i
            # Flat offsets of the objectness / box / mask entries.
            obj_index = _entry_index(0, lw, lh, layer_in.outputs, layer_in.classes,
                                     layer_in.coords, n*lw*lh + i, layer_in.coords)
            box_index = _entry_index(0, lw, lh, layer_in.outputs, layer_in.classes,
                                     layer_in.coords, n*lw*lh + i, 0)
            # mask_index is computed but never used (pylint: unused-variable).
            mask_index = _entry_index(0, lw, lh, layer_in.outputs, layer_in.classes,
                                      layer_in.coords, n*lw*lh + i, 4)
            scale = 1 if layer_in.background else tvm_out[obj_index]
            boxes[index] = _get_region_box(tvm_out, layer_in.biases, n, box_index, col,
                                           row, lw, lh, lw*lh)
            if not layer_in.softmax_tree:
                max_element = 0
                for j in range(layer_in.classes):
                    class_index = _entry_index(0, lw, lh, layer_in.outputs, layer_in.classes,
                                               layer_in.coords, n*lw*lh + i, layer_in.coords+1+j)
                    # Class confidence conditioned on objectness; zeroed
                    # below the threshold.
                    prob = scale*tvm_out[class_index]
                    probs[index][j] = prob if prob > thresh else 0
                    max_element = max(max_element, prob)
                probs[index][layer_in.classes] = max_element
    _correct_region_boxes(boxes, lw*lh*layer_in.n, imw, imh, netw, neth, relative)
    return boxes, probs
def do_nms_sort(boxes, probs, total, classes, thresh):
    "Does the sorting based on the threshold values"
    # Per-class non-maximum suppression: for each class, sort boxes by
    # that class probability and zero out lower-ranked boxes that
    # overlap a kept box by more than `thresh` IOU.  Mutates `probs`.
    SortableBbox = namedtuple('SortableBbox', ['index_var', 'class_var', 'probs'])
    s = [SortableBbox(0, 0, []) for i in range(total)]
    for i in range(total):
        s[i] = s[i]._replace(index_var=i)
        s[i] = s[i]._replace(class_var=0)
        s[i] = s[i]._replace(probs=probs)
    for k in range(classes):
        for i in range(total):
            s[i] = s[i]._replace(class_var=k)
        # Highest probability for class k first.
        s = sorted(s, key=lambda x: x.probs[x.index_var][x.class_var], reverse=True)
        for i in range(total):
            if probs[s[i].index_var][k] == 0:
                continue
            a = boxes[s[i].index_var]
            for j in range(i+1, total):
                b = boxes[s[j].index_var]
                if _box_iou(a, b) > thresh:
                    probs[s[j].index_var][k] = 0
    return boxes, probs
def draw_detections(im, num, thresh, boxes, probs, names, classes):
    "Draw the markings around the detected region"
    for i in range(num):
        labelstr = []
        category = -1
        # Collect every class name whose probability clears the threshold.
        for j in range(classes):
            if probs[i][j] > thresh:
                if category == -1:
                    category = j
                labelstr.append(names[j])
        if category > -1:
            imc, imh, imw = im.shape
            width = int(imh * 0.006)
            # Deterministic pseudo-random palette offset per category.
            offset = category*123457 % classes
            red = _get_color(2, offset, classes)
            green = _get_color(1, offset, classes)
            blue = _get_color(0, offset, classes)
            rgb = [red, green, blue]
            b = boxes[i]
            # Center-format box -> pixel corners, clamped to the image.
            left = int((b.x-b.w/2.)*imw)
            right = int((b.x+b.w/2.)*imw)
            top = int((b.y-b.h/2.)*imh)
            bot = int((b.y+b.h/2.)*imh)
            if left < 0:
                left = 0
            if right > imw-1:
                right = imw-1
            if top < 0:
                top = 0
            if bot > imh-1:
                bot = imh-1
            _draw_box_width(im, left, top, right, bot, width, red, green, blue)
            label = _get_label(''.join(labelstr), rgb)
            _draw_label(im, top + width, left, label, rgb)
def _get_pixel(im, x, y, c):
    # Read the pixel at column x, row y of channel c (image is CHW).
    return im[c][y][x]
def _set_pixel(im, x, y, c, val):
if x < 0 or y < 0 or c < 0 or x >= im.shape[2] or y >= im.shape[1] or c >= im.shape[0]:
return
im[c][y][x] = val
def _draw_label(im, r, c, label, rgb):
    """Blit the rendered ``label`` image onto ``im`` with its bottom-left
    near (row r, col c), clipping at the image borders."""
    w = label.shape[2]
    h = label.shape[1]
    if (r - h) >= 0:
        # Place the label above the anchor row when there is room.
        r = r - h
    for j in range(h):
        if j < h and (j + r) < im.shape[1]:
            for i in range(w):
                if i < w and (i + c) < im.shape[2]:
                    for k in range(label.shape[0]):
                        val = _get_pixel(label, i, j, k)
                        _set_pixel(im, i+c, j+r, k, val)#rgb[k] * val)
def _get_label(labelstr, rgb):
    """Render ``labelstr`` as a CHW float array (values in [0, 1]) on a
    background of color ``rgb`` using PIL."""
    from PIL import Image
    from PIL import ImageDraw
    from PIL import ImageFont
    text = labelstr
    colorText = "black"
    # Throw-away draw context only used to measure the text.
    testDraw = ImageDraw.Draw(Image.new('RGB', (1, 1)))
    # NOTE(review): assumes PIL can locate 'arial.ttf' on this system.
    font = ImageFont.truetype("arial.ttf", 25)
    width, height = testDraw.textsize(labelstr, font=font)
    img = Image.new('RGB', (width, height), color=(int(rgb[0]*255), int(rgb[1]*255),
                                                   int(rgb[2]*255)))
    d = ImageDraw.Draw(img)
    d.text((0, 0), text, fill=colorText, font=font)
    opencvImage = np.divide(np.asarray(img), 255)
    return opencvImage.transpose(2, 0, 1)
def _get_color(c, x, max_value):
c = int(c)
colors = [[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]
ratio = (float(x)/float(max_value)) * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio -= i
r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
return r
def _draw_box(im, x1, y1, x2, y2, r, g, b):
y1 = int(y1)
y2 = int(y2)
x1 = int(x1)
x2 = int(x2)
ac, ah, aw = im.shape
if x1 < 0:
x1 = 0
if x1 >= aw:
y1 = 0
if y1 >= ah:
y1 = ah - 1
if y2 < 0:
y2 = 0
if y2 >= ah:
y2 = ah - 1
for i in range(x1, x2):
im[0][y1][i] = r
im[0][y2][i] = r
im[1][y1][i] = g
im[1][y2][i] = g
im[2][y1][i] = b
im[2][y2][i] = b
for i in range(y1, y2):
im[0][i][x1] = r
im[0][i][x2] = r
im[1][i][x1] = g
im[1][i][x2] = g
im[2][i][x1] = b
im[2][i][x2] = b
def _draw_box_width(im, x1, y1, x2, y2, w, r, g, b):
    """Draw a rectangle outline ``w`` pixels thick by nesting 1-px boxes."""
    for inset in range(int(w)):
        _draw_box(im, x1 + inset, y1 + inset, x2 - inset, y2 - inset, r, g, b)
# pylint: disable=invalid-name, unused-variable, unused-argument, no-init
"""
Yolo detection boxes helper functions
====================
DarkNet helper functions for yolo and image loading.
These functions will not be loaded by default.
These are utility functions used for testing and tutorial file.
"""
from __future__ import division
import math
from collections import namedtuple
from functools import cmp_to_key
import numpy as np
Box = namedtuple('Box', ['x', 'y', 'w', 'h'])
def nms_comparator(a, b):
    """cmp-style comparator for detection dicts.

    Compares by the probability of ``b``'s 'sort_class' when that key is
    present and non-negative, otherwise by raw objectness.
    """
    sort_class = b.get('sort_class', -1)
    if sort_class >= 0:
        return a['prob'][sort_class] - b['prob'][sort_class]
    return a['objectness'] - b['objectness']
def _correct_boxes(dets, w, h, netw, neth, relative):
    """Map each det's 'bbox' from letterboxed network space back to
    image space; returns ``dets`` (mutated in place).

    ``w``/``h`` is the original image size, ``netw``/``neth`` the network
    input size.  When ``relative`` is falsy, coordinates are additionally
    scaled to absolute pixels.
    """
    # Content size inside the letterboxed input (integer division
    # mirrors the darknet C implementation).
    new_w, new_h = (netw, (h*netw)//w) if (netw/w < neth/h) else ((w*neth//h), neth)
    for det in dets:
        b = det['bbox']
        # Undo the letterbox padding offset, then rescale.
        b = b._replace(x=(b.x - (netw - new_w)/2/netw) / (new_w/netw))
        b = b._replace(y=(b.y - (neth - new_h)/2/neth) / (new_h/neth))
        b = b._replace(w=b.w * netw/new_w)
        b = b._replace(h=b.h * neth/new_h)
        if not relative:
            b = b._replace(x=b.x * w)
            b = b._replace(w=b.w * w)
            b = b._replace(y=b.y * h)
            b = b._replace(h=b.h * h)
        det['bbox'] = b
    return dets
def _overlap(x1, w1, x2, w2):
l1 = x1 - w1/2
l2 = x2 - w2/2
left = l1 if l1 > l2 else l2
r1 = x1 + w1/2
r2 = x2 + w2/2
right = r1 if r1 < r2 else r2
return right - left
def _box_intersection(a, b):
    """Intersection area of two center-format boxes (0 when disjoint)."""
    ow = _overlap(a.x, a.w, b.x, b.w)
    oh = _overlap(a.y, a.h, b.y, b.h)
    if ow < 0 or oh < 0:
        return 0
    return ow * oh
def _box_union(a, b):
    """Union area of two boxes: sum of areas minus their intersection."""
    inter = _box_intersection(a, b)
    return a.w * a.h + b.w * b.h - inter
def _box_iou(a, b):
    """Intersection-over-union of two center-format boxes."""
    inter = _box_intersection(a, b)
    return inter / _box_union(a, b)
def _get_box(data, biases, n, location, lw, lh, w, h):
    """Decode one predicted box from raw 4-D layer output ``data``.

    ``location`` is (anchor index, row, col); x/y are offset by the cell
    position and normalized by the grid (lw, lh); w/h use the
    exponential anchor-bias parameterization normalized by (w, h).
    """
    bx = (location[2] + data[location[0]][0][location[1]][location[2]]) / lw
    by = (location[1] + data[location[0]][1][location[1]][location[2]]) / lh
    bw = np.exp(data[location[0]][2][location[1]][location[2]]) * biases[2*n] / w
    bh = np.exp(data[location[0]][3][location[1]][location[2]]) * biases[2*n+1] / h
    return Box(bx, by, bw, bh)
def _get_yolo_detections(l, im_shape, net_shape, thresh, relative, dets):
    """Extract detections above ``thresh`` from one yolo layer's output
    dict and append the letterbox-corrected boxes to ``dets``."""
    data = l['output']
    # (anchor, row, col) positions whose objectness (channel 4) exceeds
    # the threshold.
    active_data_loc = np.asarray(np.where(data[:, 4, :, :] > thresh))
    before_correct_dets = []
    for i in range(active_data_loc.shape[1]):
        location = [active_data_loc[0][i], active_data_loc[1][i], active_data_loc[2][i]]
        # The mask maps the layer-local anchor index to the global anchor.
        box_b = _get_box(data, l['biases'], np.asarray(l['mask'])[location[0]], location,
                         data.shape[2], data.shape[3], net_shape[0], net_shape[1])
        objectness = data[location[0]][4][location[1]][location[2]]
        classes = l['classes']
        # Class confidences conditioned on objectness; zeroed below thresh.
        prob = objectness*data[location[0], 5:5 + 1 + classes, location[1], location[2]]
        prob[prob < thresh] = 0
        detection = {}
        detection['bbox'] = box_b
        detection['classes'] = classes
        detection['prob'] = prob
        detection['objectness'] = objectness
        before_correct_dets.append(detection)
    dets.extend(_correct_boxes(before_correct_dets, im_shape[0], im_shape[1],
                               net_shape[0], net_shape[1], relative))
    return
def _get_region_detections(l, im_shape, net_shape, thresh, relative, dets):
    """Extract detections from one region (YOLOv2-style) layer's output
    dict and append the letterbox-corrected boxes to ``dets``."""
    data = l['output']
    before_correct_dets = []
    # Every (row, col, anchor) cell produces one candidate detection.
    for row in range(data.shape[2]):
        for col in range(data.shape[3]):
            for n in range(data.shape[0]):
                prob = [0]*l['classes']
                scale = data[n, l['coords'], row, col] if not l['background'] else 1
                location = [n, row, col]
                box_b = _get_box(data, l['biases'], n, location,
                                 data.shape[2], data.shape[3], data.shape[2], data.shape[3])
                objectness = scale if scale > thresh else 0
                if objectness:
                    # Class confidences conditioned on objectness.
                    prob = scale * data[n, l['coords']+1: l['coords']+1+l['classes'],
                                        row, col]
                    prob[prob < thresh] = 0
                detection = {}
                detection['bbox'] = box_b
                detection['prob'] = prob
                detection['objectness'] = objectness
                before_correct_dets.append(detection)
    _correct_boxes(before_correct_dets, im_shape[0], im_shape[1],
                   net_shape[0], net_shape[1], relative)
    dets.extend(before_correct_dets)
    return
def fill_network_boxes(net_shape, im_shape,
                       thresh, relative, tvm_out):
    """Collect detections from every 'Yolo'/'Region' layer dict in
    ``tvm_out`` into a single list."""
    dets = []
    handlers = {'Yolo': _get_yolo_detections, 'Region': _get_region_detections}
    for layer in tvm_out:
        handler = handlers.get(layer['type'])
        if handler is not None:
            handler(layer, im_shape, net_shape, thresh, relative, dets)
    return dets
def do_nms_sort(dets, classes, thresh):
    "Does the sorting based on the threshold values"
    # Per-class non-maximum suppression, mutating `dets` in place.
    # First move zero-objectness detections to the tail so only the
    # `total` live ones take part.
    k = len(dets)-1
    cnt = 0
    while cnt < k:
        if dets[cnt]['objectness'] == 0:
            dets[k], dets[cnt] = dets[cnt], dets[k]
            k = k - 1
        else:
            cnt = cnt + 1
    total = k+1
    for k in range(classes):
        # Sort live detections by this class's probability (descending).
        for i in range(total):
            dets[i]['sort_class'] = k
        dets[0:total] = sorted(dets[0:total],
                               key=cmp_to_key(nms_comparator), reverse=True)
        for i in range(total):
            if dets[i]['prob'][k] == 0:
                continue
            a = dets[i]['bbox']
            # Suppress lower-ranked boxes overlapping a kept box by more
            # than `thresh` IOU.
            for j in range(i+1, total):
                b = dets[j]['bbox']
                if _box_iou(a, b) > thresh:
                    dets[j]['prob'][k] = 0
def draw_detections(im, dets, thresh, names, classes):
    "Draw the markings around the detected region"
    for det in dets:
        labelstr = []
        category = -1
        # Collect every class name whose probability clears the threshold.
        for j in range(classes):
            if det['prob'][j] > thresh:
                if category == -1:
                    category = j
                labelstr.append(names[j])
        if category > -1:
            imc, imh, imw = im.shape
            width = int(imh * 0.006)
            # Deterministic pseudo-random palette offset per category.
            offset = category*123457 % classes
            red = _get_color(2, offset, classes)
            green = _get_color(1, offset, classes)
            blue = _get_color(0, offset, classes)
            rgb = [red, green, blue]
            b = det['bbox']
            # Center-format box -> pixel corners, clamped to the image.
            left = int((b.x-b.w/2.)*imw)
            right = int((b.x+b.w/2.)*imw)
            top = int((b.y-b.h/2.)*imh)
            bot = int((b.y+b.h/2.)*imh)
            if left < 0:
                left = 0
            if right > imw-1:
                right = imw-1
            if top < 0:
                top = 0
            if bot > imh-1:
                bot = imh-1
            _draw_box_width(im, left, top, right, bot, width, red, green, blue)
            label = _get_label(''.join(labelstr), rgb)
            _draw_label(im, top + width, left, label, rgb)
def _get_pixel(im, x, y, c):
    # Read the pixel at column x, row y of channel c (image is CHW).
    return im[c][y][x]
def _set_pixel(im, x, y, c, val):
if x < 0 or y < 0 or c < 0 or x >= im.shape[2] or y >= im.shape[1] or c >= im.shape[0]:
return
im[c][y][x] = val
def _draw_label(im, r, c, label, rgb):
    """Blit the rendered ``label`` image onto ``im`` with its bottom-left
    near (row r, col c), clipping at the image borders."""
    w = label.shape[2]
    h = label.shape[1]
    if (r - h) >= 0:
        # Place the label above the anchor row when there is room.
        r = r - h
    for j in range(h):
        if j < h and (j + r) < im.shape[1]:
            for i in range(w):
                if i < w and (i + c) < im.shape[2]:
                    for k in range(label.shape[0]):
                        val = _get_pixel(label, i, j, k)
                        _set_pixel(im, i+c, j+r, k, val)#rgb[k] * val)
def _get_label(labelstr, rgb):
    """Render ``labelstr`` as a CHW float array (values in [0, 1]) on a
    background of color ``rgb`` using PIL."""
    from PIL import Image
    from PIL import ImageDraw
    from PIL import ImageFont
    text = labelstr
    colorText = "black"
    # Throw-away draw context only used to measure the text.
    testDraw = ImageDraw.Draw(Image.new('RGB', (1, 1)))
    # NOTE(review): assumes PIL can locate 'arial.ttf' on this system.
    font = ImageFont.truetype("arial.ttf", 25)
    width, height = testDraw.textsize(labelstr, font=font)
    img = Image.new('RGB', (width, height), color=(int(rgb[0]*255), int(rgb[1]*255),
                                                   int(rgb[2]*255)))
    d = ImageDraw.Draw(img)
    d.text((0, 0), text, fill=colorText, font=font)
    opencvImage = np.divide(np.asarray(img), 255)
    return opencvImage.transpose(2, 0, 1)
def _get_color(c, x, max_value):
c = int(c)
colors = [[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]
ratio = (float(x)/float(max_value)) * 5
i = int(math.floor(ratio))
j = int(math.ceil(ratio))
ratio -= i
r = (1-ratio) * colors[i][c] + ratio*colors[j][c]
return r
def _draw_box(im, x1, y1, x2, y2, r, g, b):
y1 = int(y1)
y2 = int(y2)
x1 = int(x1)
x2 = int(x2)
ac, ah, aw = im.shape
if x1 < 0:
x1 = 0
if x1 >= aw:
y1 = 0
if y1 >= ah:
y1 = ah - 1
if y2 < 0:
y2 = 0
if y2 >= ah:
y2 = ah - 1
for i in range(x1, x2):
im[0][y1][i] = r
im[0][y2][i] = r
im[1][y1][i] = g
im[1][y2][i] = g
im[2][y1][i] = b
im[2][y2][i] = b
for i in range(y1, y2):
im[0][i][x1] = r
im[0][i][x2] = r
im[1][i][x1] = g
im[1][i][x2] = g
im[2][i][x1] = b
im[2][i][x2] = b
def _draw_box_width(im, x1, y1, x2, y2, w, r, g, b):
    """Draw a rectangle outline ``w`` pixels thick by nesting 1-px boxes."""
    for inset in range(int(w)):
        _draw_box(im, x1 + inset, y1 + inset, x2 - inset, y2 - inset, r, g, b)
......@@ -13,6 +13,7 @@ import numpy as np
import tvm
from tvm.contrib import graph_runtime
from nnvm import frontend
from nnvm.testing.darknet import LAYERTYPE
from nnvm.testing.darknet import __darknetffi__
import nnvm.compiler
if sys.version_info >= (3,):
......@@ -50,14 +51,24 @@ DARKNETLIB_URL = 'https://github.com/siju-samuel/darknet/blob/master/lib/' \
_download(DARKNETLIB_URL, DARKNET_LIB)
LIB = __darknetffi__.dlopen('./' + DARKNET_LIB)
def _get_tvm_output(net, data):
def _read_memory_buffer(shape, data, dtype='float32'):
length = 1
for x in shape:
length *= x
data_np = np.zeros(length, dtype=dtype)
for i in range(length):
data_np[i] = data[i]
return data_np.reshape(shape)
def _get_tvm_output(net, data, build_dtype='float32'):
'''Compute TVM output'''
dtype = 'float32'
sym, params = frontend.darknet.from_darknet(net, dtype)
target = 'llvm'
shape_dict = {'data': data.shape}
graph, library, params = nnvm.compiler.build(sym, target, shape_dict, dtype, params=params)
graph, library, params = nnvm.compiler.build(sym, target, shape_dict,
build_dtype, params=params)
# Execute on TVM
ctx = tvm.cpu(0)
m = graph_runtime.create(graph, library, ctx)
......@@ -66,14 +77,50 @@ def _get_tvm_output(net, data):
m.set_input(**params)
m.run()
# get outputs
out_shape = (net.outputs,)
tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
tvm_out = []
for i in range(m.get_num_outputs()):
tvm_out.append(m.get_output(i).asnumpy())
return tvm_out
def test_forward(net):
def test_forward(net, build_dtype='float32'):
'''Test network with given input image on both darknet and tvm'''
def get_darknet_output(net, img):
return LIB.network_predict_image(net, img)
LIB.network_predict_image(net, img)
out = []
for i in range(net.n):
layer = net.layers[i]
if layer.type == LAYERTYPE.REGION:
attributes = np.array([layer.n, layer.out_c, layer.out_h,
layer.out_w, layer.classes,
layer.coords, layer.background],
dtype=np.int32)
out.insert(0, attributes)
out.insert(0, _read_memory_buffer((layer.n*2, ), layer.biases))
layer_outshape = (layer.batch, layer.out_c,
layer.out_h, layer.out_w)
out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
elif layer.type == LAYERTYPE.YOLO:
attributes = np.array([layer.n, layer.out_c, layer.out_h,
layer.out_w, layer.classes,
layer.total],
dtype=np.int32)
out.insert(0, attributes)
out.insert(0, _read_memory_buffer((layer.total*2, ), layer.biases))
out.insert(0, _read_memory_buffer((layer.n, ), layer.mask, dtype='int32'))
layer_ou tshape = (layer.batch, layer.out_c,
layer.out_h, layer.out_w)
out.insert(0, _read_memory_buffer(layer_outshape, layer.output))
elif i == net.n-1:
if layer.type == LAYERTYPE.CONNECTED:
darknet_outshape = (layer.batch, layer.out_c)
elif layer.type in [LAYERTYPE.SOFTMAX]:
darknet_outshape = (layer.batch, layer.outputs)
else:
darknet_outshape = (layer.batch, layer.out_c,
layer.out_h, layer.out_w)
out.insert(0, _read_memory_buffer(darknet_outshape, layer.output))
return out
dtype = 'float32'
test_image = 'dog.jpg'
......@@ -81,11 +128,7 @@ def test_forward(net):
_download(img_url, test_image)
img = LIB.letterbox_image(LIB.load_image_color(test_image.encode('utf-8'), 0, 0), net.w, net.h)
darknet_output = get_darknet_output(net, img)
darknet_out = np.zeros(net.outputs, dtype='float32')
for i in range(net.outputs):
darknet_out[i] = darknet_output[i]
batch_size = 1
data = np.empty([batch_size, img.c, img.h, img.w], dtype)
i = 0
for c in range(img.c):
......@@ -94,8 +137,9 @@ def test_forward(net):
data[0][c][h][k] = img.data[i]
i = i + 1
tvm_out = _get_tvm_output(net, data)
np.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-3, atol=1e-3)
tvm_out = _get_tvm_output(net, data, build_dtype)
for tvm_outs, darknet_out in zip(tvm_out, darknet_output):
np.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)
def test_rnn_forward(net):
'''Test network with given input data on both darknet and tvm'''
......@@ -106,11 +150,14 @@ def test_rnn_forward(net):
np_arr = np.zeros([1, net.inputs], dtype='float32')
np_arr[0, 84] = 1
cffi_arr = ffi.cast('float*', np_arr.ctypes.data)
tvm_out = _get_tvm_output(net, np_arr)
tvm_out = _get_tvm_output(net, np_arr)[0]
darknet_output = get_darknet_network_predict(net, cffi_arr)
darknet_out = np.zeros(net.outputs, dtype='float32')
for i in range(net.outputs):
darknet_out[i] = darknet_output[i]
last_layer = net.layers[net.n-1]
darknet_outshape = (last_layer.batch, last_layer.outputs)
darknet_out = darknet_out.reshape(darknet_outshape)
np.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)
def test_forward_extraction():
......@@ -152,8 +199,8 @@ def test_forward_resnet50():
test_forward(net)
LIB.free_network(net)
def test_forward_yolo():
'''test yolo model'''
def test_forward_yolov2():
'''test yolov2 model'''
model_name = 'yolov2'
cfg_name = model_name + '.cfg'
weights_name = model_name + '.weights'
......@@ -162,7 +209,22 @@ def test_forward_yolo():
_download(cfg_url, cfg_name)
_download(weights_url, weights_name)
net = LIB.load_network(cfg_name.encode('utf-8'), weights_name.encode('utf-8'), 0)
test_forward(net)
build_dtype = {}
test_forward(net, build_dtype)
LIB.free_network(net)
def test_forward_yolov3():
    '''test yolov3 model'''
    model_name = 'yolov3'
    cfg_name = model_name + '.cfg'
    weights_name = model_name + '.weights'
    # Fetch config and pretrained weights for the full YOLOv3 network.
    cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true'
    weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true'
    _download(cfg_url, cfg_name)
    _download(weights_url, weights_name)
    net = LIB.load_network(cfg_name.encode('utf-8'), weights_name.encode('utf-8'), 0)
    # Empty dtype dict: detection nets mix float32 and int32 outputs.
    test_forward(net, build_dtype={})
    LIB.free_network(net)
def test_forward_convolutional():
......@@ -271,20 +333,21 @@ def test_forward_region():
net.layers[1] = layer_2
net.w = net.h = 224
LIB.resize_network(net, 224, 224)
test_forward(net)
build_dtype = {}
test_forward(net, build_dtype)
LIB.free_network(net)
def test_forward_yolo_op():
    '''test yolo layer'''
    # Two-layer net: a conv layer feeding a yolo detection layer
    # (n=2 anchors used, total=9 anchors, no mask array, 2 classes).
    net = LIB.make_network(2)
    layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0)
    # (Removed the unused leftover local `a = []` from the pre-YOLOv3
    # signature; the mask argument is now passed as a NULL pointer.)
    layer_2 = LIB.make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2)
    net.layers[0] = layer_1
    net.layers[1] = layer_2
    net.w = net.h = 224
    LIB.resize_network(net, 224, 224)
    # Empty dtype dict: the yolo layer emits int32 mask/attr outputs
    # alongside float32 data.
    build_dtype = {}
    test_forward(net, build_dtype)
    LIB.free_network(net)
def test_forward_upsample():
......@@ -313,7 +376,7 @@ def test_forward_softmax():
'''test softmax layer'''
net = LIB.make_network(1)
layer_1 = LIB.make_softmax_layer(1, 75, 1)
layer_1.temperature=1
layer_1.temperature = 1
net.layers[0] = layer_1
net.w = net.h = 5
LIB.resize_network(net, net.w, net.h)
......@@ -324,7 +387,7 @@ def test_forward_softmax_temperature():
'''test softmax layer'''
net = LIB.make_network(1)
layer_1 = LIB.make_softmax_layer(1, 75, 1)
layer_1.temperature=0.8
layer_1.temperature = 0.8
net.layers[0] = layer_1
net.w = net.h = 5
LIB.resize_network(net, net.w, net.h)
......@@ -441,7 +504,8 @@ if __name__ == '__main__':
test_forward_resnet50()
test_forward_alexnet()
test_forward_extraction()
test_forward_yolo()
test_forward_yolov2()
test_forward_yolov3()
test_forward_convolutional()
test_forward_maxpooling()
test_forward_avgpooling()
......
"""
Compile YOLO-V2 in DarkNet Models
Compile YOLO-V2 and YOLO-V3 in DarkNet Models
=================================
**Author**: `Siju Samuel <https://siju-samuel.github.io/>`_
This article is an introductory tutorial to deploy darknet models with NNVM.
All the required models and libraries will be downloaded from the internet by the script.
This script runs the YOLO-V2 Model with the bounding boxes
This script runs the YOLO-V2 and YOLO-V3 Model with the bounding boxes
Darknet parsing has a dependency on the CFFI and CV2 libraries.
Please install CFFI and CV2 before executing this script
......@@ -17,6 +17,7 @@ Please install CFFI and CV2 before executing this script
import nnvm
import nnvm.frontend.darknet
import nnvm.testing.yolo_detection
import nnvm.testing.darknet
import matplotlib.pyplot as plt
import numpy as np
......@@ -28,7 +29,7 @@ from tvm.contrib.download import download
from nnvm.testing.darknet import __darknetffi__
# Model name
MODEL_NAME = 'yolo'
MODEL_NAME = 'yolov3'
######################################################################
# Download required files
......@@ -75,9 +76,11 @@ ctx = tvm.cpu(0)
data = np.empty([batch_size, net.c, net.h, net.w], dtype)
shape = {'data': data.shape}
print("Compiling the model...")
dtype_dict = {}
with nnvm.compiler.build_config(opt_level=2):
graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype, params)
graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params)
[neth, netw] = shape['data'][2:] # Current image shape is 608x608
######################################################################
# Load a test image
# --------------------------------------------------------------------
......@@ -87,8 +90,7 @@ img_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + \
test_image + '?raw=true'
download(img_url, test_image)
data = nnvm.testing.darknet.load_image(test_image, net.w, net.h)
data = nnvm.testing.darknet.load_image(test_image, netw, neth)
######################################################################
# Execute on TVM Runtime
# ----------------------
......@@ -105,24 +107,44 @@ print("Running the test image...")
m.run()
# get outputs
out_shape = (net.outputs,)
tvm_out = m.get_output(0).asnumpy().flatten()
tvm_out = []
if MODEL_NAME == 'yolov2':
layer_out = {}
layer_out['type'] = 'Region'
# Get the region layer attributes (n, out_c, out_h, out_w, classes, coords, background)
layer_attr = m.get_output(2).asnumpy()
layer_out['biases'] = m.get_output(1).asnumpy()
out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
layer_attr[2], layer_attr[3])
layer_out['output'] = m.get_output(0).asnumpy().reshape(out_shape)
layer_out['classes'] = layer_attr[4]
layer_out['coords'] = layer_attr[5]
layer_out['background'] = layer_attr[6]
tvm_out.append(layer_out)
elif MODEL_NAME == 'yolov3':
for i in range(3):
layer_out = {}
layer_out['type'] = 'Yolo'
# Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total)
layer_attr = m.get_output(i*4+3).asnumpy()
layer_out['biases'] = m.get_output(i*4+2).asnumpy()
layer_out['mask'] = m.get_output(i*4+1).asnumpy()
out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0],
layer_attr[2], layer_attr[3])
layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape)
layer_out['classes'] = layer_attr[4]
tvm_out.append(layer_out)
# do the detection and bring up the bounding boxes
thresh = 0.24
hier_thresh = 0.5
thresh = 0.5
nms_thresh = 0.45
img = nnvm.testing.darknet.load_image_color(test_image)
_, im_h, im_w = img.shape
probs = []
boxes = []
region_layer = net.layers[net.n - 1]
boxes, probs = nnvm.testing.yolo2_detection.get_region_boxes(
region_layer, im_w, im_h, net.w, net.h,
thresh, probs, boxes, 1, tvm_out)
boxes, probs = nnvm.testing.yolo2_detection.do_nms_sort(
boxes, probs,
region_layer.w*region_layer.h*region_layer.n, region_layer.classes, 0.3)
dets = nnvm.testing.yolo_detection.fill_network_boxes((netw, neth), (im_w, im_h), thresh,
1, tvm_out)
last_layer = net.layers[net.n - 1]
nnvm.testing.yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh)
coco_name = 'coco.names'
coco_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + coco_name + '?raw=true'
......@@ -136,8 +158,6 @@ with open(coco_name) as f:
names = [x.strip() for x in content]
nnvm.testing.yolo2_detection.draw_detections(
img, region_layer.w*region_layer.h*region_layer.n,
thresh, boxes, probs, names, region_layer.classes)
nnvm.testing.yolo_detection.draw_detections(img, dets, thresh, names, last_layer.classes)
plt.imshow(img.transpose(1, 2, 0))
plt.show()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment