Commit 70477dab by Zhihong Ma

feat: LSTM-OCR basic fp32 version

parent 569d167e
import torch
import numpy as np
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
# Build a sequential MNIST dataset
class seq_mnist(Dataset):
"""docstring for seq_mnist_dataset"""
def __init__(self, trainer_params, train_set):
self.suffix = "_train" if train_set else "_test"
self.data = datasets.MNIST('../../project/p/data', train=train_set, download=False, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]))
self.trainer_params = trainer_params
self.images = []
self.labels = []
self.input_lengths = np.ones(1, dtype=np.int32) * (28 * self.trainer_params.word_size)
self.label_lengths = np.ones(1, dtype=np.int32) * (self.trainer_params.word_size)
self.build_dataset()
# self.load_dataset()
def build_dataset(self):
imgs = []
labels = []
for j in range(len(self.data)//self.trainer_params.word_size): # this loop builds dataset
# allocate input_size (32) rows; the 28-row digits are padded below to match
img = np.zeros((self.trainer_params.input_size, self.trainer_params.word_size * 28))
labs = np.zeros(self.trainer_params.word_size, dtype=np.int32)
for i in range(self.trainer_params.word_size): # this loop builds one example
ims, labs[i] = self.data[(j*self.trainer_params.word_size)+i]
labs[i] += 1 # because ctc assumes 0 as blank character
ims = np.reshape(ims, (28,28))
# pad two rows at the top and bottom to get a 32x28 ims
ims = np.pad(ims, ((2,2),(0,0)), mode='constant', constant_values=-1)
img[:, i*28 : (i+1)*28 ] = ims
# sequence models such as RNNs usually take 3D input shaped (time_steps, batch_size, input_dim)
# after this transpose each example is (time_steps, input_dim)
img = np.transpose(img)
imgs.append(img)
labels.append(labs)
# (batch_size, time_steps, input_dim) => (time_steps, batch_size, input_dim)
self.images = np.asarray(imgs, dtype=np.float32).transpose(1, 0, 2)
self.labels.append(labels)
# save
np.save('data/images{}.npy'.format(self.suffix), self.images)
np.save('data/labels{}.npy'.format(self.suffix), np.asarray(self.labels))
# optionally quantize the input here
if self.trainer_params.quantize_input:
self.images = self.quantize_tensor_image(self.images)
self.images = np.asarray(self.images)
def load_dataset(self):
self.images = np.load('data/images{}.npy'.format(self.suffix))
self.labels = np.load('data/labels{}.npy'.format(self.suffix))
if self.trainer_params.quantize_input:
self.images = self.quantize_tensor_image(self.images)
self.images = np.asarray(self.images)
# no need to do this separately here
def quantize_tensor_image(self, tensor_image):
frac_bits = self.trainer_params.recurrent_activation_bit_width-1
prescale = 2**frac_bits
postscale = 2**-frac_bits
max_val = 1 - postscale
tensor_image = tensor_image.clip(-1, max_val)
tensor_image = np.round(tensor_image*prescale)*postscale
return tensor_image
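# Worked example of the rounding above (illustrative, assuming recurrent_activation_bit_width = 4):
# frac_bits = 3, prescale = 8, postscale = 0.125, max_val = 0.875, so an input pixel of 0.3
# is clipped to [-1, 0.875] and becomes round(0.3 * 8) * 0.125 = 0.25, i.e. pixels are
# snapped to a signed fixed-point grid with 3 fractional bits.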
# dataset length: number of samples (dimension 1 of the stored (T, N, D) array)
def __len__(self):
return self.images.shape[1]
def __getitem__(self, index):
return self.images[:,index,:], self.labels[0][index], self.input_lengths, self.label_lengths
class seq_mnist_train(seq_mnist):
def __init__(self, trainer_params):
print("Building Training Dataset . . . ")
super(seq_mnist_train, self).__init__(trainer_params, train_set=True)
class seq_mnist_val(seq_mnist):
def __init__(self, trainer_params):
print("Building Testing Dataset . . . ")
super(seq_mnist_val, self).__init__(trainer_params, train_set=False)
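# Usage sketch (illustrative; the field values below are assumptions, and the MNIST files
# under ../../project/p/data plus a local data/ directory for the .npy dumps must exist):
if __name__ == "__main__":
    from types import SimpleNamespace
    params = SimpleNamespace(word_size=5, input_size=32,
                             quantize_input=False,
                             recurrent_activation_bit_width=8)
    train_set = seq_mnist_train(params)
    img, labs, in_len, lab_len = train_set[0]
    # img: (time_steps, input_size) = (28 * word_size, 32); labs: (word_size,)
    print(img.shape, labs.shape, in_len, lab_len)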
import math
import numpy as np
class seq_mnist_decoder():
def __init__(self, labels, blank=0):
self.blank_chr = blank
self.labels = labels
def decode(self, predictions, output_len, label_len):
predictions = predictions.data.cpu().numpy()
output = []
# translate the predictions step by step, then join them into a sequence
# predictions is a 2D array of shape (time_steps, num_classes): each row is a time step, each column the score of one possible label.
for i in range(output_len):
pred = np.argmax(predictions[i, :])
# drop blanks and merge repeats (several consecutive LSTM time steps may emit the same character)
if (pred != self.blank_chr) and (pred != np.argmax(predictions[i-1, :])): # merging repeats and removing blank character (0)
output.append(pred-1)
return np.asarray(output)
def hit(self, pred, target):
res = []
for idx, word in enumerate(target):
if idx < len(pred): # still within the length of the prediction list
item = pred[idx]
# if the current position is beyond the length of the prediction list, there is no prediction for this target label, so set item to an arbitrary non-matching value
else:
item = 10
res.append(word == item)
acc = np.mean(np.asarray(res))*100
if math.isnan(acc):
return 0.00
else:
return acc
def to_string(self, in_str):
out_str = ''
for i in range(in_str.shape[0]):
out_str += str(in_str[i])
return out_str
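# Minimal decoding sketch (illustrative): decode a toy prediction tensor of shape
# (time_steps, num_classes) and score it against a target sequence.
if __name__ == "__main__":
    import torch
    decoder = seq_mnist_decoder(labels=list(range(10)))
    # 4 time steps, 11 classes (class 0 is the CTC blank, digits are shifted by +1)
    toy = torch.zeros(4, 11)
    toy[0, 3] = 1.0
    toy[1, 3] = 1.0   # repeated '2' -> merged into a single symbol
    toy[2, 0] = 1.0   # blank -> dropped
    toy[3, 8] = 1.0   # '7'
    pred = decoder.decode(toy, output_len=4, label_len=2)
    print(pred, decoder.hit(pred, target=[2, 7]))  # expect [2 7] and 100.0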
import os
import json
import torch
import argparse
from trainer import Seq_MNIST_Trainer
torch.backends.cudnn.enabled = False
torch.set_printoptions(precision=10)
class objdict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
def __setattr__(self, name, value):
self[name] = value
def __delattr__(self, name):
if name in self:
del self[name]
else:
raise AttributeError("No such attribute: " + name)
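# objdict exposes the loaded JSON both as trainer_params["lr"] and as trainer_params.lr.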
def ascii_encode_dict(data):
# Python 2 legacy: unicode values were encoded to ascii bytes here. Under Python 3,
# json.load already returns str, and encoding keys to bytes would break attribute
# access on objdict, so the parsed dict is returned as-is.
# ascii_encode = lambda x: x.encode('ascii')
# return dict(map(ascii_encode, pair) if isinstance(pair[1], unicode) else pair for pair in data.items())
return data
def non_or_str(value):
if value is None:
return None
return value
if __name__ == '__main__':
# Training settings
parser = argparse.ArgumentParser(description='PyTorch Quantized BiLSTM Sequential MNIST Example')
parser.add_argument('--params', '-p', type=str, default="default_trainer_params.json", help='Path to params JSON file. Default ignored when resuming.')
# this could be changed; the original version presumably supported multi-machine training
parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training')
# parser.add_argument('--gpus', default=0, help='gpus used for training - e.g 0,1,3')
parser.add_argument('--epochs', type=int, default=1000, help='Number of epochs')
parser.add_argument('--init_bn_fc_fusion', default=False, action='store_true', help='Init BN FC fusion.')
# defaults to None
parser.add_argument('--resume', type=non_or_str, help='resume from a checkpoint')
parser.add_argument('--eval', default=False, action='store_true', help='perform evaluation of trained model')
parser.add_argument('--export', default=False, action='store_true', help='perform weights export as .hpp of trained model')
parser.add_argument('--export_image', default=False, action='store_true', help='perform test image export as png and txt')
parser.add_argument('--experiments', default="./experiments", help='Save Path')
parser.add_argument('--simd_factor', default=1, type=int, help='SIMD factor for export.')
parser.add_argument('--pe', default=1, type=int, help='Number of PEs for export.')
#Overrides
parser.add_argument('--random_seed', type=int)
parser.add_argument('--batch_size', type=int)
parser.add_argument('--test_batch_size', type=int)
parser.add_argument('--num_workers', type=int)
parser.add_argument('--num_units', type=int)
parser.add_argument('--num_layers', type=int)
parser.add_argument('--num_classes', type=int)
parser.add_argument('--word_size', type=int)
parser.add_argument('--seq_len', type=int)
parser.add_argument('--neuron_type', type=str)
parser.add_argument('--input_size', type=int)
parser.add_argument('--lr', type=float)
parser.add_argument('--bidirectional', type=bool)
parser.add_argument('--reduce_bidirectional', type=str)
parser.add_argument('--recurrent_bias_enabled', type=bool)
parser.add_argument('--checkpoint_interval', type=int)
parser.add_argument('--recurrent_weight_bit_width', type=int)
parser.add_argument('--recurrent_weight_quantization', type=str)
parser.add_argument('--recurrent_bias_bit_width', type=int)
parser.add_argument('--recurrent_bias_quantization', type=str)
parser.add_argument('--recurrent_activation_bit_width', type=int)
parser.add_argument('--recurrent_activation_quantization', type=str)
parser.add_argument('--internal_activation_bit_width', type=int)
parser.add_argument('--fc_weight_bit_width', type=int)
parser.add_argument('--fc_weight_quantization', type=str)
parser.add_argument('--fc_bias_bit_width', type=int)
parser.add_argument('--fc_bias_quantization', type=str)
parser.add_argument('--quantize_input', type=bool)
args = parser.parse_args()
if args.export:
args.no_cuda = True
args.cuda = not args.no_cuda and torch.cuda.is_available()
if not os.path.exists(args.experiments):
os.mkdir(args.experiments)
# resume directly from a checkpoint
if (args.resume or args.eval or args.export) and args.params == "default_trainer_params.json":
package = torch.load(args.resume, map_location=lambda storage, loc: storage)
trainer_params = package['trainer_params']
# train from scratch
else:
with open(args.params) as d:
trainer_params = json.load(d, object_hook=ascii_encode_dict)
trainer_params = objdict(trainer_params)
for k in trainer_params.keys():
print(k, trainer_params[k])
# args is still useful: the defaults in trainer_params and the parameters exposed via args are largely complementary
trainer = Seq_MNIST_Trainer(trainer_params, args)
if args.export:
trainer.export_model(args.simd_factor, args.pe)
exit(0)
if args.export_image:
trainer.export_image()
exit(0)
if args.eval:
trainer.eval_model()
exit(0)
else:
trainer.train_model()
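# A plausible default_trainer_params.json, reconstructed from the fields read above
# (the concrete values are assumptions, not taken from the original repository):
# {
#   "random_seed": 123, "batch_size": 32, "test_batch_size": 32, "num_workers": 0,
#   "num_units": 128, "num_layers": 1, "num_classes": 11, "word_size": 5,
#   "neuron_type": "LSTM", "input_size": 32, "lr": 0.0001,
#   "bidirectional": true, "reduce_bidirectional": "CONCAT",
#   "recurrent_bias_enabled": true, "recurrent_activation_bit_width": 8,
#   "quantize_input": false, "prefused_bn_fc": false
# }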
# Copyright (c) 2018, Xilinx, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import math
import numpy
import torch
import torch.nn as nn
from functools import partial
# from quantization.modules.rnn import QuantizedLSTM
# from quantization.modules.quantized_linear import QuantizedLinear
class SequenceWise(nn.Module):
def __init__(self, module):
"""
Collapses input of dim T*N*H to (T*N)*H, and applies to a module.
Allows handling of variable sequence lengths and minibatch sizes.
:param module: Module to apply input to.
"""
super(SequenceWise, self).__init__()
self.module = module
def forward(self, x):
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.module(x)
x = x.view(t, n, -1)
return x
def __repr__(self):
tmpstr = self.__class__.__name__ + ' (\n'
tmpstr += self.module.__repr__()
tmpstr += ')'
return tmpstr
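# Shape note: with a bidirectional LSTM and CONCAT reduction, SequenceWise receives
# x of shape (T, N, 2 * num_units), flattens it to (T * N, 2 * num_units) so that
# BatchNorm1d/Linear can be applied per time step, then restores (T, N, num_classes).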
# decides whether to apply BN + FC or just FC
class FusedBatchNorm1dLinear(nn.Module):
def __init__(self, trainer_params, batch_norm, linear):
super(FusedBatchNorm1dLinear, self).__init__()
self.batch_norm = batch_norm
self.linear = linear
self.trainer_params = trainer_params
def forward(self, x):
if self.trainer_params.prefused_bn_fc:
x = self.linear(x)
else:
x = self.batch_norm(x)
x = self.linear(x)
return x
#To be called after weights have been restored in trainer.py
def init_fusion(self):
print("Fusing BN-FC")
bn_weight_var = torch.mul(self.batch_norm.weight.data, torch.rsqrt(self.batch_norm.running_var + self.batch_norm.eps))
bias_coeff = self.batch_norm.bias.data - torch.mul(self.batch_norm.running_mean, bn_weight_var)
self.linear.bias.data = torch.addmv(self.linear.bias.data, self.linear.weight.data, bias_coeff)
self.linear.weight.data = self.linear.weight.data * bn_weight_var.expand_as(self.linear.weight.data)
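# Fusion math for reference: BatchNorm1d computes y = gamma * (x - mu) / sqrt(var + eps) + beta,
# so folding it into the following Linear layer W x + b gives
#   W' = W * diag(gamma / sqrt(var + eps))
#   b' = b + W @ (beta - mu * gamma / sqrt(var + eps))
# which is what the rsqrt/addmv/expand_as operations above implement.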
class BiLSTM(nn.Module):
def __init__(self, trainer_params):
super(BiLSTM, self).__init__()
self.trainer_params = trainer_params
# print(self.trainer_params.reduce_bidirectional)
self.trainer_params.reduce_bidirectional = 'CONCAT'
if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
self.reduce_factor = 2
else:
self.reduce_factor = 1
# for an LSTM, the arguments in parentheses configure the layer
# self.recurrent_layer = self.recurrent_layer_type(input_size=self.trainer_params.input_size,
# hidden_size=self.trainer_params.num_units,
# num_layers=self.trainer_params.num_layers,
# batch_first=False,
# bidirectional=self.trainer_params.bidirectional,
# bias=self.trainer_params.recurrent_bias_enabled)
self.recurrent_layer = nn.LSTM(input_size=self.trainer_params.input_size,
hidden_size=self.trainer_params.num_units,
num_layers=self.trainer_params.num_layers,
batch_first=False,
bidirectional=self.trainer_params.bidirectional,
bias=self.trainer_params.recurrent_bias_enabled)
self.batch_norm_fc = FusedBatchNorm1dLinear(
trainer_params,
nn.BatchNorm1d(self.reduce_factor * self.trainer_params.num_units),
# QuantizedLinear(
# bias=True,
# in_features=self.reduce_factor * self.trainer_params.num_units,
# out_features=trainer_params.num_classes,
# bias_bit_width=self.trainer_params.fc_bias_bit_width,
# bias_q_type=self.trainer_params.fc_bias_quantization,
# weight_bit_width=self.trainer_params.fc_weight_bit_width,
# weight_q_type=self.trainer_params.fc_weight_quantization)
nn.Linear(
in_features=self.reduce_factor * self.trainer_params.num_units,
out_features=trainer_params.num_classes,
bias=True )
)
self.output_layer = nn.Sequential(SequenceWise(self.batch_norm_fc), nn.LogSoftmax(dim=2))
# @property
# def reduce_factor(self):
# if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
# return 2
# else:
# return 1
# @property
# def recurrent_layer_type(self):
# # if self.trainer_params.neuron_type == 'QLSTM':
# # func = QuantizedLSTM
# # elif self.trainer_params.neuron_type == 'LSTM':
# # func = nn.LSTM
# if self.trainer_params.neuron_type == 'LSTM':
# func = nn.LSTM
# else:
# raise Exception("Invalid neuron type.")
# if self.trainer_params.neuron_type == 'QLSTM':
# func = partial(func, bias_bit_width=self.trainer_params.recurrent_bias_bit_width,
# bias_q_type=self.trainer_params.recurrent_bias_quantization,
# weight_bit_width=self.trainer_params.recurrent_weight_bit_width,
# weight_q_type=self.trainer_params.recurrent_weight_quantization,
# activation_bit_width=self.trainer_params.recurrent_activation_bit_width,
# activation_q_type=self.trainer_params.recurrent_activation_quantization,
# internal_activation_bit_width=self.trainer_params.internal_activation_bit_width)
# return func
def forward(self, x):
# h is unused, presumably because there is only a single LSTM cell (num_layers = 1)
x, h = self.recurrent_layer(x)
if self.trainer_params.bidirectional:
if self.trainer_params.reduce_bidirectional == 'SUM':
x = x.view(x.size(0), x.size(1), 2, -1).sum(2).view(x.size(0), x.size(1), -1)
elif self.trainer_params.reduce_bidirectional == 'CONCAT':
#do nothing, x is already in the proper shape
pass
else:
raise Exception('Unknown reduce mode: {}'.format(self.trainer_params.reduce_bidirectional))
x = self.output_layer(x)
return x
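# Shape trace: x enters as (T, N, input_size) with T = 28 * word_size; the bidirectional
# LSTM yields (T, N, 2 * num_units) under CONCAT, and output_layer returns per-timestep
# log-probabilities of shape (T, N, num_classes), the log_probs layout CTC loss expects.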
def export(self, output_path, simd_factor, pe):
if self.trainer_params.neuron_type == 'QLSTM':
assert(self.trainer_params.input_size % simd_factor == 0)
assert(self.trainer_params.num_units % simd_factor == 0)
assert((simd_factor >= 1 and pe == 1) or (simd_factor == 1 and pe >= 1))
# integer SIMD folding factors (divisibility is asserted above)
ih_simd = self.trainer_params.input_size // simd_factor
hh_simd = self.trainer_params.num_units // simd_factor
lstm_weight_ih = self.recurrent_layer.hls_lstm_weight_ih_string(ih_simd, pe)
lstm_weight_hh = self.recurrent_layer.hls_lstm_weight_hh_string(hh_simd, pe)
# map() must be materialized into a list before indexing under Python 3
lstm_weight_decl_list = list(map(list, zip(*lstm_weight_ih)))[0] + list(map(list, zip(*lstm_weight_hh)))[0]
lstm_weight_string_list = list(map(list, zip(*lstm_weight_ih)))[1] + list(map(list, zip(*lstm_weight_hh)))[1]
if self.trainer_params.recurrent_bias_enabled:
lstm_bias = self.recurrent_layer.hls_lstm_bias_strings(pe)
lstm_bias_decl_list = list(map(list, zip(*lstm_bias)))[0]
lstm_bias_string_list = list(map(list, zip(*lstm_bias)))[1]
fc_weight_decl, fc_weight_string = self.batch_norm_fc.linear.hls_weight_string(self.reduce_factor)
fc_bias_decl, fc_bias_string = self.batch_norm_fc.linear.hls_bias_string(self.reduce_factor)
def define(name, val):
return "#define {} {}\n".format(name, val)
with open(output_path, 'w') as f:
print("Exporting model to {}".format(output_path))
f.write("#pragma once" + '\n')
f.write(define("PE", pe))
f.write(define("SIMD_INPUT", ih_simd))
f.write(define("SIMD_RECURRENT", hh_simd))
f.write(define("NUMBER_OF_NEURONS", self.trainer_params.num_units))
f.write(define("NUMBER_OF_NEURONS_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.num_units, 2.0)) + 2)))
f.write(define("HEIGHT_IN_PIX", self.trainer_params.input_size))
f.write(define("HEIGHT_IN_PIX_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.input_size, 2.0)) + 2)))
f.write(define("NUMBER_OF_CLASSES", self.trainer_params.num_classes))
f.write(define("NUMBER_OF_CLASSES_TYPEWIDTH", 7+1))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET", 28*self.trainer_params.word_size))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET_TYPEWIDTH", 10+1))
f.write(define("SIZE_OF_OUTPUT_BUFFER", 96))
f.write(define("DIRECTIONS", 2 if self.trainer_params.bidirectional else 1))
data_width = 64
input_bit_width = self.trainer_params.recurrent_activation_bit_width if self.trainer_params.quantize_input else 8
f.write(define("PACKEDWIDTH", int(data_width * input_bit_width / 2)))
f.write(define("DATAWIDTH", data_width))
f.write(define("PIXELWIDTH", input_bit_width))
f.write(define("WEIGHTWIDTH", self.trainer_params.recurrent_weight_bit_width))
f.write(define("BIASWIDTH", self.trainer_params.recurrent_bias_bit_width))
f.write(define("FCWEIGHTWIDTH", self.trainer_params.fc_weight_bit_width))
f.write(define("FCBIASWIDTH", self.trainer_params.fc_bias_bit_width))
f.write(define("OUTPUTACTIVATIONHIDDENLAYERWIDTH", self.trainer_params.recurrent_activation_bit_width))
f.write(define("OUTPUTACTIVATIONOUTPUTLAYERWIDTH", 16))
# write lstm weight decl
for decl in lstm_weight_decl_list:
f.write(decl + '\n')
# write lstm bias decl
if self.trainer_params.recurrent_bias_enabled:
for decl in lstm_bias_decl_list:
f.write(decl + '\n')
# write fc weight and bias decl
f.write(fc_weight_decl + '\n')
f.write(fc_bias_decl + '\n')
# write lstm weights
for string in lstm_weight_string_list:
f.write(string + '\n')
# write lstm bias
if self.trainer_params.recurrent_bias_enabled:
for string in lstm_bias_string_list:
f.write(string + '\n')
# write fc weights and bias
f.write(fc_weight_string + '\n')
f.write(fc_bias_string + '\n')
else:
raise Exception("Export not supported for {}".format(self.trainer_params.neuron_type))
## update 2023.5.2
basic version: FP32, with a single LSTM cell only; the training set is sequentialized MNIST. Recorded here to make later modifications easier.
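Example usage (a sketch; assumes the argparse entry script above is saved as main.py and a default_trainer_params.json is available):

```
python main.py --params default_trainer_params.json --epochs 10
python main.py --resume experiments/best.tar --eval
```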
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
# import warpctc_pytorch as wp
from torch.autograd import Variable
from torch.utils.data import DataLoader
from model import BiLSTM
from decoder import seq_mnist_decoder
from data import seq_mnist_train, seq_mnist_val
class Seq_MNIST_Trainer():
def __init__(self, trainer_params, args):
self.args = args
self.trainer_params = trainer_params
random.seed(trainer_params.random_seed)
torch.manual_seed(trainer_params.random_seed)
if args.cuda:
torch.cuda.manual_seed_all(trainer_params.random_seed)
self.train_data = seq_mnist_train(trainer_params)
self.val_data = seq_mnist_val(trainer_params)
self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, \
shuffle=True, num_workers=trainer_params.num_workers)
self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, \
shuffle=False, num_workers=trainer_params.num_workers)
self.starting_epoch = 1
self.prev_loss = 10000
self.model = BiLSTM(trainer_params)
# self.criterion = wp.CTCLoss(size_average=False)
self.criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)
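# CTCLoss(blank=0) matches the "+1" label shift in data.py: class 0 is the CTC blank and
# classes 1..num_classes-1 are the digits. It consumes the (T, N, C) log-probabilities
# produced by BiLSTM's LogSoftmax, a flattened target vector, and per-sample
# input/target lengths, which is exactly what train()/test() below pass in.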
self.labels = [i for i in range(trainer_params.num_classes-1)]
self.decoder = seq_mnist_decoder(labels=self.labels)
self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)
if args.cuda:
# torch.cuda.set_device(args.gpus)
# self.model = self.model.cuda()
# self.criterion = self.criterion.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # device selection added here
self.model = self.model.to(device)
self.criterion = self.criterion.to(device)
if args.resume or args.eval or args.export:
print("Loading model from {}".format(args.resume))
package = torch.load(args.resume, map_location=lambda storage, loc: storage)
self.model.load_state_dict(package['state_dict'])
self.optimizer.load_state_dict(package['optim_dict'])
self.starting_epoch = package['starting_epoch']
self.prev_loss = package['prev_loss']
if args.cuda:
for state in self.optimizer.state.values():
for k, v in state.items():
if torch.is_tensor(v):
# state[k] = v.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
state[k] = v.to(device)
# defaults to False
if args.init_bn_fc_fusion:
# defaults to False; presumably tracks whether BN and FC have already been fused
if not trainer_params.prefused_bn_fc:
self.model.batch_norm_fc.init_fusion()
self.trainer_params.prefused_bn_fc = True
else:
raise Exception("BN and FC are already fused.")
def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
package = {'state_dict': model.state_dict(),
'trainer_params': trainer_params,
'optim_dict' : optimizer.state_dict(),
'starting_epoch' : starting_epoch,
'prev_loss': prev_loss
}
return package
# save a checkpoint
def save_model(self, epoch, name):
path = self.args.experiments + '/' + name
print("Model saved at: {}\n".format(path))
torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params,
optimizer=self.optimizer, starting_epoch=epoch + 1, prev_loss=self.prev_loss), path)
def train(self, epoch):
self.model.train()
# items come from the overridden __getitem__(self, index)
for i, (item) in enumerate(self.train_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
if self.args.cuda:
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = self.model(data)
loss = self.criterion(output, labels, output_len, lab_len)
# loss_value = loss.data[0]
loss_value = loss.item()
print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(epoch,
self.args.epochs, (i+1)*self.trainer_params.batch_size, len(self.train_data) , loss_value))
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# synchronizes CPU and GPU memory so results are copied back to the CPU only after the GPU computation has finished, avoiding wrong or inconsistent results in multi-GPU setups.
# if self.args.cuda:
# torch.cuda.synchronize()
def test(self, epoch=0, save_model_flag=False):
self.model.eval()
loss_value = 0
for i, (item) in enumerate(self.val_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
if self.args.cuda:
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = self.model(data)
index = random.randint(0,self.trainer_params.test_batch_size-1)
label = labels[index*self.trainer_params.word_size:(index+1)*self.trainer_params.word_size].data.numpy()
label = label-1
prediction = self.decoder.decode(output[:,index,:], output_len[index], lab_len[index])
accuracy = self.decoder.hit(prediction, label)
print("Sample Label = {}".format(self.decoder.to_string(label)))
print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))
loss = self.criterion(output, labels, output_len, lab_len)
# loss_value += loss.data.numpy()
loss_value += loss.cpu().data.numpy()
loss_value /= (len(self.val_data)//self.trainer_params.test_batch_size)
# loss_value = loss_value[0]
loss_value = loss_value.item()
print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
if loss_value < self.prev_loss and save_model_flag:
self.prev_loss = loss_value
self.save_model(epoch, "best.tar")
elif save_model_flag:
self.save_model(epoch, "checkpoint.tar")
def eval_model(self):
self.test()
def train_model(self):
for epoch in range(self.starting_epoch, self.args.epochs + 1):
self.train(epoch)
self.test(epoch=epoch, save_model_flag=False)
if epoch%20==0:
self.optimizer.param_groups[0]['lr'] = self.optimizer.param_groups[0]['lr']*0.98
def export_model(self, simd_factor, pe):
self.model.eval()
self.model.export('r_model_fw_bw.hpp', simd_factor, pe)
def export_image(self):
random.seed()
idx = random.randint(0,self.val_data.images.shape[1]-1)
# idx = 100
img, label = self.val_data.images[:,idx,:], self.val_data.labels[0][idx]
inp = torch.from_numpy(img)
inp = inp.unsqueeze(1)
inp = Variable(inp, requires_grad=False)
out = self.model(inp)
out = self.decoder.decode(out, self.val_data.input_lengths, self.val_data.label_lengths)
out = self.decoder.to_string(out)
img = img.transpose(1, 0)
label -= 1
label = self.decoder.to_string(label)
assert label==out
from PIL import Image, ImageOps
from matplotlib import cm
img1 = (img+1)/2.
im = Image.fromarray(np.uint8(cm.gist_earth(img1)*255)).convert('L')
im = ImageOps.invert(im)
im.save('test_image.png')
img = img.transpose(1, 0)
img = np.reshape(img, (-1, 1))
np.savetxt("test_image.txt", img, fmt='%.10f')
f = open('test_image_gt.txt','w')
f.write(label)
f.close()
print("Prediction on the image = {}".format(out))
print("Label of exported image = {}".format(label))