Commit 2d918317 by Klin
parents 061582e9 2fe094dd
import torch
import numpy as np
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
# Build the sequential MNIST dataset
class seq_mnist(Dataset):
"""docstring for seq_mnist_dataset"""
def __init__(self, trainer_params, train_set):
self.suffix = "_train" if train_set else "_test"
self.data = datasets.MNIST('../../project/p/data', train=train_set, download=False, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]))
self.trainer_params = trainer_params
self.images = []
self.labels = []
self.input_lengths = np.ones(1, dtype=np.int32) * (28 * self.trainer_params.word_size)
self.label_lengths = np.ones(1, dtype=np.int32) * (self.trainer_params.word_size)
# self.build_dataset()
self.load_dataset()
def build_dataset(self):
imgs = []
labels = []
for j in range(len(self.data)//self.trainer_params.word_size): # this loop builds dataset
# allocate input_size (32) rows; the 28-row digit image is padded vertically below
img = np.zeros((self.trainer_params.input_size, self.trainer_params.word_size * 28))
labs = np.zeros(self.trainer_params.word_size, dtype=np.int32)
for i in range(self.trainer_params.word_size): # this loop builds one example
ims, labs[i] = self.data[(j*self.trainer_params.word_size)+i]
labs[i] += 1 # because ctc assumes 0 as blank character
ims = np.reshape(ims, (28,28))
# pad two rows at the top and bottom to get a 32x28 image
ims = np.pad(ims, ((2,2),(0,0)), mode='constant', constant_values=-1)
img[:, i*28 : (i+1)*28 ] = ims
# sequence models such as RNNs usually expect 3D input of shape (time_steps, batch_size, input_dim)
# after the transpose, each example is (time_steps, input_dim)
img = np.transpose(img)
imgs.append(img)
labels.append(labs)
# (batch_size, time_steps, input_dim) => (time_steps, batch_size, input_dim)
self.images = np.asarray(imgs, dtype=np.float32).transpose(1, 0, 2)
self.labels.append(labels)
# save
np.save('data/images{}.npy'.format(self.suffix), self.images)
np.save('data/labels{}.npy'.format(self.suffix), np.asarray(self.labels))
# quantize the input here if requested
if self.trainer_params.quantize_input:
self.images = self.quantize_tensor_image(self.images)
self.images = np.asarray(self.images)
def load_dataset(self):
self.images = np.load('data/images{}.npy'.format(self.suffix))
self.labels = np.load('data/labels{}.npy'.format(self.suffix))
print("Successfully load dataset!")
if self.trainer_params.quantize_input:
self.images = self.quantize_tensor_image(self.images)
self.images = np.asarray(self.images)
# no separate handling needed here; adjusted during PTQ
def quantize_tensor_image(self, tensor_image):
frac_bits = self.trainer_params.recurrent_activation_bit_width-1
prescale = 2**frac_bits
postscale = 2**-frac_bits
max_val = 1 - postscale
tensor_image = tensor_image.clip(-1, max_val)
tensor_image = np.round(tensor_image*prescale)*postscale
return tensor_image
# number of examples (second axis of self.images)
def __len__(self):
return self.images.shape[1]
def __getitem__(self, index):
return self.images[:,index,:], self.labels[0][index], self.input_lengths, self.label_lengths
class seq_mnist_train(seq_mnist):
def __init__(self, trainer_params):
print("Building Training Dataset . . . ")
super(seq_mnist_train, self).__init__(trainer_params, train_set=True)
class seq_mnist_val(seq_mnist):
def __init__(self, trainer_params):
print("Building Testing Dataset . . . ")
super(seq_mnist_val, self).__init__(trainer_params, train_set=False)
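# DataLoader sketch (illustrative, not part of the original file): it assumes MNIST is already
# available at the path used above, that data/images_train.npy and data/labels_train.npy have
# been built, and that trainer_params provides at least word_size, input_size, quantize_input
# and recurrent_activation_bit_width.
if __name__ == "__main__":
    from types import SimpleNamespace
    params = SimpleNamespace(word_size=5, input_size=32, quantize_input=False,
                             recurrent_activation_bit_width=4)
    train_data = seq_mnist_train(params)
    loader = DataLoader(train_data, batch_size=4, shuffle=True)
    imgs, labels, in_lens, lab_lens = next(iter(loader))
    print(imgs.shape)  # (batch, 28 * word_size time steps, input_size)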
import math
import numpy as np
class seq_mnist_decoder():
def __init__(self, labels, blank=0):
self.blank_chr = blank
self.labels = labels
def decode(self, predictions, output_len, label_len):
predictions = predictions.data.cpu().numpy()
output = []
# translate the predictions step by step, then stitch them into a sequence
# predictions is a 2D array of shape (time_steps, num_classes) holding the model output;
# each row is a time step and each column the score of a possible label
for i in range(output_len):
pred = np.argmax(predictions[i, :])
# remove blanks and merge repeats (several consecutive LSTM steps may correspond to the same character)
if (pred != self.blank_chr) and (pred != np.argmax(predictions[i-1, :])): # merging repeats and removing blank character (0)
output.append(pred-1)
return np.asarray(output)
def hit(self, pred, target):
res = []
for idx, word in enumerate(target):
if idx < len(pred): # within the length of the prediction list
item = pred[idx]
# if the current position is beyond the length of the prediction list, there is no prediction for this target label, so set item to an arbitrary out-of-range value
else:
item = 10
res.append(word == item)
acc = np.mean(np.asarray(res))*100
if math.isnan(acc):
return 0.00
else:
return acc
def to_string(self, in_str):
out_str = ''
for i in range(in_str.shape[0]):
out_str += str(in_str[i])
return out_str
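# Greedy-decode sketch on random scores (illustrative only; assumes num_classes == 11, i.e.
# the CTC blank at index 0 plus the ten digit labels shifted by one, matching the +1 applied
# when the dataset is built).
if __name__ == "__main__":
    import torch
    decoder = seq_mnist_decoder(labels=list(range(10)))
    dummy = torch.randn(28 * 5, 11)                    # (time_steps, num_classes)
    decoded = decoder.decode(dummy, output_len=28 * 5, label_len=5)
    target = np.array([3, 1, 4, 1, 5])
    print(decoder.to_string(decoded), decoder.hit(decoded, target))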
import sys
import os
# extract parameter and FLOP counts from val.txt, the file that get_param.py's output was redirected to
def extract_ratio(md='ResNet18'):
fr = open('param_flops_' + md + '.txt','r')
lines = fr.readlines()
layer = []
par_ratio = []
flop_ratio = []
for line in lines:
# if '(' in line and ')' in line:
if 'Conv' in line or 'BatchNorm2d' in line or 'Linear' in line:
layer.append(line.split(':')[1].split('(')[0])
r1 = line.split('%')[0].split(',')[-1]
r1 = float(r1)
par_ratio.append(r1)
r2 = line.split('%')[-2].split(',')[-1]
r2 = float(r2)
flop_ratio.append(r2)
return layer, par_ratio, flop_ratio
if __name__ == "__main__":
layer, par_ratio, flop_ratio = extract_ratio()
print(len(layer))
print(len(par_ratio))
print(len(flop_ratio))
from torch.autograd import Function
class FakeQuantize(Function):
@staticmethod
def forward(ctx, x, qparam):
x = qparam.quantize_tensor(x)
x = qparam.dequantize_tensor(x)
return x
@staticmethod
def backward(ctx, grad_output):
return grad_output, None
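# Straight-through sketch (not from the original repo): the forward pass quantizes and then
# dequantizes through the qparam object, while the backward pass returns the gradient
# unchanged. _ToyQParam is a hypothetical stand-in exposing the same
# quantize_tensor/dequantize_tensor interface as QParam in module.py.
if __name__ == "__main__":
    import torch

    class _ToyQParam:
        def quantize_tensor(self, x):
            return torch.round(x * 4)  # 2 fractional bits
        def dequantize_tensor(self, q):
            return q / 4

    x = torch.randn(3, requires_grad=True)
    y = FakeQuantize.apply(x, _ToyQParam())
    y.sum().backward()
    print(y, x.grad)  # y is the fake-quantized value, x.grad is all ones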
from model import *
import torch
from ptflops import get_model_complexity_info
import argparse
def get_children(model: torch.nn.Module):
# get children from model
# to keep the parameters updatable later on, the children could be held in an nn.ModuleList
# children = nn.ModuleList(model.children())
# print(children)
# which would make later updates to the contained modules easier
# flatt_children = nn.ModuleList()
children = list(model.children())
# flatt_children = nn.ModuleList()
flatt_children = []
if len(children) == 0:
# if model has no children; model is last child! :O
return model
else:
# look for children from children... to the last child!
for child in children:
try:
flatt_children.extend(get_children(child))
except TypeError:
flatt_children.append(get_children(child))
# print(flatt_children)
return flatt_children
# helper that yields all child modules, skipping wrapper containers
def get_all_child_modules(module):
for name, child in module.named_children():
if isinstance(child, nn.Sequential):
yield from get_all_child_modules(child)
elif len(list(child.children())) > 0:
yield from child.children()
else:
yield child
def filter_fn(module, n_inp, outp_shape):
# if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d, torch.nn.ReLU,torch.nn.BatchNorm2d,torch.nn.Linear,torch.nn.AdaptiveAvgPool2d)):
if 'conv' in module or 'bn' in module or 'fc' in module or 'avg' in module or 'relu' in module:
return True
return False
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Model Analysis --- params & flops')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18')
args = parser.parse_args()
if args.model == 'ResNet18':
model = resnet18()
elif args.model == 'ResNet50':
model = resnet50()
elif args.model == 'ResNet152':
model = resnet152()
full_file = 'ckpt/cifar10_' + args.model + '.pt'
model.load_state_dict(torch.load(full_file))
# flat = get_children(model)
# print(flat)
# flat = get_children(model)
# new_model = nn.Sequential(*flat)
flops, params = get_model_complexity_info(model, (3, 32, 32), as_strings=True, print_per_layer_stat=True)
class GlobalVariables:
SELF_INPLANES = 0
# -*- coding: utf-8 -*-
# used to share global variables across multiple modules
def _init(): # initialization
global _global_dict
_global_dict = {}
def set_value(value,is_bias=False):
# set a global entry
if is_bias:
_global_dict[0] = value
else:
_global_dict[1] = value
def get_value(is_bias=False): # bias gets its own precision, independent of the other variables
if is_bias:
return _global_dict[0]
else:
return _global_dict[1]
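# Usage sketch: gol holds the value grids shared across modules (e.g. the power-of-two levels
# used by get_nearest_val in module.py). The grids below are illustrative assumptions, not the
# project's actual settings.
if __name__ == "__main__":
    import torch
    _init()
    set_value(torch.tensor([0.] + [2. ** (-i) for i in range(7)]))                 # weight/activation grid
    set_value(torch.tensor([0.] + [2. ** (-i) for i in range(15)]), is_bias=True)  # bias grid
    print(get_value(), get_value(is_bias=True))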
import os
import json
import torch
import argparse
from trainer import Seq_MNIST_Trainer
torch.backends.cudnn.enabled = False
torch.set_printoptions(precision=10)
class objdict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
def __setattr__(self, name, value):
self[name] = value
def __delattr__(self, name):
if name in self:
del self[name]
else:
raise AttributeError("No such attribute: " + name)
def ascii_encode_dict(data):
ascii_encode = lambda x: x.encode('ascii')
# return dict(map(ascii_encode, pair) if isinstance(pair[1], unicode) else pair for pair in data.items())
return dict(map(ascii_encode, pair) if isinstance(pair[1], str) else pair for pair in data.items())
def non_or_str(value):
if value is None:
return None
return value
if __name__ == '__main__':
# Training settings
parser = argparse.ArgumentParser(description='PyTorch BiLSTM Sequential MNIST Example')
parser.add_argument('--params', '-p', type=str, default="default_trainer_params.json", help='Path to params JSON file. Default ignored when resuming.')
# this could be changed; the original version presumably supported multi-GPU training
parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training')
# parser.add_argument('--gpus', default=0, help='gpus used for training - e.g 0,1,3')
parser.add_argument('--epochs', type=int, default=1000, help='Number of epochs')
parser.add_argument('--init_bn_fc_fusion', default=False, action='store_true', help='Init BN FC fusion.')
# defaults to None
parser.add_argument('--resume', type=non_or_str, help='resume from a checkpoint')
parser.add_argument('--eval', default=False, action='store_true', help='perform evaluation of trained model')
parser.add_argument('--export', default=False, action='store_true', help='perform weights export as .hpp of trained model')
parser.add_argument('--export_image', default=False, action='store_true', help='perform test image export as png and txt')
parser.add_argument('--experiments', default="./experiments", help='Save Path')
parser.add_argument('--simd_factor', default=1, type=int, help='SIMD factor for export.')
parser.add_argument('--pe', default=1, type=int, help='Number of PEs for export.')
#Overrides
parser.add_argument('--random_seed', type=int)
parser.add_argument('--batch_size', type=int)
parser.add_argument('--test_batch_size', type=int)
parser.add_argument('--num_workers', type=int)
parser.add_argument('--num_units', type=int)
parser.add_argument('--num_layers', type=int)
parser.add_argument('--num_classes', type=int)
parser.add_argument('--word_size', type=int)
parser.add_argument('--seq_len', type=int)
parser.add_argument('--neuron_type', type=str)
parser.add_argument('--input_size', type=int)
parser.add_argument('--lr', type=float)
parser.add_argument('--bidirectional', type=bool)
parser.add_argument('--reduce_bidirectional', type=str)
parser.add_argument('--recurrent_bias_enabled', type=bool)
parser.add_argument('--checkpoint_interval', type=int)
args = parser.parse_args()
if args.export:
args.no_cuda = True
args.cuda = not args.no_cuda and torch.cuda.is_available()
if not os.path.exists(args.experiments):
os.mkdir(args.experiments)
# resume directly
# if (args.resume or args.eval) and args.params == "default_trainer_params.json":
# package = torch.load(args.resume, map_location=lambda storage, loc: storage)
# trainer_params = package['trainer_params']
# train from scratch
# else:
with open(args.params) as d:
# trainer_params = json.load(d, object_hook=ascii_encode_dict)
trainer_params = json.load(d)
trainer_params = objdict(trainer_params)
for k in trainer_params.keys():
print(k, trainer_params[k])
if trainer_params[k] == 'LSTM':
print("LSTM YES")
elif trainer_params[k] == 'CONCAT':
print("CONCAT YES")
# args is still useful: the defaults in trainer_params and the parameters args cares about are usually complementary
trainer = Seq_MNIST_Trainer(trainer_params, args)
if args.export:
trainer.export_model(args.simd_factor, args.pe)
exit(0)
if args.export_image:
trainer.export_image()
exit(0)
if args.eval:
trainer.eval_model()
exit(0)
else:
trainer.train_model()
# Copyright (c) 2018, Xilinx, Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION). HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import math
import numpy
import torch
import torch.nn as nn
from module import *
from functools import partial
# from quantization.modules.rnn import QuantizedLSTM
# from quantization.modules.quantized_linear import QuantizedLinear
class SequenceWise(nn.Module):
def __init__(self, module):
"""
Collapses input of dim T*N*H to (T*N)*H, and applies to a module.
Allows handling of variable sequence lengths and minibatch sizes.
:param module: Module to apply input to.
"""
super(SequenceWise, self).__init__()
self.module = module
def forward(self, x):
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.module(x)
x = x.view(t, n, -1)
return x
def __repr__(self):
tmpstr = self.__class__.__name__ + ' (\n'
tmpstr += self.module.__repr__()
tmpstr += ')'
return tmpstr
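# Shape sketch for SequenceWise (illustrative sizes): a (T, N, H) tensor is flattened to
# (T*N, H), passed through the wrapped module, then reshaped back to (T, N, H_out).
if __name__ == "__main__":
    seq_fc = SequenceWise(nn.Linear(32, 11))
    out = seq_fc(torch.randn(140, 4, 32))  # T=140 time steps, N=4 sequences
    print(out.shape)                       # torch.Size([140, 4, 11])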
# can switch between BN-FC and plain FC
class FusedBatchNorm1dLinear(nn.Module):
def __init__(self, trainer_params, batch_norm, linear):
super(FusedBatchNorm1dLinear, self).__init__()
self.batch_norm = batch_norm
self.linear = linear
self.trainer_params = trainer_params
def forward(self, x):
if self.trainer_params.prefused_bn_fc:
x = self.linear(x)
else:
x = self.batch_norm(x)
x = self.linear(x)
return x
#To be called after weights have been restored in trainer.py
def init_fusion(self):
print("Fusing BN-FC")
bn_weight_var = torch.mul(self.batch_norm.weight.data, torch.rsqrt(self.batch_norm.running_var + self.batch_norm.eps))
bias_coeff = self.batch_norm.bias.data - torch.mul(self.batch_norm.running_mean, bn_weight_var)
self.linear.bias.data = torch.addmv(self.linear.bias.data, self.linear.weight.data, bias_coeff)
self.linear.weight.data = self.linear.weight.data * bn_weight_var.expand_as(self.linear.weight.data)
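# Consistency sketch for init_fusion (not part of the original training flow): in eval mode
# with fixed running statistics, the fused Linear alone should reproduce BatchNorm1d -> Linear.
# Only the prefused_bn_fc flag of trainer_params is assumed here; the sizes are illustrative.
if __name__ == "__main__":
    from types import SimpleNamespace
    bn, fc = nn.BatchNorm1d(8), nn.Linear(8, 4)
    bn.eval()
    bn.running_mean.uniform_(-1.0, 1.0)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.data.uniform_(0.5, 1.5)
    bn.bias.data.uniform_(-0.5, 0.5)
    fused = FusedBatchNorm1dLinear(SimpleNamespace(prefused_bn_fc=False), bn, fc)
    x = torch.randn(5, 8)
    ref = fused(x)                       # BN -> FC path
    fused.init_fusion()                  # fold the BN statistics into the Linear layer
    fused.trainer_params.prefused_bn_fc = True
    print(torch.allclose(ref, fused(x), atol=1e-5))  # expected: True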
class BiLSTM(nn.Module):
def __init__(self, trainer_params):
super(BiLSTM, self).__init__()
self.trainer_params = trainer_params
print(f"self.trainer_params.reduce_bidirectional:{self.trainer_params.reduce_bidirectional}")
# self.trainer_params.reduce_bidirectional = 'CONCAT'
# if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
# self.reduce_factor = 2
# else:
# self.reduce_factor = 1
# if the layer type is LSTM, the arguments in parentheses configure that class
# self.recurrent_layer = self.recurrent_layer_type(input_size=self.trainer_params.input_size,
# hidden_size=self.trainer_params.num_units,
# num_layers=self.trainer_params.num_layers,
# batch_first=False,
# bidirectional=self.trainer_params.bidirectional,
# bias=self.trainer_params.recurrent_bias_enabled)
# self.recurrent_layer = nn.LSTM(input_size=self.trainer_params.input_size,
# hidden_size=self.trainer_params.num_units,
# num_layers=self.trainer_params.num_layers,
# batch_first=False,
# bidirectional=self.trainer_params.bidirectional,
# bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers = nn.ModuleList()
# create the first LSTM layer and add it to the ModuleList
lstm = nn.LSTM( input_size=self.trainer_params.input_size,
hidden_size=self.trainer_params.num_units,
num_layers=1,
batch_first=False,
bidirectional=self.trainer_params.bidirectional,
bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers.append(lstm)
# create LSTM layers 2..num_layers and add them to the ModuleList
for i in range(1, self.trainer_params.num_layers):
lstm = nn.LSTM(input_size=self.trainer_params.num_units * 2 if self.trainer_params.bidirectional else self.trainer_params.num_units,
hidden_size=self.trainer_params.num_units,
num_layers=1,
batch_first=False,
bidirectional=self.trainer_params.bidirectional,
bias=self.trainer_params.recurrent_bias_enabled)
self.lstm_layers.append(lstm)
# self.batch_norm_fc = FusedBatchNorm1dLinear(
# trainer_params,
# nn.BatchNorm1d(self.reduce_factor * self.trainer_params.num_units),
# nn.Linear(
# in_features=self.reduce_factor * self.trainer_params.num_units,
# out_features=trainer_params.num_classes,
# bias=True )
# )
self.fc1 = nn.Linear(
in_features=self.reduce_factor * self.trainer_params.num_units,
out_features=trainer_params.num_classes,
bias=True )
# self.output_layer = nn.Sequential(SequenceWise(self.batch_norm_fc), nn.LogSoftmax(dim=2))
self.output_layer = nn.Sequential(SequenceWise(self.fc1), nn.LogSoftmax(dim=2))
@property
def reduce_factor(self):
if self.trainer_params.bidirectional and self.trainer_params.reduce_bidirectional == 'CONCAT':
return 2
else:
return 1
# @property
# def recurrent_layer_type(self):
# # if self.trainer_params.neuron_type == 'QLSTM':
# # func = QuantizedLSTM
# # elif self.trainer_params.neuron_type == 'LSTM':
# # func = nn.LSTM
# if self.trainer_params.neuron_type == 'LSTM':
# func = nn.LSTM
# else:
# raise Exception("Invalid neuron type.")
# if self.trainer_params.neuron_type == 'QLSTM':
# func = partial(func, bias_bit_width=self.trainer_params.recurrent_bias_bit_width,
# bias_q_type=self.trainer_params.recurrent_bias_quantization,
# weight_bit_width=self.trainer_params.recurrent_weight_bit_width,
# weight_q_type=self.trainer_params.recurrent_weight_quantization,
# activation_bit_width=self.trainer_params.recurrent_activation_bit_width,
# activation_q_type=self.trainer_params.recurrent_activation_quantization,
# internal_activation_bit_width=self.trainer_params.internal_activation_bit_width)
# return func
def forward(self, x):
# h appears unused because there used to be only a single LSTM cell (num_layers = 1)
# x, h = self.recurrent_layer(x)
h_n = []
c_n = []
# run each LSTM in the ModuleList in turn
for i, lstm in enumerate(self.lstm_layers):
# for layers after the first, use the previous layer's hidden and cell states as this layer's initial state
if i > 0:
x, (h, c) = lstm(x, (h_n[-1], c_n[-1]))
else:
x, (h, c) = lstm(x)
# store this layer's hidden and cell states as inputs for the next layer
h_n.append(h)
c_n.append(c)
if self.trainer_params.bidirectional:
if self.trainer_params.reduce_bidirectional == 'SUM':
x = x.view(x.size(0), x.size(1), 2, -1).sum(2).view(x.size(0), x.size(1), -1)
elif self.trainer_params.reduce_bidirectional == 'CONCAT':
#do nothing, x is already in the proper shape
pass
else:
raise Exception('Unknown reduce mode: {}'.format(self.trainer_params.reduce_bidirectional))
x = self.output_layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qlstm_layers = nn.ModuleDict()
for i, lstm in enumerate(self.lstm_layers):
# for layers after the first, the incoming hidden and cell states serve as this layer's initial state
if i > 0:
self.qlstm_layers[str(i)] = QLSTM(quant_type=quant_type,lstm_module=lstm,qix=False,qih=False,qic=False,qox=True,qoh=True,qoc=True,num_bits=num_bits,e_bits=e_bits)
# the first LSTM layer has no incoming h and c, so qih/qic are False; it does take x, so qix is True
else:
self.qlstm_layers[str(i)] = QLSTM(quant_type=quant_type,lstm_module=lstm,qix=True,qih=False,qic=False,qox=True,qoh=True,qoc=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
# for name,layer in self.qlstm_layers.items():
# print(f"name:{name}")
def quantize_forward(self, x):
for name, layer in self.qlstm_layers.items():
if '0' in name:
x,(h,c) = layer(x)
else:
x,(h,c) = layer(x,h,c)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
x = self.qfc1(x)
x = x.view(t, n, -1)
x = F.log_softmax(x,dim=2)
# out = F.softmax(x, dim=1)
# return out
return x
def freeze(self):
for name, layer in self.qlstm_layers.items():
if '0' in name:
layer.freeze(flag=0)
else:
layer.freeze(qix = self.qlstm_layers[str(int(name)-1)].qox, qih=self.qlstm_layers[str(int(name)-1)].qoh, qic=self.qlstm_layers[str(int(name)-1)].qoc,flag=1)
self.qfc1.freeze(qi=self.qlstm_layers[name].qox)
def quantize_inference(self, x):
# first fake-quantize x (the fake-quantization variant adapted to the LSTM)
x = FakeQuantize.apply(x,self.qlstm_layers['0'].qix)
for name, layer in self.qlstm_layers.items():
if '0' in name:
x,(h,c) = layer.quantize_inference(x)
else:
x,(h,c) = layer.quantize_inference(x,h,c)
t, n = x.size(0), x.size(1)
x = x.view(t * n, -1)
# the modified QLinear.quantize_inference quantizes its input x, so the result must be dequantized here.
x = self.qfc1.quantize_inference(x)
x = self.qfc1.qo.dequantize_tensor(x)
x = x.view(t, n, -1)
x = F.log_softmax(x,dim=2)
return x
def export(self, output_path, simd_factor, pe):
if self.trainer_params.neuron_type == 'QLSTM':
assert(self.trainer_params.input_size % simd_factor == 0)
assert(self.trainer_params.num_units % simd_factor == 0)
assert((simd_factor >= 1 and pe == 1) or (simd_factor == 1 and pe >= 1))
ih_simd = self.trainer_params.input_size // simd_factor
hh_simd = self.trainer_params.num_units // simd_factor
lstm_weight_ih = self.recurrent_layer.hls_lstm_weight_ih_string(ih_simd, pe)
lstm_weight_hh = self.recurrent_layer.hls_lstm_weight_hh_string(hh_simd, pe)
lstm_weight_decl_list = list(map(list, zip(*lstm_weight_ih)))[0] + list(map(list, zip(*lstm_weight_hh)))[0]
lstm_weight_string_list = list(map(list, zip(*lstm_weight_ih)))[1] + list(map(list, zip(*lstm_weight_hh)))[1]
if self.trainer_params.recurrent_bias_enabled:
lstm_bias = self.recurrent_layer.hls_lstm_bias_strings(pe)
lstm_bias_decl_list = list(map(list, zip(*lstm_bias)))[0]
lstm_bias_string_list = list(map(list, zip(*lstm_bias)))[1]
fc_weight_decl, fc_weight_string = self.batch_norm_fc.linear.hls_weight_string(self.reduce_factor)
fc_bias_decl, fc_bias_string = self.batch_norm_fc.linear.hls_bias_string(self.reduce_factor)
def define(name, val):
return "#define {} {}\n".format(name, val)
with open(output_path, 'w') as f:
print("Exporting model to {}".format(output_path))
f.write("#pragma once" + '\n')
f.write(define("PE", pe))
f.write(define("SIMD_INPUT", ih_simd))
f.write(define("SIMD_RECURRENT", hh_simd))
f.write(define("NUMBER_OF_NEURONS", self.trainer_params.num_units))
f.write(define("NUMBER_OF_NEURONS_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.num_units, 2.0)) + 2)))
f.write(define("HEIGHT_IN_PIX", self.trainer_params.input_size))
f.write(define("HEIGHT_IN_PIX_TYPEWIDTH", int(math.ceil(math.log(self.trainer_params.input_size, 2.0)) + 2)))
f.write(define("NUMBER_OF_CLASSES", self.trainer_params.num_classes))
f.write(define("NUMBER_OF_CLASSES_TYPEWIDTH", 7+1))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET", 28*self.trainer_params.word_size))
f.write(define("MAX_NUMBER_COLUMNS_TEST_SET_TYPEWIDTH", 10+1))
f.write(define("SIZE_OF_OUTPUT_BUFFER", 96))
f.write(define("DIRECTIONS", 2 if self.trainer_params.bidirectional else 1))
data_width = 64
input_bit_width = self.trainer_params.recurrent_activation_bit_width if self.trainer_params.quantize_input else 8
f.write(define("PACKEDWIDTH", int(data_width * input_bit_width / 2)))
f.write(define("DATAWIDTH", data_width))
f.write(define("PIXELWIDTH", input_bit_width))
f.write(define("WEIGHTWIDTH", self.trainer_params.recurrent_weight_bit_width))
f.write(define("BIASWIDTH", self.trainer_params.recurrent_bias_bit_width))
f.write(define("FCWEIGHTWIDTH", self.trainer_params.fc_weight_bit_width))
f.write(define("FCBIASWIDTH", self.trainer_params.fc_bias_bit_width))
f.write(define("OUTPUTACTIVATIONHIDDENLAYERWIDTH", self.trainer_params.recurrent_activation_bit_width))
f.write(define("OUTPUTACTIVATIONOUTPUTLAYERWIDTH", 16))
# write lstm weight decl
for decl in lstm_weight_decl_list:
f.write(decl + '\n')
# write lstm bias decl
if self.trainer_params.recurrent_bias_enabled:
for decl in lstm_bias_decl_list:
f.write(decl + '\n')
# write fc weight and bias decl
f.write(fc_weight_decl + '\n')
f.write(fc_bias_decl + '\n')
# write lstm weights
for string in lstm_weight_string_list:
f.write(string + '\n')
# write lstm bias
if self.trainer_params.recurrent_bias_enabled:
for string in lstm_bias_string_list:
f.write(string + '\n')
# write fc weights and bias
f.write(fc_weight_string + '\n')
f.write(fc_bias_string + '\n')
else:
raise Exception("Export not supported for {}".format(self.trainer_params.neuron_type))
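# End-to-end PTQ sketch for BiLSTM (a sketch with assumed hyperparameters; INT mode so the gol
# grids are not needed): calibrate with quantize_forward, then freeze, then run
# quantize_inference. trainer_params is replaced by a SimpleNamespace carrying only the fields
# BiLSTM actually reads here.
if __name__ == "__main__":
    from types import SimpleNamespace
    params = SimpleNamespace(input_size=32, num_units=64, num_layers=2, num_classes=11,
                             bidirectional=False, reduce_bidirectional='CONCAT',
                             recurrent_bias_enabled=True)
    net = BiLSTM(params)
    net.eval()
    net.quantize(quant_type='INT', num_bits=8, e_bits=3)
    x = torch.randn(140, 4, 32)                 # (T, N, input_size)
    with torch.no_grad():
        net.quantize_forward(x)                 # calibration pass fills the QParam ranges
        net.freeze()
        print(net.quantize_inference(x).shape)  # torch.Size([140, 4, 11])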
import math
import numpy as np
import gol
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from function import FakeQuantize
# get the nearest quantized value
# def get_nearest_val(quant_type,x,is_bias=False):
# if quant_type=='INT':
# return x.round_()
# plist = gol.get_value(is_bias)
# # print('get')
# # print(plist)
# # x = x / 64
# shape = x.shape
# xhard = x.view(-1)
# plist = plist.type_as(x)
# # index of the nearest power-of-two level
# idx = (xhard.unsqueeze(0) - plist.unsqueeze(1)).abs().min(dim=0)[1]
# xhard = plist[idx].view(shape)
# xout = (xhard - x).detach() + x
# # xout = xout * 64
# return xout
def get_nearest_val(quant_type, x, is_bias=False, block_size=1000000):
if quant_type == 'INT':
return x.round_()
plist = gol.get_value(is_bias)
shape = x.shape
# xhard = x.view(-1)
xhard = x.reshape(-1)
xout = torch.zeros_like(xhard)
plist = plist.type_as(x)
n_blocks = (x.numel() + block_size - 1) // block_size
for i in range(n_blocks):
start_idx = i * block_size
end_idx = min(start_idx + block_size, xhard.numel())
block_size_i = end_idx - start_idx
# print(x.numel())
# print(block_size_i)
# print(start_idx)
# print(end_idx)
xblock = xhard[start_idx:end_idx]
# xblock = xblock.view(shape[start_idx:end_idx])
plist_block = plist.unsqueeze(1) #.expand(-1, block_size_i)
idx = (xblock.unsqueeze(0) - plist_block).abs().min(dim=0)[1]
# print(xblock.shape)
xhard_block = plist[idx].view(xblock.shape)
xout[start_idx:end_idx] = (xhard_block - xblock).detach() + xblock
# xout = xout.view(shape)
xout = xout.reshape(shape)
return xout
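# Rounding sketch: INT mode simply rounds, while other modes snap to the nearest level of the
# grid stored in gol (the grid below is an assumed power-of-two example; gol must have been
# initialised first, as in gol.py).
if __name__ == "__main__":
    gol._init()
    gol.set_value(torch.tensor([0.] + [2. ** (-i) for i in range(7)]))
    print(get_nearest_val('INT', torch.tensor([1.3, -2.6])))       # tensor([ 1., -3.])
    print(get_nearest_val('POT', torch.tensor([0.3, 0.05, 0.9])))  # snaps to 0.25, 0.0625, 1.0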
# for symmetric signed quantization, return the maximum of the quantized range
def get_qmax(quant_type,num_bits=None, e_bits=None):
if quant_type == 'INT':
qmax = 2. ** (num_bits - 1) - 1
elif quant_type == 'POT':
qmax = 1
else: #FLOAT
m_bits = num_bits - 1 - e_bits
dist_m = 2 ** (-m_bits)
e = 2 ** (e_bits - 1)
expo = 2 ** e
m = 2 ** m_bits -1
frac = 1. + m * dist_m
qmax = frac * expo
return qmax
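# Worked values for the ranges above (sketch): INT with 8 bits gives 127; POT gives 1; FLOAT
# with num_bits=8, e_bits=3 gives m_bits=4, frac=1+15/16, expo=2**(2**2)=16, so qmax=31.
if __name__ == "__main__":
    print(get_qmax('INT', 8), get_qmax('POT'), get_qmax('FLOAT', 8, 3))  # 127.0 1 31.0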
# signed quantization throughout, so zero_point is always 0
def calcScaleZeroPoint(min_val, max_val, qmax):
scale = torch.max(max_val.abs(),min_val.abs()) / qmax
zero_point = torch.tensor(0.)
return scale, zero_point
# quantize the input; both input and output are tensors
def quantize_tensor(quant_type, x, scale, zero_point, qmax, is_bias=False):
# the quantized range is determined directly by the bit width
qmin = -qmax
q_x = zero_point + x / scale
q_x.clamp_(qmin, qmax)
q_x = get_nearest_val(quant_type, q_x, is_bias)
return q_x
# bias uses a different precision; num_bits/e_bits depend on the quantization type
def bias_qmax(quant_type):
if quant_type == 'INT':
return get_qmax(quant_type, 64)
elif quant_type == 'POT':
return get_qmax(quant_type)
else:
return get_qmax(quant_type, 16, 7)
# convert back to FP32; no further clamping needed
def dequantize_tensor(q_x, scale, zero_point):
return scale * (q_x - zero_point)
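# Round-trip sketch for the symmetric scheme: the scale maps max(|min|, |max|) to qmax and
# zero_point stays 0, so dequantize(quantize(x)) matches x up to one quantization step.
if __name__ == "__main__":
    x = torch.tensor([-0.8, 0.1, 0.5])
    qmax = get_qmax('INT', 8)
    scale, zp = calcScaleZeroPoint(x.min(), x.max(), qmax)
    q = quantize_tensor('INT', x, scale, zp, qmax)
    print(q, dequantize_tensor(q, scale, zp))  # q is integer-valued, the second tensor ~= x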
class QParam(nn.Module):
def __init__(self,quant_type, num_bits=8, e_bits=3):
super(QParam, self).__init__()
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.qmax = get_qmax(quant_type, num_bits, e_bits)
scale = torch.tensor([], requires_grad=False)
zero_point = torch.tensor([], requires_grad=False)
min = torch.tensor([], requires_grad=False)
max = torch.tensor([], requires_grad=False)
# register as buffers so they are recorded in the state_dict
self.register_buffer('scale', scale)
self.register_buffer('zero_point', zero_point)
self.register_buffer('min', min)
self.register_buffer('max', max)
# update the observed range and the quantization parameters
def update(self, tensor):
if self.max.nelement() == 0 or self.max.data < tensor.max().data:
self.max.data = tensor.max().data
self.max.clamp_(min=0)
if self.min.nelement() == 0 or self.min.data > tensor.min().data:
self.min.data = tensor.min().data
self.min.clamp_(max=0)
self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.qmax)
def quantize_tensor(self, tensor):
return quantize_tensor(self.quant_type, tensor, self.scale, self.zero_point, self.qmax)
def dequantize_tensor(self, q_x):
return dequantize_tensor(q_x, self.scale, self.zero_point)
# this method makes the parameters restorable from a state_dict
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys,
error_msgs):
key_names = ['scale', 'zero_point', 'min', 'max']
for key in key_names:
value = getattr(self, key)
value.data = state_dict[prefix + key].data
state_dict.pop(prefix + key)
# the return value of this method is what gets printed for this object
def __str__(self):
info = 'scale: %.10f ' % self.scale
info += 'zp: %.6f ' % self.zero_point
info += 'min: %.6f ' % self.min
info += 'max: %.6f' % self.max
return info
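# Calibration sketch for QParam: update() tracks the running min/max of observed tensors and
# refreshes (scale, zero_point); quantize_tensor/dequantize_tensor then use that range.
if __name__ == "__main__":
    qp = QParam('INT', num_bits=8)
    qp.update(torch.randn(1000))                        # observe a batch of activations
    t = torch.tensor([0.25, -0.75])
    print(qp)
    print(qp.dequantize_tensor(qp.quantize_tensor(t)))  # ~= t within one quantization step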
# base class of the concrete quantized layers; qi/qo quantize the input/output respectively
class QModule(nn.Module):
def __init__(self,quant_type, qi=True, qo=True, num_bits=8, e_bits=3):
super(QModule, self).__init__()
if qi:
self.qi = QParam(quant_type,num_bits, e_bits)
if qo:
self.qo = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass # no-op
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
"""
QModule 量化卷积
:quant_type: 量化类型
:conv_module: 卷积模块
:qi: 是否量化输入特征图
:qo: 是否量化输出特征图
:num_bits: 8位bit数
"""
class QConv2d(QModule):
def __init__(self, quant_type, conv_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConv2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
# freeze() fixes the truly quantized weight parameters and writes them back into the original full-precision layer, which makes divergence computation easier
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
# for pooling or activation inputs, no extra min/max statistics are needed; they share the same output range
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
# following https://zhuanlan.zhihu.com/p/156835141, this is the coefficient of Eq. (3)
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.conv_module.weight.data = self.qw.quantize_tensor(self.conv_module.weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
self.conv_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0.,qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x): # forward pass; x is a floating-point tensor
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi) # quantize the input tensor x
# update qw before the forward pass so the weight is quantized with the correct scale
self.qw.update(self.conv_module.weight.data)
# note: this mainly collects the per-layer ranges of x and the weights; the bias is not quantized here
# tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
# x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
# apply the formula q_a = M(\sum (q_w - Z_w)(q_x - Z_x) + q_b)
def quantize_inference(self, x): # the input here is the already-quantized qx
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
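# Requantization sketch for QConv2d (illustrative sizes, INT mode): after calibration and
# freeze(), integer inference followed by dequantization through qo should stay close to the
# fake-quantized forward pass.
if __name__ == "__main__":
    conv = nn.Conv2d(3, 8, 3, padding=1, bias=True)
    qconv = QConv2d('INT', conv, qi=True, qo=True, num_bits=8)
    x = torch.randn(4, 3, 16, 16)
    with torch.no_grad():
        ref = qconv(x)                   # calibration + fake-quantized output
        qconv.freeze()                   # integer weights/bias plus the requantization factor M
        qx = qconv.qi.quantize_tensor(x)
        out = qconv.qo.dequantize_tensor(qconv.quantize_inference(qx))
    print((ref - out).abs().max())       # small residual from rounding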
class QLinear(QModule):
def __init__(self, quant_type, fc_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QLinear, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.fc_module = fc_module
self.qw = QParam(quant_type, num_bits, e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
self.fc_module.weight.data = self.qw.quantize_tensor(self.fc_module.weight.data)
self.fc_module.weight.data = self.fc_module.weight.data - self.qw.zero_point
self.fc_module.bias.data = quantize_tensor(self.quant_type,
self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax, is_bias=True)
def fakefreeze(self):
self.fc_module.weight.data = self.qw.dequantize_tensor(self.fc_module.weight.data)
self.fc_module.bias.data = dequantize_tensor(self.fc_module.bias.data, scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
# bridges from the LSTM output, whose x is still at FP32 scale
x = self.qi.quantize_tensor(x)
x = x - self.qi.zero_point
x = self.fc_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QReLU(QModule):
def __init__(self,quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = x.clone()
# x[x < self.qi.zero_point] = self.qi.zero_point
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
a = self.qi.zero_point.float().to(device)
x[x < a] = a
return x
class QMaxPooling2d(QModule):
def __init__(self, quant_type, kernel_size=3, stride=1, padding=0, qi=False, qo=True, num_bits=8,e_bits=3):
super(QMaxPooling2d, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
def freeze(self, qi=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
x = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
return x
def quantize_inference(self, x):
return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
class QConvBNReLU(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
x.clamp_(min=0)
return x
class QConvBN(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBN, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True))
else:
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
# x = F.relu(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
# x.clamp_(min=0)
return x
# TODO: to be revised; this probably needs a qo as well
class QAdaptiveAvgPool2d(QModule):
def __init__(self, quant_type, qi=False, qo=True, num_bits=8, e_bits=3):
super(QAdaptiveAvgPool2d, self).__init__(quant_type,qi,qo,num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if qi is not None:
self.qi = qi
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qo is not None:
self.qo = qo
self.M.data = (self.qi.scale / self.qo.scale).data
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi) # as in QReLU: update qi's scale first, then express x on the PoT grid (usually the previous layer's qo is True, so x is already on that grid)
x = F.adaptive_avg_pool2d(x,(1, 1)) # quantizing both input and output is what makes this layer "quantized"
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
x = F.adaptive_avg_pool2d(x,(1, 1)) # quantizing both input and output is what makes this layer "quantized"
x = self.M * x
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
return x
class QConvBNReLU6(QModule):
def __init__(self, quant_type, conv_module, bn_module, qi=True, qo=True, num_bits=8, e_bits=3):
super(QConvBNReLU6, self).__init__(quant_type, qi, qo, num_bits, e_bits)
self.conv_module = conv_module
self.bn_module = bn_module
self.qw = QParam(quant_type, num_bits,e_bits)
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
if qi is not None:
self.qi = qi
if qo is not None:
self.qo = qo
self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point
self.conv_module.bias.data = quantize_tensor(self.quant_type,
bias, scale=self.qi.scale * self.qw.scale,
zero_point=0., qmax=self.bias_qmax,is_bias=True)
def fakefreeze(self):
self.conv_module.weight.data = self.qw.dequantize_tensor(self.conv_module.weight.data)
self.conv_module.bias.data = dequantize_tensor(self.conv_module.bias.data,scale=self.qi.scale * self.qw.scale, zero_point=0.)
def forward(self, x):
if hasattr(self, 'qi'):
self.qi.update(x)
x = FakeQuantize.apply(x, self.qi)
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
self.qw.update(weight.data)
x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x):
a = torch.tensor(6)
a = self.qo.quantize_tensor(a)
x = x - self.qi.zero_point
x = self.conv_module(x)
x = self.M * x
# if self.quant_type is not 'POT':
# x = get_nearest_val(self.quant_type,x)
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point # now expressed in the qo range
x.clamp_(min=0, max=a.item())
return x
class QModule_2(nn.Module):
def __init__(self,quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QModule_2, self).__init__()
if qi0:
self.qi0 = QParam(quant_type,num_bits, e_bits) # qi0 already receives num_bits and the mode here
if qi1:
self.qi1 = QParam(quant_type,num_bits, e_bits) # qi1 already receives num_bits and the mode here
if qo:
self.qo = QParam(quant_type,num_bits, e_bits) # qo already receives num_bits and the mode here
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QElementwiseAdd(QModule_2):
def __init__(self, quant_type, qi0=True, qi1=True, qo=True, num_bits=8, e_bits=3):
super(QElementwiseAdd, self).__init__(quant_type, qi0, qi1, qo, num_bits, e_bits)
self.register_buffer('M0', torch.tensor([], requires_grad=False)) # register M0 as a buffer
self.register_buffer('M1', torch.tensor([], requires_grad=False)) # register M1 as a buffer
def freeze(self, qi0=None, qi1=None ,qo=None):
if hasattr(self, 'qi0') and qi0 is not None:
raise ValueError('qi0 has been provided in init function.')
if not hasattr(self, 'qi0') and qi0 is None:
raise ValueError('qi0 is not existed, should be provided.')
if hasattr(self, 'qi1') and qi1 is not None:
raise ValueError('qi1 has been provided in init function.')
if not hasattr(self, 'qi1') and qi1 is None:
raise ValueError('qi1 is not existed, should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo is not existed, should be provided.')
# for pooling or activation inputs, no extra min/max statistics are needed; they share the same output range
if qi0 is not None:
self.qi0 = qi0
if qi1 is not None:
self.qi1 = qi1
if qo is not None:
self.qo = qo
# following https://zhuanlan.zhihu.com/p/156835141, this is the coefficient of Eq. (3)
self.M0.data = self.qi0.scale / self.qo.scale
self.M1.data = self.qi1.scale / self.qi0.scale
# self.M0.data = self.qi0.scale / self.qo.scale
# self.M1.data = self.qi1.scale / self.qo.scale
def forward(self, x0, x1): # forward pass; the inputs are floating-point tensors
if hasattr(self, 'qi0'):
self.qi0.update(x0)
x0 = FakeQuantize.apply(x0, self.qi0) # quantize the input tensor x0
if hasattr(self, 'qi1'):
self.qi1.update(x1)
x1 = FakeQuantize.apply(x1, self.qi1) # quantize the input tensor x1
x = x0 + x1
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo)
return x
def quantize_inference(self, x0, x1): # the inputs here are already-quantized values
x0 = x0 - self.qi0.zero_point
x1 = x1 - self.qi1.zero_point
x = self.M0 * (x0 + x1*self.M1)
# if self.quant_type is 'INT':
x = get_nearest_val(self.quant_type,x)
x = x + self.qo.zero_point
return x
class QModule_3(nn.Module):
def __init__(self,quant_type, qix=True, qih=True, qic=True, qox=True, qoh=True, qoc=True, num_bits=8, e_bits=3):
super(QModule_3, self).__init__()
if qix:
self.qix = QParam(quant_type,num_bits, e_bits)
if qox:
self.qox = QParam(quant_type,num_bits, e_bits)
if qih:
self.qih = QParam(quant_type,num_bits, e_bits)
if qoh:
self.qoh = QParam(quant_type,num_bits, e_bits)
if qic:
self.qic = QParam(quant_type,num_bits, e_bits)
if qoc:
self.qoc = QParam(quant_type,num_bits, e_bits)
self.quant_type = quant_type
self.num_bits = num_bits
self.e_bits = e_bits
self.bias_qmax = bias_qmax(quant_type)
def freeze(self):
pass # no-op
def fakefreeze(self):
pass
def quantize_inference(self, x):
raise NotImplementedError('quantize_inference should be implemented.')
class QLSTM(QModule_3):
def __init__(self, quant_type, lstm_module, qix=True, qih=True, qic=True, qox=True, qoh=True, qoc=True, num_bits=8, e_bits=3):
super(QLSTM, self).__init__(quant_type, qix, qih, qic, qox, qoh, qoc, num_bits, e_bits)
self.lstm_module = lstm_module
self.qwih = QParam(quant_type, num_bits,e_bits)
self.qwhh = QParam(quant_type, num_bits,e_bits)
# self.qbih = QParam(quant_type, num_bits,e_bits)
# self.qbhh = QParam(quant_type, num_bits,e_bits)
# self.register_buffer('Mi', torch.tensor([], requires_grad=False)) # register Mi as a buffer
# self.register_buffer('Mh', torch.tensor([], requires_grad=False)) # register Mh as a buffer
# if self.lstm_module.bidirectional:
# self.qwihr = QParam(quant_type, num_bits,e_bits)
# self.qwhhr = QParam(quant_type, num_bits,e_bits)
# self.qbihr = QParam(quant_type, num_bits,e_bits)
# self.qbhhr = QParam(quant_type, num_bits,e_bits)
def freeze(self, qix=None, qih=None, qic=None,qox=None, qoh=None, qoc=None, flag=0):
if hasattr(self, 'qix') and qix is not None:
raise ValueError('qix has been provided in init function.')
if not hasattr(self, 'qix') and qix is None:
raise ValueError('qix is not existed, should be provided.')
if hasattr(self, 'qox') and qox is not None:
raise ValueError('qox has been provided in init function.')
if not hasattr(self, 'qox') and qox is None:
raise ValueError('qox is not existed, should be provided.')
if hasattr(self, 'qih') and qih is not None:
raise ValueError('qih has been provided in init function.')
if not hasattr(self, 'qih') and qih is None and flag==1: # not the first LSTM layer
raise ValueError('qih is not existed, should be provided.')
if hasattr(self, 'qoh') and qoh is not None:
raise ValueError('qoh has been provided in init function.')
if not hasattr(self, 'qoh') and qoh is None:
raise ValueError('qoh is not existed, should be provided.')
if hasattr(self, 'qic') and qic is not None:
raise ValueError('qic has been provided in init function.')
if not hasattr(self, 'qic') and qic is None and flag==1: # not the first LSTM layer
raise ValueError('qic is not existed, should be provided.')
if hasattr(self, 'qoc') and qoc is not None:
raise ValueError('qoc has been provided in init function.')
if not hasattr(self, 'qoc') and qoc is None:
raise ValueError('qoc is not existed, should be provided.')
if qix is not None:
self.qix = qix
if qox is not None:
self.qox = qox
# avoid giving the first LSTM layer qih/qic when it has no incoming h and c
if qih is not None and flag==1:
self.qih = qih
if qoh is not None:
self.qoh = qoh
if qic is not None and flag==1:
self.qic = qic
if qoc is not None:
self.qoc = qoc
# two issues are involved here: 1. the sequence output vs. the hidden-state output 2. bidirectionality
# the sequence output and the hidden-state output can be handled separately
# bidirectionality is trickier; after quantization it also interacts with the SUM/CONCAT merge modes
# self.Mi.data = (self.qwih.scale * self.qi.scale / self.qo.scale).data
# self.Mh.data = (self.qwhh.scale * self.qi.scale / self.qo.scale).data
# fake-quantize the weights
self.lstm_module.weight_ih_l0.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0.data,self.qwih)
self.lstm_module.weight_hh_l0.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0.data,self.qwhh)
# fake-quantize the bias
self.lstm_module.bias_ih_l0.data = quantize_tensor(self.quant_type,self.lstm_module.bias_ih_l0.data,scale=self.qix.scale*self.qwih.scale,zero_point=0,qmax=self.bias_qmax,is_bias=True)
self.lstm_module.bias_ih_l0.data = dequantize_tensor(self.lstm_module.bias_ih_l0.data,scale=self.qix.scale*self.qwih.scale,zero_point=0)
# the first layer has no qih and needs special handling
if flag==1:
self.lstm_module.bias_hh_l0.data = quantize_tensor(self.quant_type,self.lstm_module.bias_hh_l0.data,scale=self.qih.scale*self.qwhh.scale,zero_point=0,qmax=self.bias_qmax,is_bias=True)
self.lstm_module.bias_hh_l0.data = dequantize_tensor(self.lstm_module.bias_hh_l0.data,scale=self.qih.scale*self.qwhh.scale,zero_point=0)
def forward(self, x, h=None, c=None):
if hasattr(self, 'qix'):
self.qix.update(x)
x = FakeQuantize.apply(x, self.qix)
if hasattr(self, 'qih') and h is not None: # also covers the first LSTM layer, which has no h / qih
self.qih.update(h)
h = FakeQuantize.apply(h, self.qih)
if hasattr(self, 'qic') and c is not None: # also covers the first LSTM layer, which has no c / qic
self.qic.update(c)
c = FakeQuantize.apply(c, self.qic)
self.qwih.update(self.lstm_module.weight_ih_l0.data)
self.qwhh.update(self.lstm_module.weight_hh_l0.data)
layer = nn.LSTM(input_size=self.lstm_module.input_size,
hidden_size=self.lstm_module.hidden_size,
num_layers=1,
batch_first=False,
bidirectional=self.lstm_module.bidirectional,
bias=True)
layer.weight_ih_l0.data = FakeQuantize.apply(self.lstm_module.weight_ih_l0.data,self.qwih)
layer.weight_hh_l0.data = FakeQuantize.apply(self.lstm_module.weight_hh_l0.data,self.qwhh)
layer.bias_ih_l0.data = self.lstm_module.bias_ih_l0.data
layer.bias_hh_l0.data = self.lstm_module.bias_hh_l0.data
if h is None:
x, (h, c) = layer(x)
else:
x, (h, c) = layer(x, (h, c))
if hasattr(self, 'qox'):
self.qox.update(x)
x = FakeQuantize.apply(x, self.qox)
if hasattr(self, 'qoh'):
self.qoh.update(h)
h = FakeQuantize.apply(h, self.qoh)
if hasattr(self, 'qoc'):
self.qoc.update(c)
c = FakeQuantize.apply(c, self.qoc)
return x,(h,c)
def quantize_inference(self, x, h=None, c=None):
# freeze() uses fake quantization, so the computation can run directly here without any scale conversion
if h is None:
x, (h, c) = self.lstm_module(x)
if hasattr(self, 'qox'):
x = FakeQuantize.apply(x, self.qox)
if hasattr(self, 'qoh'):
h = FakeQuantize.apply(h, self.qoh)
if hasattr(self, 'qoc'):
c = FakeQuantize.apply(c, self.qoc)
else:
x, (h, c) = self.lstm_module(x, (h, c))
if hasattr(self, 'qox'):
x = FakeQuantize.apply(x, self.qox)
if hasattr(self, 'qoh'):
h = FakeQuantize.apply(h, self.qoh)
if hasattr(self, 'qoc'):
c = FakeQuantize.apply(c, self.qoc)
return x,(h,c)
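# Summary of how this quantized LSTM wrapper is used (based on the methods above):
#   forward()            -- full-precision pass that also calibrates the qix/qih/qic/qwih/qwhh/qo* statistics
#   freeze()             -- fake-quantizes the stored weights and biases once calibration is done
#   quantize_inference() -- runs the frozen lstm_module and fake-quantizes its outputs
# Since everything is fake-quantized (quantize followed by dequantize), no explicit
# integer-domain rescaling between layers is needed in quantize_inference().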
# new modules for full-precision model - fold bn
# inference should also be adapted accordingly
class ConvBNReLU(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0)
return x
class ConvBN(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBN, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
return x
class ConvBNReLU6(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU6, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0,max=6)
return x
# -*- coding: utf-8 -*-
from torch.serialization import load
from model import *
# from extract_ratio import *
from utils import *
import gol
import openpyxl
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.transforms.functional import InterpolationMode
import torch.utils.bottleneck as bn
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
import json
from decoder import seq_mnist_decoder
from data import seq_mnist_train, seq_mnist_val
from torch.utils.data import DataLoader
import random
class objdict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
def __setattr__(self, name, value):
self[name] = value
def __delattr__(self, name):
if name in self:
del self[name]
else:
raise AttributeError("No such attribute: " + name)
# def direct_quantize(model, test_loader,device):
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model.quantize_forward(data).cpu()
# if i % 500 == 0:
# break
# print('direct quantization finish')
# def full_inference(model, test_loader, device):
# correct = 0
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model(data).cpu()
# pred = output.argmax(dim=1, keepdim=True)
# correct += pred.eq(target.view_as(pred)).sum().item()
# print('\nTest set: Full Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# return 100. * correct / len(test_loader.dataset)
# def quantize_inference(model, test_loader, device):
# correct = 0
# for i, (data, target) in enumerate(test_loader, 1):
# data = data.to(device)
# output = model.quantize_inference(data).cpu()
# pred = output.argmax(dim=1, keepdim=True)
# correct += pred.eq(target.view_as(pred)).sum().item()
# print('Test set: Quant Model Accuracy: {:.2f}%'.format(100. * correct / len(test_loader.dataset)))
# return 100. * correct / len(test_loader.dataset)
def direct_quantize(model, val_loader , val_data ,args , trainer_params, decoder, criterion):
model.eval()
loss_value = 0
for i, (item) in enumerate(val_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = model.quantize_forward(data)
# if i % 500 == 0:
# # break
print('direct quantization finish')
# loss_value /= (len(val_data)//trainer_params.test_batch_size)
# # loss_value = loss_value[0]
# loss_value = loss_value.item()
# print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
def full_inference(model, val_loader , val_data ,args , trainer_params, decoder, criterion):
model.eval()
loss_value = 0
for i, (item) in enumerate(val_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = model(data)
index = random.randint(0,trainer_params.test_batch_size-1)
label = labels[index*trainer_params.word_size:(index+1)*trainer_params.word_size].data.numpy()
label = label-1
prediction = decoder.decode(output[:,index,:], output_len[index], lab_len[index])
accuracy = decoder.hit(prediction, label)
print("Sample Label = {}".format(decoder.to_string(label)))
print("Sample Prediction = {}".format(decoder.to_string(prediction)))
print("Full Model Accuracy on Sample = {:.2f}%\n\n".format(accuracy))
loss = criterion(output, labels, output_len, lab_len)
# loss_value += loss.data.numpy()
loss_value += loss.cpu().data.numpy()
loss_value /= (len(val_data)//trainer_params.test_batch_size)
# loss_value = loss_value[0]
loss_value = loss_value.item()
print("Full Model Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
def quantize_inference(model, val_loader , val_data ,args , trainer_params, decoder, criterion):
model.eval()
loss_value = 0
for i, (item) in enumerate(val_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = model.quantize_inference(data)
index = random.randint(0,trainer_params.test_batch_size-1)
label = labels[index*trainer_params.word_size:(index+1)*trainer_params.word_size].data.numpy()
label = label-1
prediction = decoder.decode(output[:,index,:], output_len[index], lab_len[index])
accuracy = decoder.hit(prediction, label)
print("Sample Label = {}".format(decoder.to_string(label)))
print("Sample Prediction = {}".format(decoder.to_string(prediction)))
print("Quantize Model Accuracy on Sample = {:.2f}%\n\n".format(accuracy))
loss = criterion(output, labels, output_len, lab_len)
# loss_value += loss.data.numpy()
loss_value += loss.cpu().data.numpy()
loss_value /= (len(val_data)//trainer_params.test_batch_size)
# loss_value = loss_value[0]
loss_value = loss_value.item()
print("Quantize Model Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
def js_div(p_output, q_output, get_softmax=True):
"""
Measures the JS divergence between the target and output logits:
JS(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M), where M = (P + Q) / 2
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output)
q_output = F.softmax(q_output)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='PyTorch FP32 Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=4, type=int, metavar='WORKERS',help='number of data loading workers (default: 4)')
parser.add_argument('-s', '--save', default=False, type=bool)
# parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
parser.add_argument('--params', '-p', type=str, default="default_trainer_params.json", help='Path to params JSON file. Default ignored when resuming.')
# training parameters
args = parser.parse_args()
with open(args.params) as d:
trainer_params = json.load(d)
# trainer_params = json.load(d, object_hook=ascii_encode_dict)
trainer_params = objdict(trainer_params)
batch_size = args.batch_size
num_workers = args.workers
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
labels = [i for i in range(trainer_params.num_classes-1)]
decoder = seq_mnist_decoder(labels=labels)
criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)
random.seed(trainer_params.random_seed)
torch.manual_seed(trainer_params.random_seed)
# if args.cuda:
torch.cuda.manual_seed_all(trainer_params.random_seed)
train_data = seq_mnist_train(trainer_params)
val_data = seq_mnist_val(trainer_params)
train_loader = DataLoader(train_data, batch_size=trainer_params.batch_size, \
shuffle=True, num_workers=trainer_params.num_workers)
val_loader = DataLoader(val_data, batch_size=trainer_params.test_batch_size, \
shuffle=False, num_workers=trainer_params.num_workers)
if args.model == 'LSTM-OCR':
model = BiLSTM(trainer_params)
# writer = SummaryWriter(log_dir='log/' + args.model + '/ptq')
save_dir = 'ckpt'
full_file = save_dir + '/mnist_' + trainer_params.reduce_bidirectional +'_' + str(trainer_params.bidirectional) + '.pt'
model.load_state_dict(torch.load(full_file))
model.to(device)
load_ptq = False
ptq_file_prefix = 'ckpt/mnist_' + trainer_params.reduce_bidirectional +'_' + str(trainer_params.bidirectional) + '_ptq_'
model.eval()
full_acc = full_inference(model, val_loader, val_data, args, trainer_params, decoder,criterion)
# model_fold = fold_model(model) #
# full_params = []
# layer, par_ratio, flop_ratio = extract_ratio(args.model)
# layer = []
# for name, param in model.named_parameters():
# if 'weight' in name:
# n = name.split('.')
# pre = '.'.join(n[:len(n)-1])
# # extract the name preceding 'weight' (i.e. the layer name; checking for 'weight' avoids extracting the same name again for the bias)
# layer.append(pre)
# print('===================')
# par_ratio, flop_ratio = fold_ratio(layer, par_ratio, flop_ratio)
# for name, param in model_fold.named_parameters():
# if 'bn' in name or 'sample.1' in name:
# continue
# param_norm = param.data.cpu()
# full_params.append(param_norm) # 没统计bn的 只统计了conv的 而且还是fold后的
# writer.add_histogram(tag='Full_' + name + '_data', values=param.data)
gol._init()
quant_type_list = ['INT','POT','FLOAT']
title_list = []
js_flops_list = []
js_param_list = []
ptq_acc_list = []
acc_loss_list = []
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# for each quantization type, the bias quantization list only needs to be set once
# for INT the bit width is large and a lookup list would be too costly, so plain rounding is used instead
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
# model_ptq = resnet18()
if args.model == 'LSTM-OCR':
model_ptq = BiLSTM(trainer_params)
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
print('\nPTQ: '+title)
title_list.append(title)
# set up the quantization value list
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
# check whether a saved ptq model should be loaded
if load_ptq is True and osp.exists(ptq_file_prefix + title + '.pt'):
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.load_state_dict(torch.load(ptq_file_prefix + title + '.pt'))
model_ptq.to(device)
print('Successfully load ptq model: ' + title)
else:
model_ptq.load_state_dict(torch.load(full_file))
model_ptq.to(device)
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
direct_quantize(model_ptq, val_loader, val_data, args, trainer_params, decoder,criterion)
# if args.save == True:
# torch.save(model_ptq.state_dict(), ptq_file_prefix + title + '.pt')
model_ptq.freeze()
quantize_inference(model_ptq, val_loader, val_data, args, trainer_params, decoder,criterion)
# ptq_acc = quantize_inference(model_ptq, val_loader, val_data, args, trainer_params, decoder,criterion)
# ptq_acc_list.append(ptq_acc)
# acc_loss = (full_acc - ptq_acc) / full_acc
# acc_loss_list.append(acc_loss)
# idx = -1
# compute the js-div weighted by FLOPs / parameter counts
js_flops = 0.
js_param = 0.
# for name, param in model_ptq.named_parameters():
# # if '.' not in name or 'bn' in name:
# if 'bn' in name or 'sample.1' in name:
# continue
# writer.add_histogram(tag=title +':'+ name + '_data', values=param.data)
# idx = idx + 1
# # resnet names contain multiple '.', so the prefix needs to be re-joined
# # prefix = name.split('.')[0]
# n = name.split('.')
# prefix = '.'.join(n[:len(n) - 1])
# # weight vs. bias 1:1? the ratio is assigned per layer; weight and bias could be given different weights here, e.g. (8:2)
# if prefix in layer:
# layer_idx = layer.index(prefix)
# ptq_param = param.data.cpu()
# # take the L2 norm
# # ptq_norm = F.normalize(ptq_param,p=2,dim=-1)
# ptq_norm = ptq_param
# writer.add_histogram(tag=title +':'+ name + '_data', values=ptq_param)
# # print(name)
# # print('=========')
# # print(ptq_norm)
# # print('=========')
# # print(full_params[idx])
# js = js_div(ptq_norm,full_params[idx]) # JS distance between the folded model before and after quantization
# js = js.item()
# if js < 0.:
# js = 0.
# js_flops = js_flops + js * flop_ratio[layer_idx]
# js_param = js_param + js * par_ratio[layer_idx]
# js_flops_list.append(js_flops)
# js_param_list.append(js_param)
# print(title + ': js_flops: %f js_param: %f acc_loss: %f' % (js_flops, js_param, acc_loss))
sys.exit()
# write the results to xlsx
workbook = openpyxl.Workbook()
worksheet = workbook.active
worksheet.cell(row=1,column=1,value='FP32-acc')
worksheet.cell(row=1,column=2,value=full_acc)
worksheet.cell(row=3,column=1,value='title')
worksheet.cell(row=3,column=2,value='js_flops')
worksheet.cell(row=3,column=3,value='js_param')
worksheet.cell(row=3,column=4,value='ptq_acc')
worksheet.cell(row=3,column=5,value='acc_loss')
for i in range(len(title_list)):
worksheet.cell(row=i+4, column=1, value=title_list[i])
worksheet.cell(row=i+4, column=2, value=js_flops_list[i])
worksheet.cell(row=i+4, column=3, value=js_param_list[i])
worksheet.cell(row=i+4, column=4, value=ptq_acc_list[i])
worksheet.cell(row=i+4, column=5, value=acc_loss_list[i])
workbook.save('ptq_result_' + args.model + '.xlsx')
writer.close()
ft = open('ptq_result_' + args.model + '.txt','w')
print('title_list:',file=ft)
print(" ".join(title_list),file=ft)
print('js_flops_list:',file=ft)
print(" ".join(str(i) for i in js_flops_list), file=ft)
print('js_param_list:',file=ft)
print(" ".join(str(i) for i in js_param_list), file=ft)
print('ptq_acc_list:',file=ft)
print(" ".join(str(i) for i in ptq_acc_list), file=ft)
print('acc_loss_list:',file=ft)
print(" ".join(str(i) for i in acc_loss_list), file=ft)
ft.close()
## update 2023.5.4
### Improved the FP32 model and ran a preliminary PTQ experiment (only a naive fake quantization of part of the parameters; results are in ptq_result.txt)
1. Improvements to the FP32 model
- Support multi-layer LSTMs, organized with nn.ModuleList
2. First attempts at PTQ
- Gained a more detailed understanding of the LSTM structure, data flow and inputs/outputs. The main problems for PTQ are:
- A BiLSTM produces outputs in both directions, which have to be merged by SUM or CONCAT; PTQ may need extra modules to handle this.
- The computation inside an LSTM cell is fairly complex, as shown in the figure: <img src = "fig/math.png" class="h-90 auto">
First, there are several gates i, f, g, each with a Wx+b+Wh+b structure. Whether their scales should be treated as a whole (i.e. share one scale, or keep separate scales and rescale at the additions) is an open question.<br>The sigmoid/tanh outside them is another problem (it makes it hard to guarantee correct PTQ results across layers through scale conversions alone).<br>The element-wise * and + used to update c' and h' are a problem as well.
- The per-gate weights inside the LSTM are packed into `weight_ih_l[k]`, `weight_hh_l[k]`, `bias_ih_l[k]` and `bias_hh_l[k]`, stacked in different rows; whether these matrices should be split apart, quantized separately and then recombined is unclear.
- Since it is not yet clear how to handle the issues above, a simple exploratory experiment was run instead:
- Only the unidirectional LSTM is handled for now; the SUM and CONCAT needed for the bidirectional case are ignored
- `weight_ih_l[k]`, `weight_hh_l[k]`, `bias_ih_l[k]` and `bias_hh_l[k]` are each quantized as a whole; the tensors are not cut into four blocks to quantize `weight_ir_l[k]`, `weight_hr_l[k]`, `bias_r_l[k]`, `weight_if_l[k]`, `weight_hf_l[k]`, `bias_f_l[k]`, `weight_ii_l[k]`, `weight_hi_l[k]` separately
- Weights, biases and every layer's output tensor are fake-quantized, which avoids scale conversions between layers (a minimal sketch of this idea is given at the end of this note).<br><br>Consequences:<br> (1) After freeze, the weights are no longer the raw quantized values, but the quantized values mapped back through their scale
<br>(2) The quantization of tanh and sigmoid is not considered; they are not matrix multiplications and it is not yet clear how to eliminate their scale. Only the rounding and overflow errors from quantizing the weights and output tensors are simulated.
<br>(3) This may differ from an LSTM quantization that could be deployed directly on hardware. (I am not sure yet how LSTM quantization is done in practice, and there is not much material about it online.)
<br>(4) It also differs slightly from the PTQ of the other networks: the order of the scale-related computations is a bit different, but the underlying principle is similar.
- To be improved:
- A more realistic simulation of LSTM PTQ, taking sigmoid, tanh and the various multiply/add combinations into account
- Add PTQ for the BiLSTM (with the current simplified approach this is easy, since no scale issues arise; the two directions can simply be merged by SUM or CONCAT)
- Consider splitting `weight_ih_l[k]`, `weight_hh_l[k]`, `bias_ih_l[k]` and `bias_hh_l[k]` apart, fake-quantizing the per-gate weights separately, and then recombining them
- Use a more complex dataset
- Measure similarity
- Find a better metric to measure accuracy
- Quantization of the fused BN-FC layer
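
For reference, the cell computation referred to above (fig/math.png) is the standard LSTM update:

$$
\begin{aligned}
i_t &= \sigma(W_{ii} x_t + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\
f_t &= \sigma(W_{if} x_t + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\
g_t &= \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\
o_t &= \sigma(W_{io} x_t + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$

The simplified experiment described above amounts to per-tensor fake quantization of the packed LSTM weights and of each layer's output. The snippet below is only a minimal sketch of that idea; the `fake_quantize` helper and the 8-bit setting are illustrative assumptions, not the QParam/FakeQuantize machinery actually used in this repo:

```python
import torch
import torch.nn as nn

def fake_quantize(t, num_bits=8):
    # symmetric per-tensor fake quantization: quantize to a grid, then dequantize
    qmax = 2 ** (num_bits - 1) - 1
    scale = t.abs().max() / qmax
    return torch.round(t / scale).clamp(-qmax - 1, qmax) * scale

lstm = nn.LSTM(input_size=32, hidden_size=128, num_layers=1)
x = torch.randn(28, 4, 32)  # (time_steps, batch_size, input_dim)

# fake-quantize the packed weight matrices as a whole (gates are not split apart)
with torch.no_grad():
    lstm.weight_ih_l0.copy_(fake_quantize(lstm.weight_ih_l0))
    lstm.weight_hh_l0.copy_(fake_quantize(lstm.weight_hh_l0))

out, (h, c) = lstm(x)
out = fake_quantize(out)  # fake-quantize the layer output as well
```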
## update 2023.5.2
basic version: FP32 only, with a single LSTM cell, trained on sequential MNIST; recorded only as a baseline for later modifications.
import os
import math
import numpy
import torch
import torch.nn as nn
from model import *
import argparse
import json
# input_size = 32
# num_units = 128
# num_layers = 1
# bidirectional = True
# recurrent_bias_enabled = True
# lstm1 = nn.LSTM(input_size=input_size,
# hidden_size=num_units,
# num_layers=num_layers,
# batch_first=False,
# bidirectional= bidirectional,
# bias= recurrent_bias_enabled)
# lstm2 = nn.LSTM(input_size=input_size,
# hidden_size=num_units,
# num_layers=num_layers + 1,
# batch_first=False,
# bidirectional= bidirectional,
# bias= recurrent_bias_enabled)
# print("LSTM1:")
# for name,params in lstm1.named_parameters():
# print(f"name:{name},params:{params.shape}")
# print("=============================================")
# print("LSTM2:")
# for name,params in lstm2.named_parameters():
# print(f"name:{name},params:{params.shape}")
class objdict(dict):
def __getattr__(self, name):
if name in self:
return self[name]
else:
raise AttributeError("No such attribute: " + name)
def __setattr__(self, name, value):
self[name] = value
def __delattr__(self, name):
if name in self:
del self[name]
else:
raise AttributeError("No such attribute: " + name)
parser = argparse.ArgumentParser(description='PyTorch BiLSTM Sequential MNIST Example')
parser.add_argument('--params', '-p', type=str, default="default_trainer_params.json", help='Path to params JSON file. Default ignored when resuming.')
args = parser.parse_args()
with open(args.params) as d:
trainer_params = json.load(d)
# trainer_params = json.load(d, object_hook=ascii_encode_dict)
trainer_params = objdict(trainer_params)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 加入设备选择
model = BiLSTM(trainer_params).to(device)
model.quantize('INT',8,0)
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
# import warpctc_pytorch as wp
from torch.autograd import Variable
from torch.utils.data import DataLoader
from model import BiLSTM
from decoder import seq_mnist_decoder
from data import seq_mnist_train, seq_mnist_val
class Seq_MNIST_Trainer():
def __init__(self, trainer_params, args):
self.args = args
self.trainer_params = trainer_params
random.seed(trainer_params.random_seed)
torch.manual_seed(trainer_params.random_seed)
if args.cuda:
torch.cuda.manual_seed_all(trainer_params.random_seed)
self.train_data = seq_mnist_train(trainer_params)
self.val_data = seq_mnist_val(trainer_params)
self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, \
shuffle=True, num_workers=trainer_params.num_workers)
self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, \
shuffle=False, num_workers=trainer_params.num_workers)
self.starting_epoch = 1
self.prev_loss = 10000
self.model = BiLSTM(trainer_params)
self.criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=False)
self.labels = [i for i in range(trainer_params.num_classes-1)]
self.decoder = seq_mnist_decoder(labels=self.labels)
self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)
# self.criterion = wp.CTCLoss(size_average=False)
# defaults to false
# if args.init_bn_fc_fusion:
# # defaults to false; presumably used to record whether fusion has already been applied
# if not trainer_params.prefused_bn_fc:
# self.model.batch_norm_fc.init_fusion() # fuse bn-fc
# self.trainer_params.prefused_bn_fc = True # already fused
# else:
# raise Exception("BN and FC are already fused.")
# fuse first, then load the fused weights
if args.eval or args.resume :
save_dir = 'ckpt'
full_file = save_dir + '/mnist_' + self.trainer_params.reduce_bidirectional +'_' + str(self.trainer_params.bidirectional) + '.pt'
self.model.load_state_dict(torch.load(full_file))
print("load Model from existing file finished!")
if args.cuda:
# torch.cuda.set_device(args.gpus)
# self.model = self.model.cuda()
# self.criterion = self.criterion.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # 加入设备选择
self.model = self.model.to(device)
self.criterion = self.criterion.to(device)
def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
package = {'state_dict': model.state_dict(),
'trainer_params': trainer_params,
'optim_dict' : optimizer.state_dict(),
'starting_epoch' : starting_epoch,
'prev_loss': prev_loss
}
return package
# save the model
def save_model(self):
save_dir = 'ckpt'
if not os.path.isdir(save_dir):
os.makedirs(save_dir, mode=0o777)
os.chmod(save_dir, mode=0o777)
# path = self.args.experiments + '/' + name
torch.save(self.model.state_dict(), save_dir + '/mnist_' + self.trainer_params.reduce_bidirectional +'_' + str(self.trainer_params.bidirectional) + '.pt')
# print("Model saved at: {}\n".format(path))
# torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params,
# optimizer=self.optimizer, starting_epoch=epoch + 1, prev_loss=self.prev_loss), path)
def train(self, epoch):
self.model.train()
# uses the overridden def __getitem__(self, index)
for i, (item) in enumerate(self.train_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
if self.args.cuda:
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = self.model(data)
loss = self.criterion(output, labels, output_len, lab_len)
# loss_value = loss.data[0]
loss_value = loss.item()
print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(epoch,
self.args.epochs, (i+1)*self.trainer_params.batch_size, len(self.train_data) , loss_value))
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# synchronizes CPU and GPU memory, making sure GPU computation has finished before results are returned to the CPU, to avoid wrong or inconsistent results in multi-GPU settings
# if self.args.cuda:
# torch.cuda.synchronize()
def test(self, epoch=0, save_model_flag=False):
self.model.eval()
loss_value = 0
for i, (item) in enumerate(self.val_loader):
data, labels, output_len, lab_len = item
data = Variable(data.transpose(1,0), requires_grad=False)
labels = Variable(labels.view(-1), requires_grad=False)
output_len = Variable(output_len.view(-1), requires_grad=False)
lab_len = Variable(lab_len.view(-1), requires_grad=False)
if self.args.cuda:
# data = data.cuda()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data = data.to(device)
output = self.model(data)
index = random.randint(0,self.trainer_params.test_batch_size-1)
label = labels[index*self.trainer_params.word_size:(index+1)*self.trainer_params.word_size].data.numpy()
label = label-1
prediction = self.decoder.decode(output[:,index,:], output_len[index], lab_len[index])
accuracy = self.decoder.hit(prediction, label)
print("Sample Label = {}".format(self.decoder.to_string(label)))
print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))
loss = self.criterion(output, labels, output_len, lab_len)
# loss_value += loss.data.numpy()
loss_value += loss.cpu().data.numpy()
loss_value /= (len(self.val_data)//self.trainer_params.test_batch_size)
# loss_value = loss_value[0]
loss_value = loss_value.item()
print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
if loss_value < self.prev_loss and save_model_flag:
self.prev_loss = loss_value
self.save_model()
# elif save_model_flag:
# self.save_model(epoch, "checkpoint.tar")
def eval_model(self):
self.test()
def train_model(self):
for epoch in range(self.starting_epoch, self.args.epochs + 1):
self.train(epoch)
acc = self.test(epoch=epoch, save_model_flag=True) # by default the model is not saved (to be handled when running the PTQ experiments)
if epoch%20==0:
self.optimizer.param_groups[0]['lr'] = self.optimizer.param_groups[0]['lr']*0.98
def export_model(self, simd_factor, pe):
self.model.eval()
self.model.export('r_model_fw_bw.hpp', simd_factor, pe)
def export_image(self):
random.seed()
idx = random.randint(0,self.val_data.images.shape[1]-1)
# idx = 100
img, label = self.val_data.images[:,idx,:], self.val_data.labels[0][idx]
inp = torch.from_numpy(img)
inp = inp.unsqueeze(1)
inp = Variable(inp, requires_grad=False)
out = self.model(inp)
out = self.decoder.decode(out, self.val_data.input_lengths, self.val_data.label_lengths)
out = self.decoder.to_string(out)
img = img.transpose(1, 0)
label -= 1
label = self.decoder.to_string(label)
assert label==out
from PIL import Image, ImageOps
from matplotlib import cm
img1 = (img+1)/2.
im = Image.fromarray(np.uint8(cm.gist_earth(img1)*255)).convert('L')
im = ImageOps.invert(im)
im.save('test_image.png')
img = img.transpose(1, 0)
img = np.reshape(img, (-1, 1))
np.savetxt("test_image.txt", img, fmt='%.10f')
f = open('test_image_gt.txt','w')
f.write(label)
f.close()
print("Prediction on the image = {}".format(out))
print("Label of exported image = {}".format(label))
import torch
import torch.nn as nn
import torch.nn.functional as F
def js_div(p_output, q_output, get_softmax=True):
"""
Measures the JS divergence between the target and output logits:
JS(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M), where M = (P + Q) / 2
"""
KLDivLoss = nn.KLDivLoss(reduction='sum')
if get_softmax:
p_output = F.softmax(p_output)
q_output = F.softmax(q_output)
log_mean_output = ((p_output + q_output)/2).log()
return (KLDivLoss(log_mean_output, p_output) + KLDivLoss(log_mean_output, q_output))/2
def ebit_list(quant_type, num_bits):
if quant_type == 'FLOAT':
e_bit_list = list(range(1,num_bits-1))
else:
e_bit_list = [0]
return e_bit_list
def numbit_list(quant_type):
if quant_type == 'INT':
num_bit_list = list(range(2,17))
elif quant_type == 'POT':
num_bit_list = list(range(2,9))
else:
num_bit_list = list(range(2,9))
# num_bit_list = [8]
return num_bit_list
def build_bias_list(quant_type):
if quant_type == 'POT':
return build_pot_list(8) #
else:
return build_float_list(16,7)
def build_list(quant_type, num_bits, e_bits):
if quant_type == 'POT':
return build_pot_list(num_bits)
else:
return build_float_list(num_bits,e_bits)
def build_pot_list(num_bits):
plist = [0.]
for i in range(-2 ** (num_bits-1) + 2, 1):
# i goes up to 0, i.e. the maximum value of POT quantization is 1
plist.append(2. ** i)
plist.append(-2. ** i)
plist = torch.Tensor(list(set(plist)))
# plist = plist.mul(1.0 / torch.max(plist))
return plist
def build_float_list(num_bits,e_bits):
m_bits = num_bits - 1 - e_bits
plist = [0.]
# spacing between adjacent mantissa values
dist_m = 2 ** (-m_bits)
e = -2 ** (e_bits - 1) + 1
for m in range(1, 2 ** m_bits):
frac = m * dist_m # mantissa part
expo = 2 ** e # exponent part
flt = frac * expo
plist.append(flt)
plist.append(-flt)
for e in range(-2 ** (e_bits - 1) + 2, 2 ** (e_bits - 1) + 1):
expo = 2 ** e
for m in range(0, 2 ** m_bits):
frac = 1. + m * dist_m
flt = frac * expo
plist.append(flt)
plist.append(-flt)
plist = torch.Tensor(list(set(plist)))
return plist
def fold_ratio(layer, par_ratio, flop_ratio):
idx = -1
for name in layer:
idx = idx + 1
# layer was extracted from 'for name, param in model.named_parameters()', so downsample entries are guaranteed to be present
if 'bn' in name or 'sample.1' in name:
par_ratio[idx-1] += par_ratio[idx]
flop_ratio[idx-1] += flop_ratio[idx]
return par_ratio,flop_ratio
def fold_model(model):
idx = -1
module_list = []
# print('fold model:')
for name, module in model.named_modules():
# print(name+'-- +')
idx += 1
module_list.append(module)
# the conv inside downsample was previously overlooked here, so some layers were not fused
if 'bn' in name or 'sample.1' in name:
# print(name+'-- *')
module_list[idx-1] = fold_bn(module_list[idx-1],module) # modified here
return model
# def fold_model(model):
# last_conv = None
# last_bn = None
# for name, module in model.named_modules():
# if isinstance(module, nn.Conv2d):
# # if the current module is a conv layer, "fold" it into the previous BN layer
# if last_bn is not None:
# last_conv = fold_bn(last_conv, last_bn)
# last_bn = None
# last_conv = module
# elif isinstance(module, nn.BatchNorm2d):
# # if the current module is a BN layer, "fold" it into the previous conv layer
# last_bn = module
# if last_conv is not None:
# last_conv = fold_bn(last_conv, last_bn)
# last_bn = None
# # handle the last BN layer
# if last_bn is not None:
# last_conv = fold_bn(last_conv, last_bn)
# return model
def fold_bn(conv, bn):
# fetch the BN layer parameters
gamma = bn.weight.data
beta = bn.bias.data
mean = bn.running_mean
var = bn.running_var
eps = bn.eps
std = torch.sqrt(var + eps)
feat = bn.num_features
# fetch the conv layer parameters
weight = conv.weight.data
if conv.bias is not None:
bias = conv.bias.data
if bn.affine:
gamma_ = gamma / std
weight = weight * gamma_.view(feat, 1, 1, 1)
if conv.bias is not None:
bias = gamma_ * bias - gamma_ * mean + beta
else:
bias = beta - gamma_ * mean
else:
gamma_ = 1 / std
weight = weight * gamma_
if conv.bias is not None:
bias = gamma_ * bias - gamma_ * mean
else:
bias = -gamma_ * mean
# set the new weight and bias
conv.weight.data = weight
# handles the case where bias is None
if conv.bias is None:
conv.bias = nn.Parameter(bias)
else:
conv.bias.data = bias
return conv
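# Minimal self-check for fold_bn (illustrative only, assuming the imports above):
# folding BN into a conv should reproduce the eval-mode Conv2d + BatchNorm2d output.
if __name__ == "__main__":
    torch.manual_seed(0)
    conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8).eval()
    x = torch.randn(2, 3, 16, 16)
    ref = bn(conv(x))
    folded = fold_bn(conv, bn)  # modifies conv in place and returns it
    print(torch.allclose(folded(x), ref, atol=1e-5))  # expected: True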
import torch
import torch.nn as nn
import torch.nn.functional as F
from module import *
import module
from global_var import GlobalVariables
# ResNet model definition
# adapted for CIFAR-10
class ResNet_fold(nn.Module):
def __init__(self, block, layers, num_classes=10): # the number of classes is set to 10 here
super(ResNet_fold, self).__init__()
self.inplanes = 16 # CIFAR-10 images are small, so fewer channels are needed at the start
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
self.convbnrelu1 = ConvBNReLU(self.conv1,self.bn1)
# residual layers (4 stages, each stage containing 6n+2 conv layers)
self.layer1 = MakeLayer(block, 16, layers[0])
self.layer2 = MakeLayer(block, 32, layers[1], stride=2)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2)
self.layer4 = MakeLayer(block, 128, layers[3], stride=2)
# classification head
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
def forward(self, x):
# input layer
# x = self.conv1(x)
# x = self.bn1(x)
# x = self.relu(x)
x = self.convbnrelu1(x)
# compared with the ImageNet version, the maxpool is dropped here: CIFAR-10 images are already small, and another pooling would make them too small
# residual layers
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
# classification head
x = self.avgpool(x) # output size is B,C,1,1
x = x.view(x.size(0), -1)
x = self.fc(x)
out = F.softmax(x,dim = 1) # the softmax here is optional; it has little effect
return out
def quantize(self, quant_type, num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
# num_bits is not passed in here; needs fixing
self.layer1.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer2.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer3.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.layer4.quantize(quant_type=quant_type,num_bits=num_bits, e_bits=e_bits)
self.qavgpool1 = QAdaptiveAvgPool2d(quant_type,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qfc1 = QLinear(quant_type, self.fc,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
# self.qfc1 = QLinear(quant_type, self.fc,qi=True,qo=True,num_bits=num_bits,e_bits=e_bits)
def quantize_forward(self, x):
# for _, layer in self.quantize_layers.items():
# x = layer(x)
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
x = self.layer4.quantize_forward(x)
x = self.qavgpool1(x)
x = x.view(x.size(0), -1)
x = self.qfc1(x)
out = F.softmax(x,dim = 1) # the softmax here is optional; it has little effect
return out
def freeze(self):
self.qconvbnrelu1.freeze() # as the first layer it already has its own qi, so no qi needs to be passed to freeze
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
self.qavgpool1.freeze(qi=qo)
self.qfc1.freeze(qi=self.qavgpool1.qo)
# self.qfc1.freeze()
def fakefreeze(self):
self.qconvbnrelu1.fakefreeze()
self.layer1.fakefreeze()
self.layer2.fakefreeze()
self.layer3.fakefreeze()
self.layer4.fakefreeze()
self.qfc1.fakefreeze()
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
qx = self.layer4.quantize_inference(qx)
qx = self.qavgpool1.quantize_inference(qx)
qx = qx.view(qx.size(0), -1)
qx = self.qfc1.quantize_inference(qx)
qx = self.qfc1.qo.dequantize_tensor(qx)
out = F.softmax(qx,dim = 1) # the softmax here is optional; it has little effect
return out
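# Typical PTQ flow for this folded model (as used by the ptq scripts in this repo):
#   model.quantize(quant_type, num_bits, e_bits)       # attach the Q* wrapper modules
#   model.quantize_forward(x) on calibration batches   # collect qi/qo statistics
#   model.freeze()                                      # fix scales and fold the quantization parameters
#   model.quantize_inference(x)                         # inference with the frozen quantized modules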
# BasicBlock class
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.convbnrelu1 = ConvBNReLU(self.conv1,self.bn1)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.relu = nn.ReLU()
self.convbn1 = ConvBN(self.conv2,self.bn2)
# shortcut
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
# out = self.conv1(x)
# out = self.bn1(out)
# out = self.relu(out)
out = self.convbnrelu1(x)
# out = self.conv2(out)
# out = self.bn2(out)
out = self.convbn1(out)
if self.downsample is not None:
identity = self.downsample(identity)
out += identity
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # needs qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # an elementwise-add transform needs to be written here; to be revised later
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 here could reuse the previous layer's qo, but passing it around felt awkward, so it is not used
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # must be connected to the last qo of the previous module
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the shortcut branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# an extra layer may be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo)
return self.qrelu1.qi # the qo after relu can reuse the qi statistics collected by relu
def fakefreeze(self):
# qconvbnrelu1 here could reuse the previous layer's qo, but passing it around felt awkward, so it is not used
# still needs careful checking
self.qconvbnrelu1.fakefreeze() # must be connected to the last qo of the previous module
self.qconvbn1.fakefreeze()
if self.downsample is not None:
self.qconvbn2.fakefreeze() # the shortcut branch
def quantize_inference(self, x):
# the initial quantize_tensor and final dequantize_tensor should not be needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # an elementwise-add transform may need to be written here; to be revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
# Bottleneck class
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
# 1x1 conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
self.convbnrelu1 = ConvBNReLU(self.conv1,self.bn1)
# 3x3 conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
self.convbnrelu2 = ConvBNReLU(self.conv2,self.bn2)
# 1x1 conv layer
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion)
self.convbn1 = ConvBN(self.conv3,self.bn3)
# shortcut
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x
# out = self.conv1(x)
# out = self.bn1(out)
# out = self.relu(out)
out = self.convbnrelu1(x)
# out = self.conv2(out)
# out = self.bn2(out)
# out = self.relu(out)
out = self.convbnrelu2(out)
# out = self.conv3(out)
# out = self.bn3(out)
out = self.convbn1(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity # the residual addition happens here
out = self.relu(out)
return out
def quantize(self, quant_type ,num_bits=8, e_bits=3):
self.qconvbnrelu1 = QConvBNReLU(quant_type,self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbnrelu2 = QConvBNReLU(quant_type,self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qconvbn1 = QConvBN(quant_type,self.conv3,self.bn3,qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
if self.downsample is not None:
self.qconvbn2 = QConvBN(quant_type,self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,e_bits=e_bits)
self.qelementadd = QElementwiseAdd(quant_type,qi0=False, qi1=False, qo=True,num_bits=num_bits,e_bits=e_bits)
self.qrelu1 = QReLU(quant_type,qi= False,num_bits=num_bits,e_bits=e_bits) # needs qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbnrelu2(out)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
# out = identity + out # an elementwise-add transform needs to be written here; to be revised later
out = self.qelementadd(out,identity)
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 here could reuse the previous layer's qo, but passing it around felt awkward, so it is not used
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # must be connected to the last qo of the previous module
self.qconvbnrelu2.freeze(qi=self.qconvbnrelu1.qo)
self.qconvbn1.freeze(qi = self.qconvbnrelu2.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = qinput) # the shortcut branch
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = self.qconvbn2.qo)
else:
self.qelementadd.freeze(qi0 = self.qconvbn1.qo, qi1 = qinput)
# an extra layer may be needed here to handle the elementwise add
self.qrelu1.freeze(qi = self.qelementadd.qo) # needs its own qi statistics
return self.qrelu1.qi # the qo after relu can reuse the qi statistics collected by relu
def fakefreeze(self):
# qconvbnrelu1 here could reuse the previous layer's qo, but passing it around felt awkward, so it is not used
# still needs careful checking
self.qconvbnrelu1.fakefreeze()
self.qconvbnrelu2.fakefreeze()
self.qconvbn1.fakefreeze()
if self.downsample is not None:
self.qconvbn2.fakefreeze() # the shortcut branch
def quantize_inference(self, x):
# the initial quantize_tensor and final dequantize_tensor should not be needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbnrelu2.quantize_inference(out)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
# out = identity + out # an elementwise-add transform may need to be written here; to be revised later
out = self.qelementadd.quantize_inference(out,identity)
out = self.qrelu1.quantize_inference(out)
return out
class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
# self.downsample = nn.Sequential(
# nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion)
# )
self.conv1 = nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False)
self.bn1 = nn.BatchNorm2d(planes * block.expansion)
self.convbn1 = ConvBN(self.conv1,self.bn1)
self.downsample = self.convbn1
self.blockdict = nn.ModuleDict()
self.blockdict['block1'] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes, stride=stride, downsample=self.downsample)
GlobalVariables.SELF_INPLANES = planes * block.expansion
for i in range(1, blocks): # number of blocks; a ModuleDict is the only option here
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes) # instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv stride, while self.inplanes is the number of input channels of the current residual block,
# # and planes * block.expansion is its number of output channels. So when stride != 1 or self.inplanes != planes * block.expansion, a downsample operation is required
# # except for the first residual block of this stage, all residual blocks have equal input and output channel counts and the same stride (1 or 2). The spatial size of their inputs is unchanged, and the output height/width shrinks gradually as residual blocks are stacked
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
x = layer(x)
return x
def quantize(self, quant_type, num_bits=8, e_bits=3):
# needs checking
for _, layer in self.blockdict.items():
layer.quantize(quant_type=quant_type,num_bits=num_bits,e_bits=e_bits) # each element is a block, and the block has its own quantize strategy; n_exp and mode were already assigned in __init__
def quantize_forward(self, x):
for _, layer in self.blockdict.items():
x = layer.quantize_forward(x) # each block has its own quantize_forward
return x
def freeze(self, qinput): # qinput must be passed in from the ResNet module's freeze
# qconvbnrelu1 here could reuse the previous layer's qo, but passing it around felt awkward, so it is not used
# still needs careful checking
cnt = 0
for _, layer in self.blockdict.items():
if cnt == 0:
qo = layer.freeze(qinput = qinput)
cnt = 1
else:
qo = layer.freeze(qinput = qo) # each block has its own freeze
return qo # for use by the subsequent layers
def fakefreeze(self):
for _, layer in self.blockdict.items():
layer.fakefreeze()
def quantize_inference(self, x):
# the initial quantize_tensor and final dequantize_tensor should not be needed here: this is not the first/last layer, and as long as every intermediate layer stays in the quantized domain no such handling is required.
for _, layer in self.blockdict.items():
x = layer.quantize_inference(x) # each block has its own quantize_inference
return x
# ResNet-18 model
def resnet18_fold(**kwargs):
model = ResNet_fold(BasicBlock, [2, 2, 2, 2], **kwargs)
return model
# ResNet-50 model
def resnet50_fold(**kwargs):
model = ResNet_fold(Bottleneck, [3, 4, 6, 3], **kwargs)
return model
# ResNet-152 model
def resnet152_fold(**kwargs):
model = ResNet_fold(Bottleneck, [3, 8, 36, 3], **kwargs)
return model
@@ -247,8 +247,13 @@ class QConv2d(QModule):
# update qw before the forward pass so that the scale is correct when quantizing the weight
self.qw.update(self.conv_module.weight.data)
# note: this mainly collects the value ranges of x and the weights for each layer; the bias is not actually quantized here
tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# tmp_wgt = FakeQuantize.apply(self.conv_module.weight, self.qw)
# x = F.conv2d(x, tmp_wgt, self.conv_module.bias,
# stride=self.conv_module.stride,
# padding=self.conv_module.padding, dilation=self.conv_module.dilation,
# groups=self.conv_module.groups)
x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
@@ -317,8 +322,9 @@ class QLinear(QModule):
x = FakeQuantize.apply(x, self.qi)
self.qw.update(self.fc_module.weight.data)
tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
x = F.linear(x, tmp_wgt, self.fc_module.bias)
# tmp_wgt = FakeQuantize.apply(self.fc_module.weight, self.qw)
# x = F.linear(x, tmp_wgt, self.fc_module.bias)
x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)
if hasattr(self, 'qo'):
self.qo.update(x)
@@ -924,3 +930,260 @@ class QElementwiseAdd(QModule_2):
return x
# new modules for full-precision model - fold bn
# inference should also be adapted accordingly
class ConvBNReLU(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0)
return x
class ConvBN(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBN, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
return x
class ConvBNReLU6(nn.Module):
def __init__(self,conv_module, bn_module):
super(ConvBNReLU6, self).__init__()
self.conv_module = conv_module
self.bn_module = bn_module
def fold_bn(self, mean, std):
if self.bn_module.affine:
gamma_ = self.bn_module.weight / std
weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
else:
bias = self.bn_module.bias - gamma_ * mean
else:
gamma_ = 1 / std
weight = self.conv_module.weight * gamma_
if self.conv_module.bias is not None:
bias = gamma_ * self.conv_module.bias - gamma_ * mean
else:
bias = -gamma_ * mean
return weight, bias
def freeze(self):
std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)
weight, bias = self.fold_bn(self.bn_module.running_mean, std)
self.conv_module.weight.data = weight.data
if self.conv_module.bias is None:
self.conv_module.bias = nn.Parameter(bias)
else:
self.conv_module.bias.data = bias
def fakefreeze(self):
pass
def forward(self, x):
if self.training:
y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding,
dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
y = y.permute(1, 0, 2, 3) # NCHW -> CNHW
y = y.contiguous().view(self.conv_module.out_channels, -1) # CNHW -> C,NHW
# mean = y.mean(1)
# var = y.var(1)
mean = y.mean(1).detach()
var = y.var(1).detach()
self.bn_module.running_mean = \
(1 - self.bn_module.momentum) * self.bn_module.running_mean + \
self.bn_module.momentum * mean
self.bn_module.running_var = \
(1 - self.bn_module.momentum) * self.bn_module.running_var + \
self.bn_module.momentum * var
else:
mean = Variable(self.bn_module.running_mean)
var = Variable(self.bn_module.running_var)
std = torch.sqrt(var + self.bn_module.eps)
weight, bias = self.fold_bn(mean, std)
x = F.conv2d(x, weight, bias,
stride=self.conv_module.stride,
padding=self.conv_module.padding, dilation=self.conv_module.dilation,
groups=self.conv_module.groups)
x = F.relu6(x)
return x
def quantize_inference(self, x):
x = self.conv_module(x)
x.clamp_(min=0,max=6)
return x
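For orientation, a minimal usage sketch of the ConvBNReLU6 wrapper defined above; the `from module import ConvBNReLU6` path is an assumption about the file layout, not part of the committed code:

```python
# Hedged usage sketch for ConvBNReLU6; the import path is an assumption.
import torch
import torch.nn as nn
from module import ConvBNReLU6  # assumed location of the class defined above

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)
block = ConvBNReLU6(conv, bn)

x = torch.randn(8, 3, 32, 32)
block.train()
y = block(x)                        # training path: fold with batch statistics, then relu6

block.eval()
block.freeze()                      # fold the running BN statistics into conv weight/bias
y_q = block.quantize_inference(x)   # inference path: plain conv followed by clamp to [0, 6]
```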
from model import *
from model_foldbn import *
from extract_ratio import *
from utils import *
import openpyxl
import gol
import sys
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR
def js_div_norm(a,b):
a_norm = F.normalize(a.data,p=2,dim=-1)
b_norm = F.normalize(b.data,p=2,dim=-1)
return js_div(a_norm,b_norm).cpu().item()
def js_div_0(a,b):
return js_div(a,b).cpu().item()
def direct_quantize(model, test_loader,device):
for i, (data, target) in enumerate(test_loader, 1):
data = data.to(device)
output = model.quantize_forward(data).cpu()
if i % 500 == 0:
break
print('direct quantization finish')
def quantize_aware_training(model, device, train_loader, optimizer, epoch):
old_sub_str0 = "downsample.0"
new_sub_str0 = "conv1"
old_sub_str1 = "downsample.1"
new_sub_str1 = "bn1"
lossLayer = torch.nn.CrossEntropyLoss()
# accumulate the loss and the gradient of every parameter
# initialization
loss_sum = 0.
grad_dict = {}
for name,param in model.named_parameters():
if old_sub_str0 in name:
name = name.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in name:
name = name.replace(old_sub_str1, new_sub_str1)
grad_dict[name] = torch.zeros_like(param) # param.grad has the same shape as param
for batch_idx, (data, target) in enumerate(train_loader, 1):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model.quantize_forward(data)
# the loss over a batch is already averaged
loss = lossLayer(output, target)
loss.backward()
# accumulate loss and grads
loss_sum += loss
for name,param in model.named_parameters():
if param.grad is not None:
# print('-------'+name+'-------')
if old_sub_str0 in name:
name = name.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in name:
name = name.replace(old_sub_str1, new_sub_str1)
grad_dict[name] += param.grad.detach()
# print(grad_dict[name])
# print(grad_dict.items())
# input()
optimizer.step()
if batch_idx % 50 == 0:
print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
num_batches = len(train_loader.batch_sampler) # number of batches, not the per-batch size
# average the accumulated values over the number of batches
for name,grad in grad_dict.items():
grad_dict[name] = grad / num_batches
loss_avg = loss_sum / num_batches
return loss_avg, grad_dict
def full_inference(model, test_loader):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Full Model Accuracy: {:.2f}%\n'.format(100. * correct / len(test_loader.dataset)))
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
# accumulate the loss and the gradient of every parameter
# initialization
loss_sum = 0.
grad_dict = {}
for name,param in model.named_parameters():
grad_dict[name] = torch.zeros_like(param) # param.grad has the same shape as param
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
# accumulate loss and grads
loss_sum += loss
for name,param in model.named_parameters():
if param.grad is not None:
# print('-------'+name+'-------')
grad_dict[name] += param.grad.detach()
# print(grad_dict[name])
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
num_batches = len(train_loader.batch_sampler) # number of batches, not the per-batch size
# average the accumulated values over the number of batches
for name,grad in grad_dict.items():
grad_dict[name] = grad / num_batches
loss_avg = loss_sum / num_batches
return loss_avg, grad_dict
def quantize_inference(model, test_loader):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print('\nTest set: Quant Model Accuracy: {:.2f}%\n'.format(100. * correct / len(test_loader.dataset)))
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='QAT Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18')
parser.add_argument('-e','--epochs', default=15, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=1, type=int, metavar='WORKERS',help='number of data loading workers (default: 1)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-wd','--weight_decay',default=0.0001,type=float,metavar='WD',help='weight decay (default: 1e-4)',dest='wd')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
args = parser.parse_args()
batch_size = args.batch_size
seed = 1
epochs = args.epochs
lr = args.lr
# momentum = 0.5
weight_decay = args.wd
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
writer = SummaryWriter(log_dir='log/' + args.model + '/qat')
wb = openpyxl.Workbook()
ws = wb.active
old_sub_str0 = "downsample.0"
new_sub_str0 = "conv1"
old_sub_str1 = "downsample.1"
new_sub_str1 = "bn1"
if args.model == 'ResNet18':
model = resnet18_fold()
elif args.model == 'ResNet50':
model = resnet50_fold()
elif args.model == 'ResNet152':
model = resnet152_fold()
layer, par_ratio, flop_ratio = extract_ratio(args.model)
# TODO: layer should be re-read from the model definition
layer = []
# the layer list built here is used to line up par_ratio / flop_ratio: one name per layer, one ratio per layer
for name, param in model.named_parameters():
if 'weight' in name:
n = name.split('.') # the names of all parameterized layers (conv, bn, fc) can be extracted this way
pre = '.'.join(n[:len(n)-1])
# take the name before '.weight' (i.e. the layer name); checking for 'weight' avoids extracting the same name again for the bias
# no 'downsample' substrings here
layer.append(pre)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=args.workers, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=args.workers, pin_memory=False
)
# model.load_state_dict(torch.load(full_file))
model.to(device)
momentum = 0.9
# optimizer1 = optim.Adam(model.parameters(), lr=lr)
optimizer1 = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
lr_scheduler1 = CosineAnnealingLR(optimizer1, T_max=epochs)
# no .pt checkpoint is saved, so nothing is loaded
load_qat = False
ckpt_prefix = 'ckpt/qat/'+ args.model + '/'
loss_sum = 0.
full_grad_sum = {}
full_grad_avg = {}
for name,param in model.named_parameters():
full_grad_sum[name] = torch.zeros_like(param)
full_grad_avg[name] = torch.zeros_like(param)
for epoch in range(1, epochs+1):
# train the original full-precision model and collect its gradient statistics
loss,full_grad = train(model, device, train_loader, optimizer1, epoch)
if epoch == 1:
loss_start = loss
# print('loss:%f' % loss_avg)
writer.add_scalar('Full.loss',loss,epoch)
# for name,grad in grad_dict.items():
# writer.add_histogram('Full.'+name+'_grad',grad,global_step=epoch)
loss_sum += loss
loss_avg = loss_sum / epoch
# change in loss: the larger it is, the faster the convergence (when comparing models over the same number of epochs, a large loss_delta means the model quickly enters the low-loss phase)
loss_delta = loss - loss_start
for name,grad in full_grad.items():
full_grad_sum[name] += full_grad[name]
full_grad_avg[name] = full_grad_sum[name] / epoch
if epoch % 5 == 0:
ws = wb.create_sheet('epoch_%d'%epoch)
ws.cell(row=1,column=2,value='loss')
ws.cell(row=1,column=3,value='loss_sum')
ws.cell(row=1,column=4,value='loss_avg')
ws.cell(row=1,column=5,value='loss_delta')
ws.cell(row=2,column=1,value='FP32')
ws.cell(row=2,column=2,value=loss.item())
ws.cell(row=2,column=3,value=loss_sum.item())
ws.cell(row=2,column=4,value=loss_avg.item())
ws.cell(row=2,column=5,value=loss_delta.item())
ws.cell(row=4,column=1,value='title')
ws.cell(row=4,column=2,value='loss')
ws.cell(row=4,column=3,value='loss_sum')
ws.cell(row=4,column=4,value='loss_avg')
ws.cell(row=4,column=5,value='loss_delta')
ws.cell(row=4,column=6,value='js_grad')
ws.cell(row=4,column=7,value='js_grad_sum')
ws.cell(row=4,column=8,value='js_grad_avg')
# lr_scheduler1.step()
quant_type_list = ['INT']
gol._init()
currow=4 # row from which data starts being written
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# for each quantization category, the bias quantization table only needs to be set once
# for INT the bit width is large, so a lookup table is too costly; direct _round is used instead
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
if load_qat is True and osp.exists(ckpt_prefix+'epoch_20/'+title+'.pt'):
continue
currow += 1
print('\nQAT: '+title)
if args.model == 'ResNet18':
model_ptq = resnet18()
elif args.model == 'ResNet50':
model_ptq = resnet50()
elif args.model == 'ResNet152':
model_ptq = resnet152()
# optimizer2 = optim.Adam(model_ptq.parameters(), lr=lr)
# lr_scheduler2 = CosineAnnealingLR(optimizer2, T_max=epochs)
optimizer2 = optim.SGD(model_ptq.parameters(), lr=lr, momentum=momentum)
# set up the quantization value table
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
model_ptq.to(device)
full_file = 'ckpt/cifar10_' + args.model + '.pt'
# model_ptq.load_state_dict(torch.load(full_file))
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.eval()
direct_quantize(model_ptq, train_loader, device)
model_ptq.train()
loss_sum = 0.
qat_grad_sum = {}
qat_grad_avg = {}
# since freeze() is not called, model and model_ptq actually have the same parameters; only the names differ slightly at the downsample layers
for name,param in model_ptq.named_parameters():
if old_sub_str0 in name:
name = name.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in name:
name = name.replace(old_sub_str1, new_sub_str1)
qat_grad_sum[name] = torch.zeros_like(param)
qat_grad_avg[name] = torch.zeros_like(param)
for epoch in range(1, epochs+1):
loss,qat_grad = quantize_aware_training(model_ptq, device, train_loader, optimizer2, epoch)
# print('loss:%f' % loss_avg)
if epoch == 1:
loss_start = loss
writer.add_scalar(title+'.loss',loss,epoch)
# for name,grad in qat_grad.items():
# writer.add_histogram(title+'.'+name+'_grad',grad,global_step=epoch)
loss_sum += loss
loss_avg = loss_sum / epoch
loss_delta = loss-loss_start
# summing the gradients over all epochs here is questionable; better to change it so that every 5 epochs only that single epoch's gradients are summed
for name,param in model_ptq.named_parameters():
# qat_grad_sum[name] += qat_grad[name]
# only does a simple substring replacement in the name
if old_sub_str0 in name:
name = name.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in name:
name = name.replace(old_sub_str1, new_sub_str1)
qat_grad_sum[name] += qat_grad[name]
qat_grad_avg[name] += qat_grad_sum[name] / epoch
# this should be computed for every epoch, not only at certain epochs
if epoch % 5 == 0:
ws = wb['epoch_%d'%epoch]
js_grad = 0.
js_grad_sum = 0.
js_grad_avg = 0.
for name,_ in model_ptq.named_parameters():
# TODO
# the downsample names can be mapped to the corresponding conv/bn names:
# downsample.0 => conv1, downsample.1 => bn1
# since freeze() is not called, the convs in model and model_ptq have no bias
# whether the similarity and gradients of the BN layers should also be considered remains to be seen
n = name.split('.')
prefix = '.'.join(n[:len(n) - 1])
if old_sub_str0 in prefix:
prefix = prefix.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in prefix:
prefix = prefix.replace(old_sub_str1, new_sub_str1)
if old_sub_str0 in name:
name = name.replace(old_sub_str0, new_sub_str0)
elif old_sub_str1 in name:
name = name.replace(old_sub_str1, new_sub_str1)
# layer holds the layer names in order, and flop_ratio holds the ratios in the same layer-name order
layer_idx = layer.index(prefix)
# weighted sum
# note: this effectively only records the gradients of the last full-precision epoch
js = js_div_0(qat_grad[name],full_grad[name])
js_sum = js_div_0(qat_grad_sum[name],full_grad_sum[name])
js_avg = js_div_0(qat_grad_avg[name],full_grad_avg[name])
if js < 0:
js = 0
if js_sum < 0:
js_sum = 0
if js_avg < 0:
js_avg = 0
js_grad += flop_ratio[layer_idx] * js
print(f"name{name}\nqat_grad_avg[{name}]={qat_grad_avg[name]}\nfull_grad_avg[{name}]={full_grad_avg[name]}\njs:{js}\nidx:{layer_idx}")
js_grad_sum += flop_ratio[layer_idx] * js_sum
js_grad_avg += flop_ratio[layer_idx] * js_avg
ws.cell(row=currow,column=1,value=title)
ws.cell(row=currow,column=2,value=loss.item())
ws.cell(row=currow,column=3,value=loss_sum.item())
ws.cell(row=currow,column=4,value=loss_avg.item())
ws.cell(row=currow,column=5,value=loss_delta.item())
ws.cell(row=currow,column=6,value=js_grad)
ws.cell(row=currow,column=7,value=js_grad_sum)
ws.cell(row=currow,column=8,value=js_grad_avg)
print(f"name:{name},js_grad:{js_grad},js_sum:{js_grad_sum},js_avg:{js_grad_avg}")
# print(f"quan_type:{quant_type},num_bits:{num_bits},epoch:{epoch}")
# print(f"loss:{loss.item()},loss_sum:{loss_sum.item()},loss_avg:{loss_avg.item()},loss_delta:{loss_delta.item()}")
# print(f"js_grad:{js_grad},js_grad_sum:{js_grad_sum},js_grad_avg:{js_grad_avg}")
# lr_scheduler2.step()
wb.remove(wb['Sheet']) # remove the default worksheet by name
wb.save(args.model + 'qat_result.xlsx')
writer.close()
from model import *
from extract_ratio import *
from utils import *
import openpyxl
import gol
import sys
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import CosineAnnealingLR
def js_div_norm(a,b):
a_norm = F.normalize(a.data,p=2,dim=-1)
b_norm = F.normalize(b.data,p=2,dim=-1)
return js_div(a_norm,b_norm).cpu().item()
def js_div_0(a,b):
return js_div(a,b).cpu().item()
def quantize_aware_training(model, device, train_loader, optimizer, epoch):
lossLayer = torch.nn.CrossEntropyLoss()
# accumulate the loss and the gradient of every parameter
# initialization
loss_sum = 0.
grad_dict = {}
for name,param in model.named_parameters():
grad_dict[name] = torch.zeros_like(param) # param.grad has the same shape as param
for batch_idx, (data, target) in enumerate(train_loader, 1):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model.quantize_forward(data)
# the loss over a batch is already averaged
loss = lossLayer(output, target)
loss.backward()
# accumulate loss and grads
loss_sum += loss
for name,param in model.named_parameters():
if param.grad is not None:
# print('-------'+name+'-------')
grad_dict[name] += param.grad.detach()
# print(grad_dict[name])
# print(grad_dict.items())
# input()
optimizer.step()
if batch_idx % 50 == 0:
print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
num_batches = len(train_loader.batch_sampler) # number of batches, not the per-batch size
# average the accumulated values over the number of batches
for name,grad in grad_dict.items():
grad_dict[name] = grad / num_batches
loss_avg = loss_sum / num_batches
return loss_avg, grad_dict
def full_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Full Model Accuracy: {:.2f}%\n'.format(acc))
return acc
def train(model, device, train_loader, optimizer, epoch):
model.train()
lossLayer = torch.nn.CrossEntropyLoss()
# accumulate the loss and the gradient of every parameter
# initialization
loss_sum = 0.
grad_dict = {}
for name,param in model.named_parameters():
grad_dict[name] = torch.zeros_like(param) # param.grad has the same shape as param
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = lossLayer(output, target)
loss.backward()
# accumulate loss and grads
loss_sum += loss
for name,param in model.named_parameters():
if param.grad is not None:
# print('-------'+name+'-------')
grad_dict[name] += param.grad.detach()
# print(grad_dict[name])
optimizer.step()
if batch_idx % 50 == 0:
print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
))
num_batches = len(train_loader.batch_sampler) # number of batches, not the per-batch size
# average the accumulated values over the number of batches
for name,grad in grad_dict.items():
grad_dict[name] = grad / num_batches
loss_avg = loss_sum / num_batches
return loss_avg, grad_dict
def quantize_inference(model, test_loader, device):
correct = 0
for i, (data, target) in enumerate(test_loader, 1):
data, target = data.to(device), target.to(device)
output = model.quantize_inference(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
acc = 100. * correct / len(test_loader.dataset)
print('\nTest set: Quant Model Accuracy: {:.2f}%\n'.format(acc))
return acc
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='QAT Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='ResNet18')
parser.add_argument('-e','--epochs', default=15, type=int, metavar='EPOCHS', help='number of total epochs to run')
parser.add_argument('-b', '--batch_size', default=128, type=int, metavar='BATCH SIZE', help='mini-batch size (default: 128)')
parser.add_argument('-j','--workers', default=1, type=int, metavar='WORKERS',help='number of data loading workers (default: 1)')
parser.add_argument('-lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-wd','--weight_decay',default=0.0001,type=float,metavar='WD',help='weight decay (default: 1e-4)',dest='wd')
parser.add_argument('-t', '--test', dest='test', action='store_true', help='test model on test set')
args = parser.parse_args()
batch_size = args.batch_size
seed = 1
epochs = args.epochs
lr = args.lr
# momentum = 0.5
weight_decay = args.wd
torch.manual_seed(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
writer = SummaryWriter(log_dir='log/' + args.model + '/qat')
wb = openpyxl.Workbook()
ws = wb.active
if args.model == 'ResNet18':
model = resnet18()
elif args.model == 'ResNet50':
model = resnet50()
elif args.model == 'ResNet152':
model = resnet152()
layer, par_ratio, flop_ratio = extract_ratio(args.model)
# TODO: layer should be re-read from the model definition
layer = []
for name, param in model.named_parameters():
if 'weight' in name:
n = name.split('.') # the names of all parameterized layers (conv, bn, fc) can be extracted this way
pre = '.'.join(n[:len(n)-1])
# take the name before '.weight' (i.e. the layer name); checking for 'weight' avoids extracting the same name again for the bias
layer.append(pre)
train_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=args.workers, pin_memory=False
)
test_loader = torch.utils.data.DataLoader(
datasets.CIFAR10('../../project/p/data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])),
batch_size=batch_size, shuffle=True, num_workers=args.workers, pin_memory=False
)
# model.load_state_dict(torch.load(full_file))
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
lr_scheduler = CosineAnnealingLR(optimizer, T_max=epochs)
# no .pt checkpoint is saved, so nothing is loaded
quant_type_list = ['INT']
gol._init()
currow=4 # row from which data starts being written
for quant_type in quant_type_list:
num_bit_list = numbit_list(quant_type)
# for each quantization category, the bias quantization table only needs to be set once
# for INT the bit width is large, so a lookup table is too costly; direct _round is used instead
if quant_type != 'INT':
bias_list = build_bias_list(quant_type)
gol.set_value(bias_list, is_bias=True)
for num_bits in num_bit_list:
e_bit_list = ebit_list(quant_type,num_bits)
for e_bits in e_bit_list:
if quant_type == 'FLOAT':
title = '%s_%d_E%d' % (quant_type, num_bits, e_bits)
else:
title = '%s_%d' % (quant_type, num_bits)
currow += 1
print('\nQAT: '+title)
if args.model == 'ResNet18':
model_ptq = resnet18()
elif args.model == 'ResNet50':
model_ptq = resnet50()
elif args.model == 'ResNet152':
model_ptq = resnet152()
model_ptq.to(device)
full_file = 'ckpt/cifar10_' + args.model + '.pt'
model_ptq.load_state_dict(torch.load(full_file))
model_ptq.eval()
full_acc = full_inference(model_ptq, test_loader, device)
# set up the quantization value table
if quant_type != 'INT':
plist = build_list(quant_type, num_bits, e_bits)
gol.set_value(plist)
# model_ptq.load_state_dict(torch.load(full_file))
model_ptq.quantize(quant_type,num_bits,e_bits)
model_ptq.train()
for epoch in range(1, epochs+1):
loss,qat_grad = quantize_aware_training(model_ptq, device, train_loader, optimizer, epoch)
# print('loss:%f' % loss_avg)
if epoch == 1:
loss_start = loss
writer.add_scalar(title+'.loss',loss,epoch)
lr_scheduler.step()
print(f"loss:{loss}")
model_ptq.freeze()
qat_acc = quantize_inference(model_ptq, test_loader, device)
print(f"Final QAT ACC:{qat_acc}")
## update: <br>2023.4.28<br>
### Goal: try to address the problem of predicting model convergence speed
- Problem: following the original approach, QAT from scratch is used to obtain the loss decrease over the first 5/10/15/20 epochs together with the training-gradient similarity, and the two are fitted against each other. However, the results produced by qat.py are not very good.<br>There are two main issues:<br>(1) The distances (i.e. the differences in similarity) are too large and vary too much (there are clear order-of-magnitude differences, and the pattern does not match expectations).<br>(2) Across different quantization settings the loss decrease is sometimes positive and sometimes negative; in other words there is no clear downward trend in the loss, and the values look fairly random.<br>
- Experiments: to address the issues above I made a series of observations, analyses and experiments, fixed the likely problems in qat.py to obtain new_qat.py, added model_foldbn.py, and modified module.py.<br>
### Analysis and experiments:
1. Problems and fixes:
- In the quantized model BN is folded into Conv, so I tried to mimic the fold used during quantization and fold BN into Conv for full-precision training as well; the code is in module.py and model_foldbn.py (a minimal sketch of the fold follows this list of bullets). I trained and validated the folded full-precision model: it updates its weights normally and its inference accuracy improves, but convergence is clearly slower (ResNet18_foldbn only reaches about 40% accuracy after 80 epochs).
- In qat.py, model and model_ptq shared a single optimizer; new_qat.py uses two optimizers, one for the parameters of each model.
- In the experiments the gradients obtained with the Adam optimizer were rather unstable; after switching to SGD the stability improved and the trends became more pronounced.
- The full_grad... dictionaries store the gradients of the final epoch, so using them directly to compute similarities against the quantized model's gradients at each epoch checkpoint does not line up in most cases. For now I only train for 5 epochs in the experiments and have not handled this issue yet.
- I tried a range of lr and momentum settings, but the effect was not significant.
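A minimal sketch of the BN-into-Conv fold mentioned in the first bullet above, mirroring the fold_bn logic in module.py; the helper name fold_conv_bn and the tensor shapes are illustrative assumptions, not the committed implementation:

```python
# Hedged sketch of folding BatchNorm into a preceding Conv2d (eval mode, running stats).
# fold_conv_bn is an illustrative helper name, not part of the repository code.
import torch
import torch.nn as nn
import torch.nn.functional as F

def fold_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d):
    std = torch.sqrt(bn.running_var + bn.eps)
    gamma = bn.weight / std                              # per-channel scale
    weight = conv.weight * gamma.view(-1, 1, 1, 1)
    if conv.bias is not None:
        bias = gamma * conv.bias - gamma * bn.running_mean + bn.bias
    else:
        bias = bn.bias - gamma * bn.running_mean
    return weight, bias

# quick consistency check against the unfolded conv+bn pair
conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
conv.eval(); bn.eval()
x = torch.randn(2, 3, 16, 16)
w, b = fold_conv_bn(conv, bn)
y_folded = F.conv2d(x, w, b, padding=1)
y_ref = bn(conv(x))
print(torch.allclose(y_folded, y_ref, atol=1e-5))  # expected: True
```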
2. Effects of the changes:
- For INT quantization, as the bit width increases, the training gradients of the quantized model gradually approach the same order of magnitude as those of the full-precision model (although the actual values still differ noticeably).
- The js_grad, loss_delta, ... metrics improve noticeably (though they still do not match expectations).
3. Remaining problems and my conjectures:
- For INT quantization, the training-gradient similarity only rises at the smallest bit widths and then merely fluctuates as the bit width grows. From inspecting the gradient data, the gradients do not become consistent as the bit width increases; only their order of magnitude converges, while the actual values still differ a lot. My guess is that QAT from scratch is hard and the models did not train effectively; there may also be undiscovered bugs in the code.
- For INT quantization, loss_delta (the decrease in loss) also does not grow consistently with the bit width; it only increases over a range of small bit widths and then fluctuates fairly randomly.
- I also tried plain QAT from scratch; after roughly 80 epochs there was still no clear training progress, which may be one of the reasons for the issues above.
<br>(Note: the detailed data can be found in ResNet18qat_result.xlsx.)
4. Attempted fits
loss_delta vs. js_grad (loss decrease vs. the weighted training-gradient similarity at epoch 5)
<img src = "fig/grad_delta.png" class="h-90 auto">
The fit is clearly very poor.
loss_avg vs. js_grad_avg (average loss vs. the weighted similarity of the average training gradients over the first 5 epochs)
<img src = "fig/qat.png" class="h-90 auto">
There is a visible linear trend, but it only says that a larger training-gradient similarity goes with a larger loss; the magnitude of the loss cannot be linked to the training convergence speed in any sound way. A large loss simply means the current model performs poorly, which is what drives the loss up. (A minimal sketch of such a linear fit is given below.)
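For reference, the kind of straight-line fit shown in the plot above could be reproduced roughly as follows; the arrays are placeholder values, not the actual numbers from ResNet18qat_result.xlsx:

```python
# Hedged sketch of the loss_avg vs. js_grad_avg linear fit; the arrays below are
# placeholders, not the measured values from ResNet18qat_result.xlsx.
import numpy as np

js_grad_avg = np.array([0.10, 0.15, 0.22, 0.30, 0.41])   # illustrative x values
loss_avg    = np.array([1.80, 1.95, 2.10, 2.30, 2.55])   # illustrative y values

slope, intercept = np.polyfit(js_grad_avg, loss_avg, deg=1)
r = np.corrcoef(js_grad_avg, loss_avg)[0, 1]
print(f"fit: loss_avg ~ {slope:.3f} * js_grad_avg + {intercept:.3f}, r = {r:.3f}")
```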
## update: <br>2023.4.24<br>
Added some additional data and fitting plots.<br>
Tried fitting the data points of ResNet18, ResNet50, ResNet152 and MobileNetV2 on one plot, and the result is reasonably good. However, since these four architectures are fairly similar, it is not yet clear how well the fit would hold when adding data points from models with very different structures.
......