Commit 147e0fbb by Zhihong Ma

fix: before BN trial

parent e230b520
@@ -279,11 +279,13 @@ class LeNet(nn.Module):
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 32*32*6 (bias accounts for 32*32*6 of this) 6144/921600
'bn1': nn.BatchNorm2d(6),
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 16*16*16 (bias accounts for 16*16*16 of this) 1536/1228800
'bn2': nn.BatchNorm2d(16),
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
@@ -316,11 +318,15 @@ class LeNet(nn.Module):
self.quantize_conv_layers=nn.ModuleDict({
# qi=True: the previous layer's output has not been quantized and must be quantized here. maxpool and relu change neither the INT values nor the min/max, so the layer after either of them uses qi=False
# if the previous layer is a conv, the data min/max has changed, so qi=True is needed to quantize the input
# 'qconv1': QConv2d(self.conv_layers['conv1'], qi=True, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
# 'qreluc1': QReLU(n_exp=self.n_exp, mode=self.mode),
'qconvbnrelu1': QConvBNReLU(self.conv_layers['conv1'],self.conv_layers['bn1'],qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode),
'qpool1': QMaxPooling2d(kernel_size=2,stride=2,padding=0, n_exp=self.n_exp, mode=self.mode),
# 'qconv2': QConv2d(self.conv_layers['conv2'], qi=False, qo=True, num_bits=num_bits, n_exp=self.n_exp, mode=self.mode),
# 'qreluc2': QReLU(n_exp=self.n_exp, mode=self.mode),
'qconvbnrelu2': QConvBNReLU(self.conv_layers['conv2'],self.conv_layers['bn2'],qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode),
'qpool2': QMaxPooling2d(kernel_size=2, stride=2, padding=0, n_exp=self.n_exp, mode=self.mode)
})
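# Editor's sketch (hedged, not this repo's code): a single QConvBNReLU node is
# possible because the BatchNorm can be folded into the preceding conv before
# the weights are quantized. Under the standard fold:
import torch

def fold_bn_into_conv(conv, bn):
    # per-channel scale gamma / sqrt(var + eps)
    std = torch.sqrt(bn.running_var + bn.eps)
    factor = bn.weight / std
    folded_w = conv.weight * factor.reshape(-1, 1, 1, 1)
    base_b = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_mean)
    folded_b = factor * (base_b - bn.running_mean) + bn.bias
    return folded_w, folded_b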
@@ -347,37 +353,51 @@ class LeNet(nn.Module):
def freeze(self):
# self.quantize_conv_layers['qconv1'].freeze()
# self.quantize_conv_layers['qreluc1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconvbnrelu1'].freeze()
#self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qpool1'].freeze(self.quantize_conv_layers['qconvbnrelu1'].qo)
# self.quantize_conv_layers['qconv2'].freeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qreluc2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qconvbnrelu2'].freeze()
# self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qpool2'].freeze(self.quantize_conv_layers['qconvbnrelu2'].qo)
# self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].freeze(qi=self.quantize_conv_layers['qconvbnrelu2'].qo)
self.quantize_fc_layers['qreluf1'].freeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].freeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].freeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].freeze(qi=self.quantize_fc_layers['qfc2'].qo)
def fakefreeze(self):
# self.quantize_conv_layers['qconv1'].fakefreeze()
# self.quantize_conv_layers['qreluc1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
self.quantize_conv_layers['qconvbnrelu1'].fakefreeze()
self.quantize_conv_layers['qpool1'].fakefreeze(self.quantize_conv_layers['qconvbnrelu1'].qo)
# self.quantize_conv_layers['qconv2'].fakefreeze(self.quantize_conv_layers['qconv1'].qo)
# self.quantize_conv_layers['qreluc2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
# self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconv2'].qo)
self.quantize_conv_layers['qconvbnrelu2'].fakefreeze()
self.quantize_conv_layers['qpool2'].fakefreeze(self.quantize_conv_layers['qconvbnrelu2'].qo)
# self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconv2'].qo)
self.quantize_fc_layers['qfc1'].fakefreeze(qi=self.quantize_conv_layers['qconvbnrelu2'].qo)
self.quantize_fc_layers['qreluf1'].fakefreeze(self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qfc2'].fakefreeze(qi=self.quantize_fc_layers['qfc1'].qo)
self.quantize_fc_layers['qreluf2'].fakefreeze(self.quantize_fc_layers['qfc2'].qo)
self.quantize_fc_layers['qfc3'].fakefreeze(qi=self.quantize_fc_layers['qfc2'].qo)
def quantize_inference(self, x):
# x = self.quantize_conv_layers['qconv1'].qi.quantize_tensor(x, self.mode)
x = self.quantize_conv_layers['qconvbnrelu1'].qi.quantize_tensor(x, self.mode)
for s, layer in self.quantize_conv_layers.items():
x = layer.quantize_inference(x)
......
@@ -405,6 +405,148 @@ class QConv2d(QModule):
# x.clamp_(0., 2. ** self.num_bits - 1.).round_() # clamp to the representable range
class QBN(QModule):
def __init__(self, bn_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1): # mode is passed here so the embedded QModules (i.e. qi, qo) get their mode set
super(QBN, self).__init__(qi=qi, qo=qo, num_bits=num_bits, n_exp=n_exp,mode=mode)
self.num_bits = num_bits
self.bn_module = bn_module
self.qw = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode) # this introduces a fake-quantization node
self.qb = QParam(num_bits=num_bits, n_exp=n_exp, mode=mode)
self.mode = mode # kept for convenient use inside the layer
self.n_exp = n_exp
# qb is newly added
self.register_buffer('M', torch.tensor([], requires_grad=False)) # register M as a buffer
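# Editor's note (illustrative, assumed convention): M is the usual requantization
# multiplier that collapses the three scales, M = S_i * S_w / S_o, so that an
# integer accumulator lands on the output grid. With made-up numbers:
#   S_i, S_w, S_o = 0.02, 0.004, 0.05  ->  M = 0.0016
#   acc = 1234 (int32 accumulator)     ->  q_o = round(M * acc) + Z_o = 2 + Z_o
# In this QBN the buffer is only registered; the assignment below stays commented out.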
# freeze means we are about to save and run inference; all quantization parameters and quantized values get one final round of updates
def freeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo does not exist and should be provided.')
if qi is not None: # a qi was passed in, so assign it to self.qi
self.qi = qi
if qo is not None: # a qo was passed in, so assign it to self.qo
self.qo = qo
if self.mode == 1:
# self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data
# quantize the weight; the quantized weight can be used directly in the multiply (zero_point already subtracted), ready for quantized inference after finetuning
# self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
# self.bn_module.weight.data = self.bn_module.weight.data - self.qw.zero_point
self.bn_module.weight.data = FakeQuantize.apply(self.bn_module.weight, self.qw)
# quantize the bias
# should the bias num_bits also be limited by the device's quantization bit width?
# self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
#                                            scale=self.qi.scale * self.qw.scale,
#                                            zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.bn_module.bias.data = FakeQuantize.apply(self.bn_module.bias,self.qb)
elif self.mode == 2 or self.mode == 3:
# quantize the weight; the quantized weight can be used directly in the multiply (zero_point already subtracted), ready for quantized inference after finetuning
self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
# quantize the bias
# should the bias num_bits also be limited by the device's quantization bit width?
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
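# For reference, a conventional uniform quantize_tensor with this call signature
# would look roughly like the sketch below (assumed semantics; the repo's own
# helper may differ in clamping or rounding details):
import torch

def quantize_tensor_sketch(t, scale, zero_point, num_bits=8, signed=True):
    if signed:
        qmin, qmax = -(2 ** (num_bits - 1)), 2 ** (num_bits - 1) - 1
    else:
        qmin, qmax = 0, 2 ** num_bits - 1
    return torch.clamp(torch.round(t / scale) + zero_point, qmin, qmax)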
def fakefreeze(self, qi=None, qo=None):
if hasattr(self, 'qi') and qi is not None:
raise ValueError('qi has been provided in init function.')
if not hasattr(self, 'qi') and qi is None:
raise ValueError('qi does not exist and should be provided.')
if hasattr(self, 'qo') and qo is not None:
raise ValueError('qo has been provided in init function.')
if not hasattr(self, 'qo') and qo is None:
raise ValueError('qo does not exist and should be provided.')
if qi is not None: # a qi was passed in, so assign it to self.qi
self.qi = qi
if qo is not None: # a qo was passed in, so assign it to self.qo
self.qo = qo
if self.mode == 1:
# fake quantization weight
self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
self.bn_module.weight.data = self.qw.dequantize_tensor(self.bn_module.weight.data, self.mode)
# fake quantization bias
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.bn_module.bias.data = dequantize_tensor(self.bn_module.bias.data,
scale=self.qi.scale * self.qw.scale,
zero_point=0, mode=self.mode)
elif self.mode == 2 or self.mode == 3:
# fake quantization weight
self.bn_module.weight.data = self.qw.quantize_tensor(self.bn_module.weight.data, self.mode)
self.bn_module.weight.data = self.qw.dequantize_tensor(self.bn_module.weight.data, self.mode)
# fake quantization bias
self.bn_module.bias.data = quantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, num_bits=self.num_bits, signed=True, n_exp=self.n_exp, mode=self.mode)
self.bn_module.bias.data = dequantize_tensor(self.bn_module.bias.data,
scale=self.qb.scale,
zero_point=0, mode=self.mode)
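# Net effect of fakefreeze: parameters stay in float but are snapped to the
# quantization grid. The quantize-then-dequantize round trip above is equivalent
# to this sketch (uniform affine case; illustrative, not the repo's FakeQuantize):
import torch

def fake_quantize_sketch(t, scale, zero_point, num_bits=8):
    q = torch.clamp(torch.round(t / scale) + zero_point, 0, 2 ** num_bits - 1)
    return scale * (q - zero_point)  # float again, but on the quantized grid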
# FakeQuantize.apply quantizes and then dequantizes; the parameters seen in forward are quantize-dequantize results, so they stay roughly within the original fp range
def forward(self, x):
if hasattr(self, 'qi'):
# qi's mode was already fixed at init
self.qi.update(x) # qi contains the fake-quantization parameters and methods
x = FakeQuantize.apply(x, self.qi) # forward: FP->INT->FP (qi quantizes the input), quantize then restore
# self.qw.update(self.bn_module.weight.data)
# self.qb.update(self.bn_module.bias.data)
# bn_q= torch.nn.BatchNorm2d(num_features=self.bn_module.num_features, affine=self.bn_module.affine, eps=self.bn_module.eps,momentum=self.bn_module.momentum, track_running_stats=self.bn_module.track_running_stats)
# bn_q.weight = FakeQuantize.apply(self.bn_module.weight, self.qw)
# bn_q.bias = FakeQuantize.apply(self.bn_module.bias, self.qb)
# bn_q.running_mean = self.bn_module.running_mean
# bn_q.running_var = self.bn_module.running_var
# x=bn_q(x)
x = self.bn_module(x)
if hasattr(self, 'qo'):
self.qo.update(x)
x = FakeQuantize.apply(x, self.qo) # quantize then restore the output
return x
def quantize_inference(self, x): # x is the already-quantized input
if self.mode == 1:
x = dequantize_tensor(x,scale=self.qi.scale,zero_point=self.qi.zero_point, mode=self.mode)
x = self.bn_module(x) # forward; the bn_module's parameters were quantized in freeze() above
x = quantize_tensor(x,scale=self.qo.scale,zero_point=self.qo.zero_point,mode=self.mode)
# x.round_()
# x = x + self.qo.zero_point
# x.clamp_(0., 2. ** self.num_bits - 1.).round_() # clamp to the representable range
return x
elif self.mode == 2 or self.mode==3:
x = self.bn_module(x) # forward; the bn_module's parameters were quantized in freeze() above
# re-express the result in PoT form
x = FakeQuantize.apply(x, self.qo) # qo was updated from x after forward, so quantize+dequantize with self.qo yields the PoT-quantized result
return x
# x.round_()
# x.clamp_(0., 2. ** self.num_bits - 1.).round_() # clamp to the representable range
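# Modes 2/3 re-express activations on a power-of-two (PoT) grid via FakeQuantize.
# A hedged sketch of PoT snapping (assumed semantics for n_exp; rounding is done
# in log space, and the exponent range is illustrative):
import torch

def pot_quantize_sketch(t, n_exp=4):
    e = torch.round(torch.log2(t.abs().clamp(min=1e-12)))
    e = torch.clamp(e, -(2 ** (n_exp - 1)), 2 ** (n_exp - 1) - 1)
    return torch.sign(t) * torch.pow(2.0, e)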
class QLinear(QModule):
def __init__(self, fc_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
@@ -577,60 +719,6 @@ class QReLU(QModule):
return x
# class QDrop(QModule):
# # dropout merely drops some values from the tensor, so it has no meaningful effect on the overall min/max; qi need not be updated
#
# def __init__(self, drop_module, qi=True, qo=True, num_bits=8):
# super(QDrop, self).__init__(qi=qi, qo=qo, num_bits=num_bits)
# self.num_bits = num_bits
# self.drop_module = drop_module
# self.qw = QParam(num_bits=num_bits)
# #self.register_buffer('M', torch.tensor([], requires_grad=False)) # 将M注册为buffer
#
# def freeze(self, qi=None, qo=None):
#
# if hasattr(self, 'qi') and qi is not None:
# raise ValueError('qi has been provided in init function.')
# if not hasattr(self, 'qi') and qi is None:
# raise ValueError('qi is not existed, should be provided.')
#
# if hasattr(self, 'qo') and qo is not None:
# raise ValueError('qo has been provided in init function.')
# if not hasattr(self, 'qo') and qo is None:
# raise ValueError('qo is not existed, should be provided.')
#
# if qi is not None:
# self.qi = qi
# if qo is not None:
# self.qo = qo
#
#
# def forward(self, x):
# if hasattr(self, 'qi'):
# self.qi.update(x)
# x = FakeQuantize.apply(x, self.qi)
#
# self.qw.update(self.drop_module.weight.data)
#
# # the weights go through the fake-quant node; is the bias left unquantized here? this could be reworked using the approach from freeze
# x = F.dropout(x, 0.5)
#
# if hasattr(self, 'qo'):
# self.qo.update(x)
# x = FakeQuantize.apply(x, self.qo)
#
# return x
#
# def quantize_inference(self, x):
# x = x - self.qi.zero_point
# x = self.drop_module(x)
# x = self.M * x
# x.round_()
# x = x + self.qo.zero_point
# x.clamp_(0., 2.**self.num_bits-1.).round_()
# return x
class QMaxPooling2d(QModule):
def __init__(self, kernel_size=3, stride=1, padding=0, qi=False, num_bits=None, n_exp=4, mode=1):
@@ -858,16 +946,21 @@ class QConvBNReLU(QModule):
x = self.conv_module(x)
x = self.M * x
x.round_()
# x = F.relu(x) # a relu was patched in here
x = x + self.qo.zero_point
x.clamp_(0., 2. ** self.num_bits - 1.).round_()
return x
elif self.mode == 2 or self.mode == 3:
x = self.conv_module(x)
# x = F.relu(x)
x = FakeQuantize.apply(x, self.qo)
return x
class QConvBN(QModule):
def __init__(self, conv_module, bn_module, qi=True, qo=True, num_bits=8, n_exp=4, mode=1):
@@ -1095,7 +1188,7 @@ class QAdaptiveAvgPool2d(QModule):
x = F.adaptive_avg_pool2d(x,(1, 1)) # quantizing both the input and the output counts as quantizing this op
if hasattr(self, 'qo'):
self.qo.update(x)
......
@@ -182,6 +182,10 @@ if __name__ == "__main__":
model = resnet50().to(device)
elif args.model == 'resnet152' :
model = resnet152().to(device)
elif args.model == 'LeNet' :
model = LeNet().to(device)
elif args.model == 'NetBN' :
model = NetBN().to(device)
criterion = nn.CrossEntropyLoss()
......
@@ -11,6 +11,7 @@ from torchvision import datasets, transforms
import os
import os.path as osp
from torch.utils.tensorboard import SummaryWriter
from resnet import *
@@ -46,17 +47,25 @@ def quantize_inference(model, test_loader, device):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='PTQ Training')
parser.add_argument('-m', '--model', metavar='MODEL ARCH', default='resnet18')
parser.add_argument('-n','--num_bits', default=8, type=int, metavar='BITS', help='number of bits')
parser.add_argument('-t','--mode', default=1, type=int, metavar='MODES', help='PTQ mode (1: INT, 2: PoT, 3: FP)')
parser.add_argument('-e','--n_exp', default=4, type=int, metavar='N_EXP', help='number of exp')
# d1 = sys.argv[1] # num_bits
# d2 = sys.argv[2] # mode
# d3 = sys.argv[3] # n_exp
# d1 = 8
# d2 = 3
# d3 = 4
args = parser.parse_args()
d1 = args.num_bits
d2 = args.mode
d3 = args.n_exp
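# Example invocation of the new argparse interface (script name hypothetical);
# this reproduces the old positional form `d1 d2 d3` = 8 2 4:
#   python ptq.py --model LeNet --num_bits 8 --mode 2 --n_exp 4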
batch_size = 128
using_bn = True
load_quant_model_file = None
# load_model_file = None
net = 'LeNet'
acc = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -80,9 +89,21 @@ if __name__ == "__main__":
)
if using_bn:
# model = LeNet(n_exp=int(d3), mode=int(d2)).to(device)
if args.model == 'resnet18' :
model = resnet18(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'resnet50' :
model = resnet50(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'resnet152' :
model = resnet152(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'LeNet' :
model = LeNet(n_exp=int(d3), mode=int(d2)).to(device)
elif args.model == 'NetBN' :
model = NetBN().to(device)
# model = resnet18(n_exp=int(d3), mode=int(d2)).to(device)
# the gradient-distribution plots were generated when training from scratch
# model.load_state_dict(torch.load('./project/p/ckpt/cifar-10_lenet_bn.pt', map_location='cpu'))
model.load_state_dict(torch.load('./project/p/ckpt/' + args.model + '/' + args.model + '.pt', map_location='cpu'))
# else:
# model = Net()
# model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
@@ -91,7 +112,7 @@ if __name__ == "__main__":
model.eval()
full_inference(model, test_loader, device)
full_writer = SummaryWriter(log_dir='./project/p/' + args.model +'/ptqlog_mode' + str(d2) + '/' + str(d3) + '/' + 'full_log')
for name, param in model.named_parameters():
full_writer.add_histogram(tag=name + '_data', values=param.data)
@@ -99,7 +120,7 @@ if __name__ == "__main__":
model.quantize(num_bits=num_bits)
model.eval()
print('Quantization bit: %d' % num_bits)
writer = SummaryWriter(log_dir='./project/p/'+ args.model + '/ptqlog_mode' + str(d2) + '/' + str(d3) + '/' + 'quant_bit_' + str(d1) + '_log')
if load_quant_model_file is not None:
model.load_state_dict(torch.load(load_quant_model_file))
@@ -114,12 +135,12 @@ if __name__ == "__main__":
# original PTQ, for mode=1
# save_file = 'ckpt/cifar-10_lenet_bn_ptq_' + str(d1) + '_.pt'
dir_name ='./project/p/ckpt/' + args.model + '/mode'+ str(d2) + '_' + str(d3) + '/ptq'
if not os.path.isdir(dir_name):
os.makedirs(dir_name, mode=0o777)
os.chmod(dir_name, mode=0o777)
save_file = './project/p/ckpt/' + args.model + '/mode'+ str(d2) + '_' + str(d3) + '/ptq' + '/cifar-10_' + args.model + '_ptq_' + str(d1) + '_.pt'
torch.save(model.state_dict(), save_file)
@@ -130,7 +151,7 @@ if __name__ == "__main__":
# print(model.qconv1.M.device)
acc = quantize_inference(model, test_loader, device)
f = open('./project/p/' + args.model + '_ptq_acc' + '.txt', 'a')
f.write('bit ' + str(d1) + ': ' + str(acc) + '\n')
f.close()
......
@@ -17,11 +17,13 @@ class LeNet(nn.Module):
self.conv_layers = nn.ModuleDict({
# block1
'conv1': nn.Conv2d(3,6,5), # (2*3*5*5) * 32*32*6 (bias accounts for 32*32*6 of this) 6144/921600
'bn1': nn.BatchNorm2d(6),
'reluc1': nn.ReLU(),
'pool1': nn.MaxPool2d(2,2),
# block2
'conv2': nn.Conv2d(6,16,5), # (2*6*5*5) * 16*16*16 (bias accounts for 16*16*16 of this) 1536/1228800
'bn2': nn.BatchNorm2d(16),
'reluc2': nn.ReLU(),
'pool2': nn.MaxPool2d(2,2),
})
@@ -212,18 +214,14 @@ class ResNet(nn.Module):
self.inplanes = 16 # CIFAR-10 images are small, so fewer channels are needed at the start
GlobalVariables.SELF_INPLANES = self.inplanes
# print('resnet init:'+ str(GlobalVariables.SELF_INPLANES))
# input layer
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.relu = nn.ReLU()
# residual stages (4 stages; each stage contains 6n+2 conv layers)
# self.layer1 = self._make_layer(block, 16, layers[0])
# self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
# self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
# self.layer4 = self._make_layer(block, 128, layers[3], stride=2)
self.layer1 = MakeLayer(block, 16, layers[0], n_exp=self.n_exp, mode=self.mode)
self.layer2 = MakeLayer(block, 32, layers[1], stride=2, n_exp=self.n_exp, mode=self.mode)
self.layer3 = MakeLayer(block, 64, layers[2], stride=2, n_exp=self.n_exp, mode=self.mode)
@@ -233,7 +231,6 @@ class ResNet(nn.Module):
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(128 * block.expansion, num_classes)
# parameter initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
@@ -242,34 +239,11 @@ class ResNet(nn.Module):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# self.inplanes appears to be continually modified and passed along through the resnet
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and self.inplanes is the input channel count of the current residual block,
# # while planes * block.expansion is its output channel count. So a downsample is needed whenever stride != 1 or self.inplanes != planes * block.expansion
# # within a stage, every residual block except the first has equal input and output channel counts and the same stride (1 or 2). These conv layers keep the input tensor size; the output height/width shrink gradually as blocks stack
# if stride != 1 or self.inplanes != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(self.inplanes, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(self.inplanes, planes, stride, downsample))
# self.inplanes = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(self.inplanes, planes))
# return nn.Sequential(*layers)
def forward(self, x):
# input layer
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# compared with the imagenet version there is no maxpool here: cifar10 images are already small, and pooling again would make them too small
@@ -289,11 +263,8 @@ class ResNet(nn.Module):
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=True,qo=True,num_bits=num_bits,n_exp=self.n_exp, mode=self.mode)
self.layer1.quantize(num_bits=num_bits)
self.layer2.quantize(num_bits=num_bits)
self.layer3.quantize(num_bits=num_bits)
@@ -307,8 +278,7 @@ class ResNet(nn.Module):
# out = F.softmax(x, dim=1)
# return out
x = self.qconvbnrelu1(x)
x = self.layer1.quantize_forward(x)
x = self.layer2.quantize_forward(x)
x = self.layer3.quantize_forward(x)
@@ -322,9 +292,8 @@ class ResNet(nn.Module):
def freeze(self):
self.qconvbnrelu1.freeze() # as the first layer it has its own qi, so no qi needs to be supplied again at freeze time
qo = self.layer1.freeze(qinput = self.qconvbnrelu1.qo)
qo = self.layer2.freeze(qinput = qo)
qo = self.layer3.freeze(qinput = qo)
qo = self.layer4.freeze(qinput = qo)
@@ -335,9 +304,8 @@ class ResNet(nn.Module):
pass
def quantize_inference(self, x):
qx = self.qconvbnrelu1.qi.quantize_tensor(x,mode=self.mode)
qx = self.qconvbnrelu1.quantize_inference(qx)
qx = self.layer1.quantize_inference(qx)
qx = self.layer2.quantize_inference(qx)
qx = self.layer3.quantize_inference(qx)
@@ -364,13 +332,13 @@ class BasicBlock(nn.Module):
# first conv layer
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes)
# second conv layer
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes)
# shortcut
self.relu = nn.ReLU()
@@ -384,12 +352,11 @@ class BasicBlock(nn.Module):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(identity)
@@ -400,64 +367,54 @@ class BasicBlock(nn.Module):
return out
def quantize(self, num_bits=8):
self.qconvbnrelu1 = QConvBNReLU(self.conv1,self.bn1,qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
self.qconvbn1 = QConvBN(self.conv2,self.bn2,qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
if self.downsample is not None:
self.qconvbn2 = QConvBN(self.downsample[0],self.downsample[1],qi=False,qo=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode)
self.qrelu1 = QReLU(qi=True,num_bits=num_bits,n_exp=self.n_exp,mode=self.mode) # needs its own qi
def quantize_forward(self, x):
identity = x
out = self.qconvbnrelu1(x)
out = self.qconvbn1(out)
if self.downsample is not None:
identity = self.qconvbn2(identity)
# residual add
out = identity + out # a proper elementwise-add transform is needed here; to be fixed later
out = self.qrelu1(out)
return out
def freeze(self, qinput):
# qconvbnrelu1 could actually reuse the previous layer's qo, but passing it in felt awkward, so it is not used
# still needs careful checking
self.qconvbnrelu1.freeze(qi= qinput) # must be chained to the previous module's final qo
self.qconvbn1.freeze(qi = self.qconvbnrelu1.qo)
if self.downsample is not None:
self.qconvbn2.freeze(qi = self.qconvbn1.qo)
self.qrelu1.freeze()
return self.qrelu1.qi # the block input is the sum of two paths, so neither branch's qo can be used directly; the qi statistics that qrelu1 collects after the relu serve as the output qparams
else:
self.qrelu1.freeze()
return self.qrelu1.qi # the block input is the sum of two paths, so neither branch's qo can be used directly; the qi statistics that qrelu1 collects after the relu serve as the output qparams
def quantize_inference(self, x):
# the initial quantize_tensor/dequantize_tensor seem unnecessary here: this is not the first or last layer, and as long as every intermediate layer stays in the quantized domain no such handling is needed
identity = x
out = self.qconvbnrelu1.quantize_inference(x)
out = self.qconvbn1.quantize_inference(out)
if self.downsample is not None:
identity = self.qconvbn2.quantize_inference(identity)
out = identity + out # a proper elementwise-add transform is needed here; to be fixed later
out = self.qrelu1.quantize_inference(out)
return out
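# The comments above flag the missing elementwise-add requantization. A common
# fix (hedged sketch under the usual affine convention; qi_a/qi_b/qo are assumed
# to expose .scale and .zero_point like the QParam objects in this file) brings
# both branches onto one scale before adding:
import torch

def quantized_add_sketch(qa, qb, qi_a, qi_b, qo):
    a = qi_a.scale * (qa - qi_a.zero_point)  # dequantize branch a
    b = qi_b.scale * (qb - qi_b.zero_point)  # dequantize branch b
    return torch.round((a + b) / qo.scale) + qo.zero_point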
@@ -518,12 +475,12 @@ class MakeLayer(nn.Module):
def __init__(self, block, planes, blocks, stride=1, n_exp=4, mode=1):
super(MakeLayer, self).__init__()
# print('makelayer init:'+ str(GlobalVariables.SELF_INPLANES))
self.downsample = None
if stride != 1 or GlobalVariables.SELF_INPLANES != planes * block.expansion:
self.downsample = nn.Sequential(
nn.Conv2d(GlobalVariables.SELF_INPLANES, planes * block.expansion,kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion)
)
self.n_exp = n_exp
self.mode = mode
@@ -534,6 +491,27 @@ class MakeLayer(nn.Module):
self.blockdict['block' + str(i+1)] = block(inplanes=GlobalVariables.SELF_INPLANES, planes=planes,n_exp=self.n_exp, mode=self.mode) # the blocks are instantiated here
# def _make_layer(self, block, planes, blocks, stride=1):
# downsample = None
# # stride is the conv layer's stride, and self.inplanes is the input channel count of the current residual block,
# # while planes * block.expansion is its output channel count. So a downsample is needed whenever stride != 1 or self.inplanes != planes * block.expansion
# # within a stage, every residual block except the first has equal input and output channel counts and the same stride (1 or 2). These conv layers keep the input tensor size; the output height/width shrink gradually as blocks stack
# if stride != 1 or SELF_INPLANES != planes * block.expansion:
# downsample = nn.Sequential(
# nn.Conv2d(SELF_INPLANES, planes * block.expansion,
# kernel_size=1, stride=stride, bias=False),
# nn.BatchNorm2d(planes * block.expansion),
# )
# layers = []
# layers.append(block(SELF_INPLANES, planes, stride, downsample))
# SELF_INPLANES = planes * block.expansion
# for _ in range(1, blocks): # number of blocks
# layers.append(block(SELF_INPLANES, planes))
# return nn.Sequential(*layers)
def forward(self,x):
for _, layer in self.blockdict.items():
......